Overview
Comment: | [graphspell][fx] lexicographer update: same code for Python and JavaScript (remove deprecated code) |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | fx | graphspell | salxg |
Files: | files | file ages | folders |
SHA3-256: | 891500b92a4b12944293c70593356b6c |
User & Date: | olr on 2020-09-10 13:15:03 |
Other Links: | branch diff | manifest | tags |
Context
2020-09-10
| ||
15:30 | [core][graphspell][fx][cli] new lexicographer check-in: 4fdd6a9337 user: olr tags: trunk, cli, core, fx, graphspell | |
13:15 | [graphspell][fx] lexicographer update: same code for Python and JavaScript (remove deprecated code) Closed-Leaf check-in: 891500b92a user: olr tags: fx, graphspell, salxg | |
12:17 | [core][graphspell][fx][cli] lexicographer: update check-in: e0ce6b10d7 user: olr tags: cli, core, fx, graphspell, salxg | |
Changes
Modified gc_lang/fr/webext/gce_worker.js from [e86439bcff] to [879c183120].
︙ | ︙ | |||
240 241 242 243 244 245 246 | function getListOfTokens (sText, oInfo={}) { // lexicographer try { sText = sText.replace(//g, "").normalize("NFC"); for (let sParagraph of text.getParagraph(sText)) { if (sParagraph.trim() !== "") { | > > > > | | 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 | function getListOfTokens (sText, oInfo={}) { // lexicographer try { sText = sText.replace(//g, "").normalize("NFC"); for (let sParagraph of text.getParagraph(sText)) { if (sParagraph.trim() !== "") { let lTokens = [ ...oTokenizer.genTokens(sParagraph) ]; for (let oToken of lTokens) { oSpellChecker.setLabelsOnToken(oToken); } postMessage(createResponse("getListOfTokens", { sParagraph: sParagraph, lTokens: lTokens }, oInfo, false)); } } postMessage(createResponse("getListOfTokens", null, oInfo, true)); } catch (e) { console.error(e); postMessage(createResponse("getListOfTokens", createErrorResult(e, "no tokens"), oInfo, true, true)); |
︙ | ︙ |
Modified graphspell-js/lexgraph_fr.js from [841b4146b0] to [816b039883].
︙ | ︙ | |||
391 392 393 394 395 396 397 | ['>', "supérieur à"], ['⩽', "inférieur ou égal à"], ['⩾', "supérieur ou égal à"], ['%', "signe de pourcentage"], ['‰', "signe pour mille"], ]), | < < < < | < < < < < < < | | 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 | ['>', "supérieur à"], ['⩽', "inférieur ou égal à"], ['⩾', "supérieur ou égal à"], ['%', "signe de pourcentage"], ['‰', "signe pour mille"], ]), _zPartDemForm: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)-(là|ci)$", "i"), _aPartDemExceptList: new Set(["celui", "celle", "ceux", "celles", "de", "jusque", "par", "marie-couche-toi"]), _zInterroVerb: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)(-(?:t-(?:ie?l|elle|on)|je|tu|ie?ls?|elles?|on|[nv]ous))$", "i"), _zImperatifVerb: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)(-(?:l(?:es?|a)-(?:moi|toi|lui|[nv]ous|leur)|y|en|[mts]['’ʼ‘‛´`′‵՚ꞌꞋ](?:y|en)|les?|la|[mt]oi|leur|lui))$", "i"), _zTag: new RegExp("[:;/][a-zA-Z0-9ÑÂĴĈŔÔṼŴ!][^:;/]*", "g"), split: function (sWord) { // returns an arry of strings (prefix, trimed_word, suffix) let sPrefix = ""; let sSuffix = ""; // préfixe élidé let m = /^([ldmtsnjcç]|lorsqu|presqu|jusqu|puisqu|quoiqu|quelqu|qu)['’ʼ‘‛´`′‵՚ꞌꞋ]([a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st-]+)/i.exec(sWord); if (m) { sPrefix = m[1] + "’"; sWord = m[2]; } // mots composés m = /^([a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st]+)(-(?:(?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts]’(?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous|ce))$/i.exec(sWord); if (m) { |
︙ | ︙ | |||
555 556 557 558 559 560 561 | oToken["aLabels"] = ["token de nature inconnue"]; } } catch (e) { console.error(e); } }, | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 | oToken["aLabels"] = ["token de nature inconnue"]; } } catch (e) { console.error(e); } }, // Other functions filterSugg: function (aSugg) { return aSugg.filter((sSugg) => { return !sSugg.endsWith("è") && !sSugg.endsWith("È"); }); } } if (typeof(exports) !== 'undefined') { exports.lexgraph_fr = lexgraph_fr; } |
Modified graphspell/lexgraph_fr.py from [60d5043aa3] to [f36c3bc666].
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 | """ Lexicographer for the French language """ # Note: # This mode must contains at least: # <dSugg> : a dictionary for default suggestions. # <bLexicographer> : a boolean False # if the boolean is True, 4 functions are required: # split(sWord) -> returns a list of string (that will be analyzed) # analyze(sWord) -> returns a string with the meaning of word # readableMorph(sMorph) -> returns a string with the meaning of tags # filterSugg(aWord) -> returns a filtered list of suggestions import re #### Suggestions | > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 | """ Lexicographer for the French language """ # Note: # This mode must contains at least: # <dSugg> : a dictionary for default suggestions. # <bLexicographer> : a boolean False # if the boolean is True, 4 functions are required: # split(sWord) -> returns a list of string (that will be analyzed) # analyze(sWord) -> returns a string with the meaning of word # readableMorph(sMorph) -> returns a string with the meaning of tags # setLabelsOnToken(dToken) -> adds readable information on token # filterSugg(aWord) -> returns a filtered list of suggestions import re #### Suggestions |
︙ | ︙ | |||
415 416 417 418 419 420 421 | if not sRes: return " [" + sMorph + "]: étiquettes inconnues" return sRes.rstrip(",") _zPartDemForm = re.compile("([\\w]+)-(là|ci)$") _zInterroVerb = re.compile("([\\w]+)(-(?:t-(?:ie?l|elle|on)|je|tu|ie?ls?|elles?|on|[nv]ous))$") | | | 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 | if not sRes: return " [" + sMorph + "]: étiquettes inconnues" return sRes.rstrip(",") _zPartDemForm = re.compile("([\\w]+)-(là|ci)$") _zInterroVerb = re.compile("([\\w]+)(-(?:t-(?:ie?l|elle|on)|je|tu|ie?ls?|elles?|on|[nv]ous))$") _zImperatifVerb = re.compile("([\\w]+)(-(?:l(?:es?|a)-(?:moi|toi|lui|[nv]ous|leur)|y|en|[mts]['’ʼ‘‛´`′‵՚ꞌꞋ](?:y|en)|les?|la|[mt]oi|leur|lui))$") def setLabelsOnToken (dToken): # Token: .sType, .sValue, .nStart, .nEnd, .lMorph try: if dToken["sType"] == "PUNC" or dToken["sType"] == "SIGN": dToken["aLabels"] = [_dValues.get(dToken["sValue"], "signe de ponctuation divers")] elif dToken["sType"] == 'NUM': |
︙ | ︙ | |||
437 438 439 440 441 442 443 | elif dToken["sType"] == 'PSEUDOHTML': dToken["aLabels"] = ["balise pseudo-HTML"] elif dToken["sType"] == 'HTMLENTITY': dToken["aLabels"] = ["entité caractère XML/HTML"] elif dToken["sType"] == 'HOUR': dToken["aLabels"] = ["heure"] elif dToken["sType"] == 'WORD_ELIDED': | | | 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 | elif dToken["sType"] == 'PSEUDOHTML': dToken["aLabels"] = ["balise pseudo-HTML"] elif dToken["sType"] == 'HTMLENTITY': dToken["aLabels"] = ["entité caractère XML/HTML"] elif dToken["sType"] == 'HOUR': dToken["aLabels"] = ["heure"] elif dToken["sType"] == 'WORD_ELIDED': dToken["aLabels"] = [_dValues.get(dToken["sValue"].lower(), "préfixe élidé inconnu")] elif dToken["sType"] == 'WORD_ORDINAL': dToken["aLabels"] = ["nombre ordinal"] elif dToken["sType"] == 'FOLDERUNIX': dToken["aLabels"] = ["dossier UNIX (et dérivés)"] elif dToken["sType"] == 'FOLDERWIN': dToken["aLabels"] = ["dossier Windows"] elif dToken["sType"] == 'WORD_ACRONYM': |
︙ | ︙ |