Overview
| Comment: | [graphspell][fx] lexicographer update: same code for Python and JavaScript (remove deprecated code) |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | fx | graphspell | salxg |
| Files: | files | file ages | folders |
| SHA3-256: |
891500b92a4b12944293c70593356b6c |
| User & Date: | olr on 2020-09-10 13:15:03 |
| Other Links: | branch diff | manifest | tags |
Context
|
2020-09-10
| ||
| 15:30 | [core][graphspell][fx][cli] new lexicographer check-in: 4fdd6a9337 user: olr tags: trunk, cli, core, fx, graphspell | |
| 13:15 | [graphspell][fx] lexicographer update: same code for Python and JavaScript (remove deprecated code) Closed-Leaf check-in: 891500b92a user: olr tags: fx, graphspell, salxg | |
| 12:17 | [core][graphspell][fx][cli] lexicographer: update check-in: e0ce6b10d7 user: olr tags: cli, core, fx, graphspell, salxg | |
Changes
Modified gc_lang/fr/webext/gce_worker.js from [e86439bcff] to [879c183120].
| ︙ | ︙ | |||
240 241 242 243 244 245 246 |
function getListOfTokens (sText, oInfo={}) {
// lexicographer
try {
sText = sText.replace(//g, "").normalize("NFC");
for (let sParagraph of text.getParagraph(sText)) {
if (sParagraph.trim() !== "") {
| > > > > | | 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 |
function getListOfTokens (sText, oInfo={}) {
// lexicographer
try {
sText = sText.replace(//g, "").normalize("NFC");
for (let sParagraph of text.getParagraph(sText)) {
if (sParagraph.trim() !== "") {
let lTokens = [ ...oTokenizer.genTokens(sParagraph) ];
for (let oToken of lTokens) {
oSpellChecker.setLabelsOnToken(oToken);
}
postMessage(createResponse("getListOfTokens", { sParagraph: sParagraph, lTokens: lTokens }, oInfo, false));
}
}
postMessage(createResponse("getListOfTokens", null, oInfo, true));
}
catch (e) {
console.error(e);
postMessage(createResponse("getListOfTokens", createErrorResult(e, "no tokens"), oInfo, true, true));
|
| ︙ | ︙ |
Modified graphspell-js/lexgraph_fr.js from [841b4146b0] to [816b039883].
| ︙ | ︙ | |||
391 392 393 394 395 396 397 |
['>', "supérieur à"],
['⩽', "inférieur ou égal à"],
['⩾', "supérieur ou égal à"],
['%', "signe de pourcentage"],
['‰', "signe pour mille"],
]),
| < < < < | < < < < < < < | | 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 |
['>', "supérieur à"],
['⩽', "inférieur ou égal à"],
['⩾', "supérieur ou égal à"],
['%', "signe de pourcentage"],
['‰', "signe pour mille"],
]),
_zPartDemForm: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)-(là|ci)$", "i"),
_aPartDemExceptList: new Set(["celui", "celle", "ceux", "celles", "de", "jusque", "par", "marie-couche-toi"]),
_zInterroVerb: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)(-(?:t-(?:ie?l|elle|on)|je|tu|ie?ls?|elles?|on|[nv]ous))$", "i"),
_zImperatifVerb: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)(-(?:l(?:es?|a)-(?:moi|toi|lui|[nv]ous|leur)|y|en|[mts]['’ʼ‘‛´`′‵՚ꞌꞋ](?:y|en)|les?|la|[mt]oi|leur|lui))$", "i"),
_zTag: new RegExp("[:;/][a-zA-Z0-9ÑÂĴĈŔÔṼŴ!][^:;/]*", "g"),
split: function (sWord) {
// returns an array of strings (prefix, trimmed_word, suffix)
let sPrefix = "";
let sSuffix = "";
// préfixe élidé
let m = /^([ldmtsnjcç]|lorsqu|presqu|jusqu|puisqu|quoiqu|quelqu|qu)['’ʼ‘‛´`′‵՚ꞌꞋ]([a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st-]+)/i.exec(sWord);
if (m) {
sPrefix = m[1] + "’";
sWord = m[2];
}
// mots composés
m = /^([a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st]+)(-(?:(?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts]’(?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous|ce))$/i.exec(sWord);
if (m) {
|
| ︙ | ︙ | |||
555 556 557 558 559 560 561 |
oToken["aLabels"] = ["token de nature inconnue"];
}
} catch (e) {
console.error(e);
}
},
| < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 |
oToken["aLabels"] = ["token de nature inconnue"];
}
} catch (e) {
console.error(e);
}
},
// Other functions
filterSugg: function (aSugg) {
return aSugg.filter((sSugg) => { return !sSugg.endsWith("è") && !sSugg.endsWith("È"); });
}
}
// CommonJS interop: expose the lexicographer object when running under
// Node-style module loading; in browser/worker contexts `exports` is
// undefined and this is a no-op.
if (typeof exports !== 'undefined') {
    exports.lexgraph_fr = lexgraph_fr;
}
|
Modified graphspell/lexgraph_fr.py from [60d5043aa3] to [f36c3bc666].
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 | """ Lexicographer for the French language """ # Note: # This mode must contains at least: # <dSugg> : a dictionary for default suggestions. # <bLexicographer> : a boolean False # if the boolean is True, 4 functions are required: # split(sWord) -> returns a list of string (that will be analyzed) # analyze(sWord) -> returns a string with the meaning of word # readableMorph(sMorph) -> returns a string with the meaning of tags # filterSugg(aWord) -> returns a filtered list of suggestions import re #### Suggestions | > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 | """ Lexicographer for the French language """ # Note: # This mode must contains at least: # <dSugg> : a dictionary for default suggestions. # <bLexicographer> : a boolean False # if the boolean is True, 4 functions are required: # split(sWord) -> returns a list of string (that will be analyzed) # analyze(sWord) -> returns a string with the meaning of word # readableMorph(sMorph) -> returns a string with the meaning of tags # setLabelsOnToken(dToken) -> adds readable information on token # filterSugg(aWord) -> returns a filtered list of suggestions import re #### Suggestions |
| ︙ | ︙ | |||
415 416 417 418 419 420 421 |
if not sRes:
return " [" + sMorph + "]: étiquettes inconnues"
return sRes.rstrip(",")
_zPartDemForm = re.compile("([\\w]+)-(là|ci)$")
_zInterroVerb = re.compile("([\\w]+)(-(?:t-(?:ie?l|elle|on)|je|tu|ie?ls?|elles?|on|[nv]ous))$")
| | | 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 |
if not sRes:
return " [" + sMorph + "]: étiquettes inconnues"
return sRes.rstrip(",")
_zPartDemForm = re.compile("([\\w]+)-(là|ci)$")
_zInterroVerb = re.compile("([\\w]+)(-(?:t-(?:ie?l|elle|on)|je|tu|ie?ls?|elles?|on|[nv]ous))$")
_zImperatifVerb = re.compile("([\\w]+)(-(?:l(?:es?|a)-(?:moi|toi|lui|[nv]ous|leur)|y|en|[mts]['’ʼ‘‛´`′‵՚ꞌꞋ](?:y|en)|les?|la|[mt]oi|leur|lui))$")
def setLabelsOnToken (dToken):
# Token: .sType, .sValue, .nStart, .nEnd, .lMorph
try:
if dToken["sType"] == "PUNC" or dToken["sType"] == "SIGN":
dToken["aLabels"] = [_dValues.get(dToken["sValue"], "signe de ponctuation divers")]
elif dToken["sType"] == 'NUM':
|
| ︙ | ︙ | |||
437 438 439 440 441 442 443 |
elif dToken["sType"] == 'PSEUDOHTML':
dToken["aLabels"] = ["balise pseudo-HTML"]
elif dToken["sType"] == 'HTMLENTITY':
dToken["aLabels"] = ["entité caractère XML/HTML"]
elif dToken["sType"] == 'HOUR':
dToken["aLabels"] = ["heure"]
elif dToken["sType"] == 'WORD_ELIDED':
| | | 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 |
elif dToken["sType"] == 'PSEUDOHTML':
dToken["aLabels"] = ["balise pseudo-HTML"]
elif dToken["sType"] == 'HTMLENTITY':
dToken["aLabels"] = ["entité caractère XML/HTML"]
elif dToken["sType"] == 'HOUR':
dToken["aLabels"] = ["heure"]
elif dToken["sType"] == 'WORD_ELIDED':
dToken["aLabels"] = [_dValues.get(dToken["sValue"].lower(), "préfixe élidé inconnu")]
elif dToken["sType"] == 'WORD_ORDINAL':
dToken["aLabels"] = ["nombre ordinal"]
elif dToken["sType"] == 'FOLDERUNIX':
dToken["aLabels"] = ["dossier UNIX (et dérivés)"]
elif dToken["sType"] == 'FOLDERWIN':
dToken["aLabels"] = ["dossier Windows"]
elif dToken["sType"] == 'WORD_ACRONYM':
|
| ︙ | ︙ |