Overview
| Comment: | [graphspell][fx] lexicographer update: same code for Python and JavaScript (remove deprecated code) | 
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive | 
| Timelines: | family | ancestors | descendants | both | fx | graphspell | salxg | 
| Files: | files | file ages | folders | 
| SHA3-256: | 891500b92a4b12944293c70593356b6c | 
| User & Date: | olr on 2020-09-10 13:15:03 | 
| Other Links: | branch diff | manifest | tags | 
Context
| 2020-09-10 | ||
| 15:30 | [core][graphspell][fx][cli] new lexicographer check-in: 4fdd6a9337 user: olr tags: trunk, cli, core, fx, graphspell | |
| 13:15 | [graphspell][fx] lexicographer update: same code for Python and JavaScript (remove deprecated code) Closed-Leaf check-in: 891500b92a user: olr tags: fx, graphspell, salxg | |
| 12:17 | [core][graphspell][fx][cli] lexicographer: update check-in: e0ce6b10d7 user: olr tags: cli, core, fx, graphspell, salxg | |
Changes
Modified gc_lang/fr/webext/gce_worker.js from [e86439bcff] to [879c183120].
| ︙ | ︙ | |||
| 240 241 242 243 244 245 246 | 
function getListOfTokens (sText, oInfo={}) {
    // lexicographer
    try {
        sText = sText.replace(//g, "").normalize("NFC");
        for (let sParagraph of text.getParagraph(sText)) {
            if (sParagraph.trim() !== "") {
 | > > > > | | 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 | 
function getListOfTokens (sText, oInfo={}) {
    // lexicographer
    try {
        sText = sText.replace(//g, "").normalize("NFC");
        for (let sParagraph of text.getParagraph(sText)) {
            if (sParagraph.trim() !== "") {
                let lTokens = [ ...oTokenizer.genTokens(sParagraph) ];
                for (let oToken of lTokens) {
                    oSpellChecker.setLabelsOnToken(oToken);
                }
                postMessage(createResponse("getListOfTokens", { sParagraph: sParagraph, lTokens: lTokens }, oInfo, false));
            }
        }
        postMessage(createResponse("getListOfTokens", null, oInfo, true));
    }
    catch (e) {
        console.error(e);
        postMessage(createResponse("getListOfTokens", createErrorResult(e, "no tokens"), oInfo, true, true));
 | 
| ︙ | ︙ | 
Modified graphspell-js/lexgraph_fr.js from [841b4146b0] to [816b039883].
| ︙ | ︙ | |||
| 391 392 393 394 395 396 397 | 
            ['>', "supérieur à"],
            ['⩽', "inférieur ou égal à"],
            ['⩾', "supérieur ou égal à"],
            ['%', "signe de pourcentage"],
            ['‰', "signe pour mille"],
        ]),
 | < < < < | < < < < < < < | | 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 | 
            ['>', "supérieur à"],
            ['⩽', "inférieur ou égal à"],
            ['⩾', "supérieur ou égal à"],
            ['%', "signe de pourcentage"],
            ['‰', "signe pour mille"],
        ]),
    _zPartDemForm: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)-(là|ci)$", "i"),
    _aPartDemExceptList: new Set(["celui", "celle", "ceux", "celles", "de", "jusque", "par", "marie-couche-toi"]),
    _zInterroVerb: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)(-(?:t-(?:ie?l|elle|on)|je|tu|ie?ls?|elles?|on|[nv]ous))$", "i"),
    _zImperatifVerb: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)(-(?:l(?:es?|a)-(?:moi|toi|lui|[nv]ous|leur)|y|en|[mts]['’ʼ‘‛´`′‵՚ꞌꞋ](?:y|en)|les?|la|[mt]oi|leur|lui))$", "i"),
    _zTag: new RegExp("[:;/][a-zA-Z0-9ÑÂĴĈŔÔṼŴ!][^:;/]*", "g"),
    split: function (sWord) {
        // returns an arry of strings (prefix, trimed_word, suffix)
        let sPrefix = "";
        let sSuffix = "";
        // préfixe élidé
        let m = /^([ldmtsnjcç]|lorsqu|presqu|jusqu|puisqu|quoiqu|quelqu|qu)['’ʼ‘‛´`′‵՚ꞌꞋ]([a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st-]+)/i.exec(sWord);
        if (m) {
            sPrefix = m[1] + "’";
            sWord = m[2];
        }
        // mots composés
        m = /^([a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st]+)(-(?:(?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts]’(?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous|ce))$/i.exec(sWord);
        if (m) {
 | 
| ︙ | ︙ | |||
| 555 556 557 558 559 560 561 | 
                    oToken["aLabels"] = ["token de nature inconnue"];
            }
        } catch (e) {
            console.error(e);
        }
    },
 | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 | 
                    oToken["aLabels"] = ["token de nature inconnue"];
            }
        } catch (e) {
            console.error(e);
        }
    },
    // Other functions
    filterSugg: function (aSugg) {
        return aSugg.filter((sSugg) => { return !sSugg.endsWith("è") && !sSugg.endsWith("È"); });
    }
}
if (typeof(exports) !== 'undefined') {
    exports.lexgraph_fr = lexgraph_fr;
}
 | 
Modified graphspell/lexgraph_fr.py from [60d5043aa3] to [f36c3bc666].
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 | """ Lexicographer for the French language """ # Note: # This mode must contains at least: # <dSugg> : a dictionary for default suggestions. # <bLexicographer> : a boolean False # if the boolean is True, 4 functions are required: # split(sWord) -> returns a list of string (that will be analyzed) # analyze(sWord) -> returns a string with the meaning of word # readableMorph(sMorph) -> returns a string with the meaning of tags # filterSugg(aWord) -> returns a filtered list of suggestions import re #### Suggestions | > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 | """ Lexicographer for the French language """ # Note: # This mode must contains at least: # <dSugg> : a dictionary for default suggestions. # <bLexicographer> : a boolean False # if the boolean is True, 4 functions are required: # split(sWord) -> returns a list of string (that will be analyzed) # analyze(sWord) -> returns a string with the meaning of word # readableMorph(sMorph) -> returns a string with the meaning of tags # setLabelsOnToken(dToken) -> adds readable information on token # filterSugg(aWord) -> returns a filtered list of suggestions import re #### Suggestions | 
| ︙ | ︙ | |||
| 415 416 417 418 419 420 421 | 
    if not sRes:
        return " [" + sMorph + "]: étiquettes inconnues"
    return sRes.rstrip(",")
_zPartDemForm = re.compile("([\\w]+)-(là|ci)$")
_zInterroVerb = re.compile("([\\w]+)(-(?:t-(?:ie?l|elle|on)|je|tu|ie?ls?|elles?|on|[nv]ous))$")
 | | | 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 | 
    if not sRes:
        return " [" + sMorph + "]: étiquettes inconnues"
    return sRes.rstrip(",")
_zPartDemForm = re.compile("([\\w]+)-(là|ci)$")
_zInterroVerb = re.compile("([\\w]+)(-(?:t-(?:ie?l|elle|on)|je|tu|ie?ls?|elles?|on|[nv]ous))$")
_zImperatifVerb = re.compile("([\\w]+)(-(?:l(?:es?|a)-(?:moi|toi|lui|[nv]ous|leur)|y|en|[mts]['’ʼ‘‛´`′‵՚ꞌꞋ](?:y|en)|les?|la|[mt]oi|leur|lui))$")
def setLabelsOnToken (dToken):
    # Token: .sType, .sValue, .nStart, .nEnd, .lMorph
    try:
        if dToken["sType"] == "PUNC" or dToken["sType"] == "SIGN":
            dToken["aLabels"] = [_dValues.get(dToken["sValue"], "signe de ponctuation divers")]
        elif dToken["sType"] == 'NUM':
 | 
| ︙ | ︙ | |||
| 437 438 439 440 441 442 443 | 
        elif dToken["sType"] == 'PSEUDOHTML':
            dToken["aLabels"] = ["balise pseudo-HTML"]
        elif dToken["sType"] == 'HTMLENTITY':
            dToken["aLabels"] = ["entité caractère XML/HTML"]
        elif dToken["sType"] == 'HOUR':
            dToken["aLabels"] = ["heure"]
        elif dToken["sType"] == 'WORD_ELIDED':
 | | | 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 | 
        elif dToken["sType"] == 'PSEUDOHTML':
            dToken["aLabels"] = ["balise pseudo-HTML"]
        elif dToken["sType"] == 'HTMLENTITY':
            dToken["aLabels"] = ["entité caractère XML/HTML"]
        elif dToken["sType"] == 'HOUR':
            dToken["aLabels"] = ["heure"]
        elif dToken["sType"] == 'WORD_ELIDED':
            dToken["aLabels"] = [_dValues.get(dToken["sValue"].lower(), "préfixe élidé inconnu")]
        elif dToken["sType"] == 'WORD_ORDINAL':
            dToken["aLabels"] = ["nombre ordinal"]
        elif dToken["sType"] == 'FOLDERUNIX':
            dToken["aLabels"] = ["dossier UNIX (et dérivés)"]
        elif dToken["sType"] == 'FOLDERWIN':
            dToken["aLabels"] = ["dossier Windows"]
        elif dToken["sType"] == 'WORD_ACRONYM':
 | 
| ︙ | ︙ |