Grammalecte  Check-in [a1b165e276]

Overview
Comment: [graphspell][core] tokenizer: rename ELPFX tokens to WORD_ELIDED
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | graphspell | rg
Files: files | file ages | folders
SHA3-256: a1b165e27617202e63311b572053dbd71ca22c536d2e1186629ae12afa3f57a2
User & Date: olr on 2018-06-28 07:53:20
Original Comment: [graphspell][core] rename ELPFX tokens to WORD_ELIDED
Other Links: branch diff | manifest | tags
Context
2018-06-28
08:00
[graphspell] tokenizer: rename ORDINAL tokens to WORD_ORDINAL check-in: 20dbc28ded user: olr tags: graphspell, rg
07:53
[graphspell][core] tokenizer: rename ELPFX tokens to WORD_ELIDED check-in: a1b165e276 user: olr tags: core, graphspell, rg
2018-06-27
23:39
[build][fix] check regexes: memorize checked regexes check-in: 74d9c8e099 user: olr tags: build, rg
Changes

Modified gc_lang/fr/modules-js/lexicographe.js from [823f277d47] to [6f858d6849].

83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
    [':O2', [" 2ᵉ pers.,", "Pronom : 2ᵉ personne"]],
    [':O3', [" 3ᵉ pers.,", "Pronom : 3ᵉ personne"]],
    [':C', [" conjonction,", "Conjonction"]],
    [':Ĉ', [" conjonction (él.),", "Conjonction (élément)"]],
    [':Cc', [" conjonction de coordination,", "Conjonction de coordination"]],
    [':Cs', [" conjonction de subordination,", "Conjonction de subordination"]],
    [':Ĉs', [" conjonction de subordination (él.),", "Conjonction de subordination (élément)"]],
    
    [':Ñ', [" locution nominale (él.),", "Locution nominale (élément)"]],
    [':Â', [" locution adjectivale (él.),", "Locution adjectivale (élément)"]],
    [':Ṽ', [" locution verbale (él.),", "Locution verbale (élément)"]],
    [':Ŵ', [" locution adverbiale (él.),", "Locution adverbiale (élément)"]],
    [':Ŕ', [" locution prépositive (él.),", "Locution prépositive (élément)"]],
    [':Ĵ', [" locution interjective (él.),", "Locution interjective (élément)"]],








|







83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
    [':O2', [" 2ᵉ pers.,", "Pronom : 2ᵉ personne"]],
    [':O3', [" 3ᵉ pers.,", "Pronom : 3ᵉ personne"]],
    [':C', [" conjonction,", "Conjonction"]],
    [':Ĉ', [" conjonction (él.),", "Conjonction (élément)"]],
    [':Cc', [" conjonction de coordination,", "Conjonction de coordination"]],
    [':Cs', [" conjonction de subordination,", "Conjonction de subordination"]],
    [':Ĉs', [" conjonction de subordination (él.),", "Conjonction de subordination (élément)"]],

    [':Ñ', [" locution nominale (él.),", "Locution nominale (élément)"]],
    [':Â', [" locution adjectivale (él.),", "Locution adjectivale (élément)"]],
    [':Ṽ', [" locution verbale (él.),", "Locution verbale (élément)"]],
    [':Ŵ', [" locution adverbiale (él.),", "Locution adverbiale (élément)"]],
    [':Ŕ', [" locution prépositive (él.),", "Locution prépositive (élément)"]],
    [':Ĵ', [" locution interjective (él.),", "Locution interjective (élément)"]],

261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
                case 'LINK':
                    return {
                        sType: oToken.sType,
                        sValue: oToken.sValue.slice(0, 40) + "…",
                        aLabel: ["hyperlien"]
                    };
                    break;
                case 'ELPFX':
                    let sTemp = oToken.sValue.replace("’", "").replace("'", "").replace("`", "").toLowerCase();
                    return {
                        sType: oToken.sType,
                        sValue: oToken.sValue,
                        aLabel: [_dElidedPrefix.gl_get(sTemp, "préfixe élidé inconnu")]
                    };
                    break;







|







261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
                case 'LINK':
                    return {
                        sType: oToken.sType,
                        sValue: oToken.sValue.slice(0, 40) + "…",
                        aLabel: ["hyperlien"]
                    };
                    break;
                case 'WORD_ELIDED':
                    let sTemp = oToken.sValue.replace("’", "").replace("'", "").replace("`", "").toLowerCase();
                    return {
                        sType: oToken.sType,
                        sValue: oToken.sValue,
                        aLabel: [_dElidedPrefix.gl_get(sTemp, "préfixe élidé inconnu")]
                    };
                    break;
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
        let aTokenList = this.getListOfTokens(sText.replace("'", "’").trim(), false);
        let iKey = 0;
        let aElem = [];
        do {
            let oToken = aTokenList[iKey];
            let sMorphLoc = '';
            let aTokenTempList = [oToken];
            if (oToken.sType == "WORD" || oToken.sType == "ELPFX"){
                let iKeyTree = iKey + 1;
                let oLocNode = this.oLocGraph[oToken.sValue.toLowerCase()];
                while (oLocNode) {
                    let oTokenNext = aTokenList[iKeyTree];
                    iKeyTree++;
                    if (oTokenNext) {
                        oLocNode = oLocNode[oTokenNext.sValue.toLowerCase()];







|







452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
        let aTokenList = this.getListOfTokens(sText.replace("'", "’").trim(), false);
        let iKey = 0;
        let aElem = [];
        do {
            let oToken = aTokenList[iKey];
            let sMorphLoc = '';
            let aTokenTempList = [oToken];
            if (oToken.sType == "WORD" || oToken.sType == "WORD_ELIDED"){
                let iKeyTree = iKey + 1;
                let oLocNode = this.oLocGraph[oToken.sValue.toLowerCase()];
                while (oLocNode) {
                    let oTokenNext = aTokenList[iKeyTree];
                    iKeyTree++;
                    if (oTokenNext) {
                        oLocNode = oLocNode[oTokenNext.sValue.toLowerCase()];

Modified gc_lang/fr/webext/content_scripts/panel_lxg.css from [60aef30035] to [83fe0f37d1].

86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
}
div.grammalecte_lxg_token_LOC {
    background-color: hsla(150, 50%, 30%, 1);
}
div.grammalecte_lxg_token_WORD {
    background-color: hsla(150, 50%, 50%, 1);
}
div.grammalecte_lxg_token_ELPFX {
    background-color: hsla(150, 30%, 50%, 1);
}
div.grammalecte_lxg_token_UNKNOWN {
    background-color: hsla(0, 50%, 50%, 1);
}
div.grammalecte_lxg_token_NUM {
    background-color: hsla(180, 50%, 50%, 1);







|







86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
}
div.grammalecte_lxg_token_LOC {
    background-color: hsla(150, 50%, 30%, 1);
}
div.grammalecte_lxg_token_WORD {
    background-color: hsla(150, 50%, 50%, 1);
}
div.grammalecte_lxg_token_WORD_ELIDED {
    background-color: hsla(150, 30%, 50%, 1);
}
div.grammalecte_lxg_token_UNKNOWN {
    background-color: hsla(0, 50%, 50%, 1);
}
div.grammalecte_lxg_token_NUM {
    background-color: hsla(180, 50%, 50%, 1);

Modified gc_lang/fr/xpi/data/lxg_panel.css from [3d666aa76c] to [0f0ad23b15].

54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
    padding: 2px 5px;
    border-radius: 2px;
    text-decoration: none;
}
#wordlist b.WORD {
    background-color: hsla(150, 50%, 50%, 1);
}
#wordlist b.ELPFX {
    background-color: hsla(150, 30%, 50%, 1);
}
#wordlist b.UNKNOWN {
    background-color: hsla(0, 50%, 50%, 1);
}
#wordlist b.NUM {
    background-color: hsla(180, 50%, 50%, 1);







|







54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
    padding: 2px 5px;
    border-radius: 2px;
    text-decoration: none;
}
#wordlist b.WORD {
    background-color: hsla(150, 50%, 50%, 1);
}
#wordlist b.WORD_ELIDED {
    background-color: hsla(150, 30%, 50%, 1);
}
#wordlist b.UNKNOWN {
    background-color: hsla(0, 50%, 50%, 1);
}
#wordlist b.NUM {
    background-color: hsla(180, 50%, 50%, 1);

Modified graphspell-js/tokenizer.js from [5f94dc04ea] to [8dd855b1b3].

38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
            [/^[,.;:!?…«»“”‘’"(){}\[\]·–—]/, 'SEPARATOR'],
            [/^[A-Z][.][A-Z][.](?:[A-Z][.])*/, 'ACRONYM'],
            [/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]{2,}[@.])[a-zA-Z0-9][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.\/?&!%=+*"'@$#-]+/, 'LINK'],
            [/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+/, 'TAG'],
            [/^<[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+.*?>|<\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+ *>/, 'HTML'],
            [/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+\]/, 'PSEUDOHTML'],
            [/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
            [/^(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`]/i, 'ELPFX'],
            [/^\d\d?[hm]\d\d\b/, 'HOUR'],
            [/^\d+(?:ers?|nds?|es?|des?|ièmes?|èmes?|emes?|ᵉʳˢ?|ⁿᵈˢ?|ᵉˢ?|ᵈᵉˢ?)\b/, 'ORDINAL'],
            [/^-?\d+(?:[.,]\d+|)/, 'NUM'],
            [/^[%‰+=*/<>⩾⩽-]/, 'SIGN'],
            [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+)*/, 'WORD']
        ]
};







|







38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
            [/^[,.;:!?…«»“”‘’"(){}\[\]·–—]/, 'SEPARATOR'],
            [/^[A-Z][.][A-Z][.](?:[A-Z][.])*/, 'ACRONYM'],
            [/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]{2,}[@.])[a-zA-Z0-9][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.\/?&!%=+*"'@$#-]+/, 'LINK'],
            [/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+/, 'TAG'],
            [/^<[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+.*?>|<\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+ *>/, 'HTML'],
            [/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+\]/, 'PSEUDOHTML'],
            [/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
            [/^(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`]/i, 'WORD_ELIDED'],
            [/^\d\d?[hm]\d\d\b/, 'HOUR'],
            [/^\d+(?:ers?|nds?|es?|des?|ièmes?|èmes?|emes?|ᵉʳˢ?|ⁿᵈˢ?|ᵉˢ?|ᵈᵉˢ?)\b/, 'ORDINAL'],
            [/^-?\d+(?:[.,]\d+|)/, 'NUM'],
            [/^[%‰+=*/<>⩾⩽-]/, 'SIGN'],
            [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+)*/, 'WORD']
        ]
};

Modified graphspell/tokenizer.py from [7c766445e1] to [8cf6a6bb2e].

27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
            r'(?P<FOLDERWIN>[a-zA-Z]:\\(?:Program Files(?: [(]x86[)]|)|[\w.()]+)(?:\\[\w.()-]+)*)',
            r'(?P<PUNC>[][,.;:!?…«»“”‘’"(){}·–—])',
            r'(?P<ACRONYM>[A-Z][.][A-Z][.](?:[A-Z][.])*)',
            r'(?P<LINK>(?:https?://|www[.]|\w+[@.]\w\w+[@.])\w[\w./?&!%=+*"\'@$#-]+)',
            r'(?P<HASHTAG>[#@][\w-]+)',
            r'(?P<HTML><\w+.*?>|</\w+ *>)',
            r'(?P<PSEUDOHTML>\[/?\w+\])',
            r"(?P<ELPFX>(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`])",
            r'(?P<ORDINAL>\d+(?:ers?|nds?|es?|des?|ièmes?|èmes?|emes?|ᵉʳˢ?|ⁿᵈˢ?|ᵉˢ?|ᵈᵉˢ?)\b)',
            r'(?P<HOUR>\d\d?h\d\d\b)',
            r'(?P<NUM>-?\d+(?:[.,]\d+|))',
            r'(?P<SIGN>[%‰+=*/<>⩾⩽-])',
            r"(?P<WORD>\w+(?:[’'`-]\w+)*)"
        )
}







|







27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
            r'(?P<FOLDERWIN>[a-zA-Z]:\\(?:Program Files(?: [(]x86[)]|)|[\w.()]+)(?:\\[\w.()-]+)*)',
            r'(?P<PUNC>[][,.;:!?…«»“”‘’"(){}·–—])',
            r'(?P<ACRONYM>[A-Z][.][A-Z][.](?:[A-Z][.])*)',
            r'(?P<LINK>(?:https?://|www[.]|\w+[@.]\w\w+[@.])\w[\w./?&!%=+*"\'@$#-]+)',
            r'(?P<HASHTAG>[#@][\w-]+)',
            r'(?P<HTML><\w+.*?>|</\w+ *>)',
            r'(?P<PSEUDOHTML>\[/?\w+\])',
            r"(?P<WORD_ELIDED>(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`])",
            r'(?P<ORDINAL>\d+(?:ers?|nds?|es?|des?|ièmes?|èmes?|emes?|ᵉʳˢ?|ⁿᵈˢ?|ᵉˢ?|ᵈᵉˢ?)\b)',
            r'(?P<HOUR>\d\d?h\d\d\b)',
            r'(?P<NUM>-?\d+(?:[.,]\d+|))',
            r'(?P<SIGN>[%‰+=*/<>⩾⩽-])',
            r"(?P<WORD>\w+(?:[’'`-]\w+)*)"
        )
}