Grammalecte  Check-in [bd2ff57bf8]

Overview
Comment:[core][build][js] replace nbsp by nnbsp within the GC, instead of replacing them at build
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | fr | core | build
Files: files | file ages | folders
SHA3-256: bd2ff57bf894cbc948a4c44606c696b0b1bfd6c7ca5ac2a42f9007d04a50f75e
User & Date: olr on 2021-01-23 13:03:40
Other Links: manifest | tags
Context
2021-01-23
14:02
[core][fr][js] fix getGenderNumber() check-in: 3607227671 user: olr tags: trunk, fr, core
13:03
[core][build][js] replace nbsp by nnbsp within the GC, instead of replacing them at build check-in: bd2ff57bf8 user: olr tags: trunk, fr, core, build
2021-01-22
11:34
[core][fr] fix bug in suggestions for JS check-in: a6f73e3a7b user: olr tags: trunk, fr, core
Changes

Modified compile_rules_js_convert.py from [48401df6d7] to [bddec8a9b3].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
1
2
3
4
5
6
7
8
9
10
11
12

13
14
15
16
17
18
19












-







"""
Convert Python code and regexes to JavaScript code
"""

import copy
import re
import json


def py2js (sCode):
    "convert Python code to JavaScript code"
    # Python strings
    sCode = sCode.replace(' ', ' ')
    sCode = sCode.replace('(r"', '("')
    sCode = sCode.replace("(r'", "('")
    sCode = sCode.replace(' r"', ' "')
    sCode = sCode.replace(" r'", " '")
    sCode = sCode.replace(',r"', ',"')
    sCode = sCode.replace(",r'", ",'")
    # operators
123
124
125
126
127
128
129
130
131


132
133
134
135
136
137
138
122
123
124
125
126
127
128


129
130
131
132
133
134
135
136
137







-
-
+
+







    # graph rules
    if lRuleJS[0] == "@@@@":
        return lRuleJS
    del lRule[-1] # tGroups positioning codes are useless for Python
    # error messages
    for aAction in lRuleJS[6]:
        if aAction[1] == "-":
            aAction[2] = aAction[2].replace(" ", " ") # nbsp --> nnbsp
            aAction[4] = aAction[4].replace("« ", "« ").replace(" »", " »").replace(" :", " :").replace(" :", " :")
            aAction[4] = aAction[4].replace(" ", " ") # nbsp --> nnbsp
            aAction[4] = aAction[4].replace("« ", "« ").replace(" »", " »").replace(" :", " :").replace(" ;", " ;").replace(" ?", " ?").replace(" !", " :")
    # js regexes
    lRuleJS[1], lNegLookBehindRegex = regex2js(dJSREGEXES.get(lRuleJS[3], lRuleJS[1]), sWORDLIMITLEFT)
    lRuleJS.append(lNegLookBehindRegex)
    return lRuleJS


def writeRulesToJSArray (lRules):

Modified gc_core/js/lang_core/gc_engine.js from [a6cc51f719] to [a05d9994e2].

301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
301
302
303
304
305
306
307

308
309
310
311
312
313
314







-







                    if (!gc_engine.aIgnoredRules.has(sRuleId)) {
                        while ((m = zRegex.gl_exec2(sText, lGroups, lNegLookBefore)) !== null) {
                            let bCondMemo = null;
                            for (let [sFuncCond, cActionType, sWhat, ...eAct] of lActions) {
                                // action in lActions: [ condition, action type, replacement/suggestion/action[, iGroup[, message, URL]] ]
                                try {
                                    bCondMemo = (!sFuncCond || gc_functions[sFuncCond](sText, sText0, m, this.dTokenPos, sCountry, bCondMemo));
                                    //bCondMemo = (!sFuncCond || oEvalFunc[sFuncCond](sText, sText0, m, this.dTokenPos, sCountry, bCondMemo));
                                    if (bCondMemo) {
                                        switch (cActionType) {
                                            case "-":
                                                // grammar error
                                                //console.log("-> error detected in " + sLineId + "\nzRegex: " + zRegex.source);
                                                let nErrorStart = nOffset + m.start[eAct[0]];
                                                if (!this.dError.has(nErrorStart) || nPriority > this.dErrorPriority.get(nErrorStart)) {
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
326
327
328
329
330
331
332

333
334
335
336
337
338
339







-







                                                    console.log("~ " + sText + "  -- " + m[eAct[0]] + "  # " + sLineId);
                                                }
                                                break;
                                            case "=":
                                                // disambiguation
                                                //console.log("-> disambiguation by " + sLineId + "\nzRegex: " + zRegex.source);
                                                gc_functions[sWhat](sText, m, this.dTokenPos);
                                                //oEvalFunc[sWhat](sText, m, this.dTokenPos);
                                                if (bDebug) {
                                                    console.log("= " + m[0] + "  # " + sLineId, "\nDA:", this.dTokenPos);
                                                }
                                                break;
                                            case ">":
                                                // we do nothing, this test is just a condition to apply all following actions
                                                break;
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
658
659
660
661
662
663
664

665
666
667
668
669
670
671







-







                    // TextProcessor [ option, condition, "~", replacement/suggestion/action, iTokenStart, iTokenEnd, bCaseSvty ]
                    // Disambiguator [ option, condition, "=", replacement/suggestion/action ]
                    // Tag           [ option, condition, "/", replacement/suggestion/action, iTokenStart, iTokenEnd ]
                    // Immunity      [ option, condition, "!", "",                            iTokenStart, iTokenEnd ]
                    // Test          [ option, condition, ">", "" ]
                    if (!sOption || dOptions.gl_get(sOption, false)) {
                        bCondMemo = !sFuncCond || gc_functions[sFuncCond](this.lTokens, nTokenOffset, nLastToken, sCountry, bCondMemo, this.dTags, this.sSentence, this.sSentence0);
                        //bCondMemo = !sFuncCond || oEvalFunc[sFuncCond](this.lTokens, nTokenOffset, nLastToken, sCountry, bCondMemo, this.dTags, this.sSentence, this.sSentence0);
                        if (bCondMemo) {
                            if (cActionType == "-") {
                                // grammar error
                                let [iTokenStart, iTokenEnd, cStartLimit, cEndLimit, bCaseSvty, nPriority, sMessage, iURL] = eAct;
                                let nTokenErrorStart = (iTokenStart > 0) ? nTokenOffset + iTokenStart : nLastToken + iTokenStart;
                                if (!this.lTokens[nTokenErrorStart].hasOwnProperty("sImmunity") || (this.lTokens[nTokenErrorStart]["sImmunity"] != "*" && !this.lTokens[nTokenErrorStart]["sImmunity"].includes(sOption))) {
                                    let nTokenErrorEnd = (iTokenEnd > 0) ? nTokenOffset + iTokenEnd : nLastToken + iTokenEnd;
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
691
692
693
694
695
696
697

698
699
700
701
702
703
704







-







                                if (bDebug) {
                                    console.log(`    TEXT_PROCESSOR: [${this.lTokens[nTokenStart]["sValue"]}:${this.lTokens[nTokenEnd]["sValue"]}]  > ${sWhat}`);
                                }
                            }
                            else if (cActionType == "=") {
                                // disambiguation
                                gc_functions[sWhat](this.lTokens, nTokenOffset, nLastToken);
                                //oEvalFunc[sWhat](this.lTokens, nTokenOffset, nLastToken);
                                if (bDebug) {
                                    console.log(`    DISAMBIGUATOR: (${sWhat})  [${this.lTokens[nTokenOffset+1]["sValue"]}:${this.lTokens[nLastToken]["sValue"]}]`);
                                }
                            }
                            else if (cActionType == ">") {
                                // we do nothing, this test is just a condition to apply all following actions
                                if (bDebug) {
778
779
780
781
782
783
784
785
786

787
788
789
790

791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811

812
813
814
815

816
817
818
819
820
821
822
823
824
825
826
827
828
829
774
775
776
777
778
779
780


781
782
783
784

785
786
787
788
789
790
791

792
793
794
795
796
797
798
799
800
801
802
803


804
805
806
807

808
809
810
811
812
813
814

815
816
817
818
819
820
821







-
-
+



-
+






-












-
-
+



-
+






-







    _createErrorFromRegex (sText, sText0, sSugg, nOffset, m, iGroup, sLineId, sRuleId, bCaseSvty, sMsg, sURL, bShowRuleId, sOption, bContext) {
        let nStart = nOffset + m.start[iGroup];
        let nEnd = nOffset + m.end[iGroup];
        // suggestions
        let lSugg = [];
        if (sSugg.startsWith("=")) {
            sSugg = gc_functions[sSugg.slice(1)](sText, m);
            //sSugg = oEvalFunc[sSugg.slice(1)](sText, m);
            lSugg = (sSugg) ? sSugg.split("|") : [];
            lSugg = (sSugg) ? sSugg.replace(/ /g, " ").split("|") : [];
        } else if (sSugg == "_") {
            lSugg = [];
        } else {
            lSugg = sSugg.gl_expand(m).split("|");
            lSugg = sSugg.gl_expand(m).replace(/ /g, " ").split("|");
        }
        if (bCaseSvty && lSugg.length > 0 && m[iGroup].slice(0,1).gl_isUpperCase()) {
            lSugg = (m[iGroup].gl_isUpperCase()) ? lSugg.map((s) => s.toUpperCase()) : capitalizeArray(lSugg);
        }
        // Message
        let sMessage = (sMsg.startsWith("=")) ? gc_functions[sMsg.slice(1)](sText, m) : sMsg.gl_expand(m);
        //let sMessage = (sMsg.startsWith("=")) ? oEvalFunc[sMsg.slice(1)](sText, m) : sMsg.gl_expand(m);
        if (bShowRuleId) {
            sMessage += "  #" + sLineId + " / " + sRuleId;
        }
        //
        return this._createError(nStart, nEnd, sLineId, sRuleId, sOption, sMessage, lSugg, sURL, bContext);
    }

    _createErrorFromTokens (sSugg, nTokenOffset, nLastToken, iFirstToken, nStart, nEnd, sLineId, sRuleId, bCaseSvty, sMsg, sURL, bShowRuleId, sOption, bContext) {
        // suggestions
        let lSugg = [];
        if (sSugg.startsWith("=")) {
            sSugg = gc_functions[sSugg.slice(1)](this.lTokens, nTokenOffset, nLastToken);
            //sSugg = oEvalFunc[sSugg.slice(1)](this.lTokens, nTokenOffset, nLastToken);
            lSugg = (sSugg) ? sSugg.split("|") : [];
            lSugg = (sSugg) ? sSugg.replace(/ /g, " ").split("|") : [];
        } else if (sSugg == "_") {
            lSugg = [];
        } else {
            lSugg = this._expand(sSugg, nTokenOffset, nLastToken).split("|");
            lSugg = this._expand(sSugg, nTokenOffset, nLastToken).replace(/ /g, " ").split("|");
        }
        if (bCaseSvty && lSugg.length > 0 && this.lTokens[iFirstToken]["sValue"].slice(0,1).gl_isUpperCase()) {
            lSugg = (this.sSentence.slice(nStart, nEnd).gl_isUpperCase()) ? lSugg.map((s) => s.toUpperCase()) : capitalizeArray(lSugg);
        }
        // Message
        let sMessage = (sMsg.startsWith("=")) ? gc_functions[sMsg.slice(1)](this.lTokens, nTokenOffset, nLastToken) : this._expand(sMsg, nTokenOffset, nLastToken);
        //let sMessage = (sMsg.startsWith("=")) ? oEvalFunc[sMsg.slice(1)](this.lTokens, nTokenOffset, nLastToken) : this._expand(sMsg, nTokenOffset, nLastToken);
        if (bShowRuleId) {
            sMessage += "  #" + sLineId + " / " + sRuleId;
        }
        //
        return this._createError(nStart, nEnd, sLineId, sRuleId, sOption, sMessage, lSugg, sURL, bContext);
    }

870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
862
863
864
865
866
867
868

869
870
871
872
873
874
875







-







            sNew = "_".repeat(ln);
        }
        else if (sRepl === "@") {
            sNew = "@".repeat(ln);
        }
        else if (sRepl.slice(0,1) === "=") {
            sNew = gc_functions[sRepl.slice(1)](sText, m);
            //sNew = oEvalFunc[sRepl.slice(1)](sText, m);
            sNew = sNew + " ".repeat(ln-sNew.length);
            if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) {
                sNew = sNew.gl_toCapitalize();
            }
        } else {
            sNew = sRepl.gl_expand(m);
            sNew = sNew + " ".repeat(ln-sNew.length);
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
908
909
910
911
912
913
914

915
916
917
918
919
920
921







-







                    this.lTokens[i]["sNewValue"] = "_";
                }
            }
        }
        else {
            if (sWhat.startsWith("=")) {
                sWhat = gc_functions[sWhat.slice(1)](this.lTokens, nTokenOffset, nLastToken);
                //sWhat = oEvalFunc[sWhat.slice(1)](this.lTokens, nTokenOffset, nLastToken);
            } else {
                sWhat = this._expand(sWhat, nTokenOffset, nLastToken);
            }
            let bUppercase = bCaseSvty && this.lTokens[nTokenRewriteStart]["sValue"].slice(0,1).gl_isUpperCase();
            if (nTokenRewriteEnd - nTokenRewriteStart == 0) {
                // one token
                if (bUppercase) {

Modified gc_lang/fr/rules.grx from [08e4bb7dc6] to [4ade6e87a4].

1303
1304
1305
1306
1307
1308
1309
1310
1311


1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329

1330





1331
1332
1333
1334
1335
1336
1337
1303
1304
1305
1306
1307
1308
1309


1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328

1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342







-
-
+
+

















-
+

+
+
+
+
+







!!!! Grands nombres                                                                               !!
!!
!!

__[s]/num(num_grand_nombre_soudé)__
    \d\d\d\d+
        <<- not before("NF[  -]?(C|E|P|Q|X|Z|EN(?:[  -]ISO|)) *$") >>>
        <<- ((after("^(?:,[0-9]+[⁰¹²³⁴⁵⁶⁷⁸⁹]?|[⁰¹²³⁴⁵⁶⁷⁸⁹])") and not (re.search("^[01]+$", \0) and after("^,[01]+\\b")))
            or after("^[   ]*(?:[kcmµn]?(?:[slgJKΩ]|m[²³]?|Wh?|Hz|dB)|[%‰€$£¥Åℓhj]|min|°C|℃)(?![\\w’'])"))
        <<- (after("^(?:,[0-9]+[⁰¹²³⁴⁵⁶⁷⁸⁹]?|[⁰¹²³⁴⁵⁶⁷⁸⁹])") and not (re.search("^[01]+$", \0) and after("^,[01]+\\b")))
            or after("^[   ]*(?:[kcmµn]?(?:[slgJKΩ]|m[²³]?|Wh?|Hz|dB)|[%‰€$£¥Åℓhj]|min|°C|℃)(?![\\w’'])")
        ->> =formatNumber(\0, True)                                                                 && Formatage des grands nombres.
        <<- __else__ and \0.__len__() > 4 ->> =formatNumber(\0)                                     && Formatage des grands nombres.

__[s]/num(num_nombre_quatre_chiffres)__
    (?<!,)(\d\d\d\d)[  ]([a-zA-Zµ][a-zA-Z0-9Ωℓ⁰¹²³⁴⁵⁶⁷⁸⁹/·]*) @@0,$
        <<- morph(\2, ";S", ":[VCR]") or mbUnit(\2) -1>> =formatNumber(\1, True)                    && Formatage des grands nombres.

TEST: {{12345}}                                 ->> 12 345
TEST: {{123456}}                                ->> 123 456
TEST: {{1234567}}                               ->> 1 234 567
TEST: {{12345678}}                              ->> 12 345 678
TEST: {{023456789}}                             ->> 023 456 789|023 45 67 89|02 345 67 89
TEST: {{0234567890}}                            ->> 0 234 567 890|02 34 56 78 90|023 456 78 90|0234 567-890
TEST: {{12345678901}}                           ->> 12 345 678 901
TEST: {{112798931830912839}}                    ->> 112 798 931 830 912 839
TEST: {{2308393909}}                            ->> 2 308 393 909|2308 393-909
TEST: {{2024}},9                                ->> 2 024
TEST: {{4000}} Å                                ->> 4 000
TEST: {{4000}}Å                                ->> 4 000
TEST: {{10010}} €                               ->> 10 010
TEST: {{12010}} kg                              ->> 12 010
TEST: {{12010}} m²                              ->> 12 010
TEST: {{12010}} dB                              ->> 12 010
TEST: {{12010}} %                               ->> 12 010
TEST: {{11010}} min                             ->> 11 010
TEST: {{3240}} µA                               ->> 3 240
TEST: 1111 0011,01 (binaire)


__[s>(num_grand_nombre_avec_points)__
    \d\d?\d?(?:[.]\d\d\d)+(?![0-9])
        <<- option("num") ->> =\0.replace(".", " ")                                                 && Grands nombres : utilisez des espaces insécables plutôt que des points.