Grammalecte  Changes On Branch b08f2ef338ece054

Changes In Branch webext2_illusion Excluding Merge-Ins

This is equivalent to a diff from e918b25b89 to b08f2ef338

2017-08-08
10:51
[fr] faux positif: confusion <est/et> check-in: a34171be91 user: olr tags: trunk, fr
2017-08-06
00:44
[core][js] ajout des ; oubliés et ajustement des ; en trop (commit erroné) Closed-Leaf check-in: b08f2ef338 user: IllusionPerdu tags: dead, webext2_illusion
2017-08-05
19:50
[fr] màj: confusion <à/a> dans pronom +à l’air +adj check-in: e918b25b89 user: olr tags: trunk, fr
16:20
[fr] nr: confusion <verre/ver/vert/vers> check-in: 699b424b98 user: olr tags: trunk, fr

Modified gc_core/js/helpers.js from [da0905c944] to [8fee48eb89].

1
2
3
4
5
6
7
8
9


10
11
12



13
14
15
16
17
18
19
20
21








22
23
24
25
26
27
28
29
30
31
32
33











34
35
36
37
38
39
40
41
42








43
44
45
46
47




48
49
50
51
52




53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68















69
70
71
72
73
74
75
76
77
78
79









80
81
82
83
84
85
86
87








88
89
90
91
92
93
94
95
96
97







98
1
2
3
4
5
6
7
8
9
10
11



12
13
14
15








16
17
18
19
20
21
22
23
24











25
26
27
28
29
30
31
32
33
34
35
36








37
38
39
40
41
42
43
44
45




46
47
48
49





50
51
52
53
















54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69










70
71
72
73
74
75
76
77
78
79







80
81
82
83
84
85
86
87
88
89
90







91
92
93
94
95
96
97
98









+
+
-
-
-
+
+
+

-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+

-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+

-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+

-
-
-
-
+
+
+
+
-
-
-
-
-
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+

-
-
-
-
-
-
-
+
+
+
+
+
+
+
+



-
-
-
-
-
-
-
+
+
+
+
+
+
+


// HELPERS

"use strict";

// In Firefox, there is no console.log in PromiseWorker, but there is worker.log.
// In Thunderbird, you can’t access to console directly. So it’s required to pass a log function.
let funcOutput = null;

var helpers = {

function setLogOutput (func) {
    funcOutput = func;
}
    setLogOutput: function (func) {
        funcOutput = func;
    },

function echo (obj) {
    if (funcOutput !== null) {
        funcOutput(obj);
    } else {
        console.log(obj);
    }
    return true;
}
    echo: function (obj) {
        if (funcOutput !== null) {
            funcOutput(obj);
        } else {
            console.log(obj);
        }
        return true;
    },

function logerror (e, bStack=false) {
    let sMsg = "\n" + e.fileName + "\n" + e.name + "\nline: " + e.lineNumber + "\n" + e.message;
    if (bStack) {
        sMsg += "\n--- Stack ---\n" + e.stack;
    }
    if (funcOutput !== null) {
        funcOutput(sMsg);
    } else {
        console.error(sMsg);
    }
}
    logerror: function (e, bStack=false) {
        let sMsg = "\n" + e.fileName + "\n" + e.name + "\nline: " + e.lineNumber + "\n" + e.message;
        if (bStack) {
            sMsg += "\n--- Stack ---\n" + e.stack;
        }
        if (funcOutput !== null) {
            funcOutput(sMsg);
        } else {
            console.error(sMsg);
        }
    },

function inspect (o) {
    let sMsg = "__inspect__: " + typeof o;
    for (let sParam in o) {
        sMsg += "\n" + sParam + ": " + o.sParam;
    }
    sMsg += "\n" + JSON.stringify(o) + "\n__end__";
    echo(sMsg);
}
    inspect: function (o) {
        let sMsg = "__inspect__: " + typeof o;
        for (let sParam in o) {
            sMsg += "\n" + sParam + ": " + o.sParam;
        }
        sMsg += "\n" + JSON.stringify(o) + "\n__end__";
        this.echo(sMsg);
    },


// load ressources in workers (suggested by Mozilla extensions reviewers)
// for more options have a look here: https://gist.github.com/Noitidart/ec1e6b9a593ec7e3efed
// if not in workers, use sdk/data.load() instead
    loadFile: function (spf) {
        // load ressources in workers (suggested by Mozilla extensions reviewers)
        // for more options have a look here: https://gist.github.com/Noitidart/ec1e6b9a593ec7e3efed
        // if not in workers, use sdk/data.load() instead
function loadFile (spf) {
    try {
        let xRequest;
        if (typeof XMLHttpRequest !== "undefined") {
            xRequest = new XMLHttpRequest();
        try {
            let xRequest;
            if (typeof XMLHttpRequest !== "undefined") {
                xRequest = new XMLHttpRequest();
        }
        else {
            // JS bullshit again… necessary for Thunderbird
            let { Cc, Ci } = require("chrome");
            xRequest = Cc["@mozilla.org/xmlextras/xmlhttprequest;1"].createInstance();
            xRequest.QueryInterface(Ci.nsIXMLHttpRequest);
        }
        xRequest.open('GET', spf, false); // 3rd arg is false for synchronous, sync is acceptable in workers
        xRequest.send();
        return xRequest.responseText;
    }
    catch (e) {
        logerror(e);
        return null
    }
}
            } else {
                // JS bullshit again… necessary for Thunderbird
                let { Cc, Ci } = require("chrome");
                xRequest = Cc["@mozilla.org/xmlextras/xmlhttprequest;1"].createInstance();
                xRequest.QueryInterface(Ci.nsIXMLHttpRequest);
            }
            xRequest.open('GET', spf, false); // 3rd arg is false for synchronous, sync is acceptable in workers
            xRequest.send();
            return xRequest.responseText;
        }
        catch (e) {
            this.logerror(e);
            return null;
        }
    },


// conversions
function objectToMap (obj) {
    let m = new Map();
    for (let param in obj) {
        //console.log(param + " " + obj[param]);
        m.set(param, obj[param]);
    }
    return m;
}
    // conversions
    objectToMap: function (obj) {
        let m = new Map();
        for (let param in obj) {
            //console.log(param + " " + obj[param]);
            m.set(param, obj[param]);
        }
        return m;
    },

function mapToObject (m) {
    let obj = {};
    for (let [k, v] of m) {
        obj[k] = v;
    }
    return obj;
}
    mapToObject: function (m) {
        let obj = {};
        for (let [k, v] of m) {
            obj[k] = v;
        }
        return obj;
    }
};


if (typeof(exports) !== 'undefined') {
    exports.setLogOutput = setLogOutput;
    exports.echo = echo;
    exports.logerror = logerror;
    exports.inspect = inspect;
    exports.loadFile = loadFile;
    exports.objectToMap = objectToMap;
    exports.mapToObject = mapToObject;
    exports.setLogOutput = helpers.setLogOutput;
    exports.echo = helpers.echo;
    exports.logerror = helpers.logerror;
    exports.inspect = helpers.inspect;
    exports.loadFile = helpers.loadFile;
    exports.objectToMap = helpers.objectToMap;
    exports.mapToObject = helpers.mapToObject;
}

Modified gc_core/js/ibdawg.js from [e748c8288e] to [cdddd20c84].

1
2
3
4


5
6



7
8
9
10
11
12
13
14
15
16
17
18

19

20

21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44

45
46

47
48

49
50
51
52
53
54
55
1
2
3
4
5
6


7
8
9
10
11
12
13
14
15
16
17
18
19
20

21
22
23

24
25


26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45

46
47

48
49

50
51
52
53
54
55
56
57




+
+
-
-
+
+
+











-
+

+
-
+

-
-




















-
+

-
+

-
+







//// IBDAWG

"use strict";


if (typeof(require) !== 'undefined') {
const st = require("resource://grammalecte/str_transform.js");
const helpers = require("resource://grammalecte/helpers.js");
    var str_transform = require("resource://grammalecte/str_transform.js");
    var helpers = require("resource://grammalecte/helpers.js");
}


// String
// Don’t remove. Necessary in TB.
${string}



class IBDAWG {
    // INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH

    constructor (sDicName) {
    constructor (sDicName, sPath="") {
        try {
            let sURL = (sPath !== "") ? sPath + "/" + sDicName : "resource://grammalecte/_dictionaries/"+sDicName;
            const dict = JSON.parse(helpers.loadFile("resource://grammalecte/_dictionaries/"+sDicName));
            const dict = JSON.parse(helpers.loadFile(sURL));
            Object.assign(this, dict);
            //const dict = require("resource://grammalecte/"+sLang+"/dictionary.js");
            //Object.assign(this, dict.dictionary);
        }
        catch (e) {
            throw Error("# Error. File not found or not loadable.\n" + e.message + "\n");
        }
        /*
            Properties:
            sName, nVersion, sHeader, lArcVal, nArcVal, byDic, sLang, nChar, nBytesArc, nBytesNodeAddress,
            nEntries, nNode, nArc, nAff, cStemming, nTag, dChar, _arcMask, _finalNodeMask, _lastArcMask, _addrBitMask, nBytesOffset,
        */
        if (!this.sHeader.startsWith("/pyfsa/")) {
            throw TypeError("# Error. Not a pyfsa binary dictionary. Header: " + this.sHeader);
        }
        if (!(this.nVersion == "1" || this.nVersion == "2" || this.nVersion == "3")) {
            throw RangeError("# Error. Unknown dictionary version: " + this.nVersion);
        }

        this.dChar = helpers.objectToMap(this.dChar);
        //this.byDic = new Uint8Array(this.byDic);  // not quicker, even slower

        if (this.cStemming == "S") {
            this.funcStemming = st.getStemFromSuffixCode;
            this.funcStemming = str_transform.getStemFromSuffixCode;
        } else if (this.cStemming == "A") {
            this.funcStemming = st.getStemFromAffixCode;
            this.funcStemming = str_transform.getStemFromAffixCode;
        } else {
            this.funcStemming = st.noStemming;
            this.funcStemming = str_transform.noStemming;
        }

        // Configuring DAWG functions according to nVersion
        switch (this.nVersion) {
            case 1:
                this.morph = this._morph1;
                this.stem = this._stem1;
70
71
72
73
74
75
76
77

78
79
80
81
82
83
84

85
86
87
88
89
90
91
92
93
94
95
96
97
98

99
100
101
102
103
104
105
72
73
74
75
76
77
78

79
80
81
82
83
84
85

86
87
88
89
90
91
92
93
94
95
96
97
98
99

100
101
102
103
104
105
106
107







-
+






-
+













-
+







                break;
            default:
                throw ValueError("# Error: unknown code: " + this.nVersion);
        }
        //console.log(this.getInfo());
        this.bOptNumSigle = true;
        this.bOptNumAtLast = false;
    };
    }

    getInfo () {
        return  `  Language: ${this.sLang}      Version: ${this.nVersion}      Stemming: ${this.cStemming}FX\n` +
                `  Arcs values:  ${this.nArcVal} = ${this.nChar} characters,  ${this.nAff} affixes,  ${this.nTag} tags\n` +
                `  Dictionary: ${this.nEntries} entries,    ${this.nNode} nodes,   ${this.nArc} arcs\n` +
                `  Address size: ${this.nBytesNodeAddress} bytes,  Arc size: ${this.nBytesArc} bytes\n`;
    };
    }

    isValidToken (sToken) {
        // checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)
        if (this.isValid(sToken)) {
            return true;
        }
        if (sToken.includes("-")) {
            if (sToken.gl_count("-") > 4) {
                return true;
            }
            return sToken.split("-").every(sWord  =>  this.isValid(sWord)); 
        }
        return false;
    };
    }

    isValid (sWord) {
        // checks if sWord is valid (different casing tested if the first letter is a capital)
        if (!sWord) {
            return null;
        }
        if (sWord.includes("’")) { // ugly hack
121
122
123
124
125
126
127
128

129
130
131
132
133
134
135
136
137
138
139

140
141
142
143
144
145
146
147
148
149
150
151
152
153
154

155
156
157
158
159
160
161
162
163
164
165
166

167
168
169
170

171
172
173
174
175
176
177
123
124
125
126
127
128
129

130
131
132
133
134
135
136
137
138
139
140

141
142
143
144
145
146
147
148
149
150
151
152
153
154
155

156
157
158
159
160
161
162
163
164
165
166
167

168
169
170
171

172
173
174
175
176
177
178
179







-
+










-
+














-
+











-
+



-
+







                }
                return !!this.lookup(sWord.slice(0, 1).toLowerCase() + sWord.slice(1));
            } else {
                return !!this.lookup(sWord.toLowerCase());
            }
        }
        return false;
    };
    }

    _convBytesToInteger (aBytes) {
        // Byte order = Big Endian (bigger first)
        let nVal = 0;
        let nWeight = (aBytes.length - 1) * 8;
        for (let n of aBytes) {
            nVal += n << nWeight;
            nWeight = nWeight - 8;
        }
        return nVal;
    };
    }

    lookup (sWord) {
        // returns true if sWord in dictionary (strict verification)
        let iAddr = 0;
        for (let c of sWord) {
            if (!this.dChar.has(c)) {
                return false;
            }
            iAddr = this._lookupArcNode(this.dChar.get(c), iAddr);
            if (iAddr === null) {
                return false;
            }
        }
        return Boolean(this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask);
    };
    }

    getMorph (sWord) {
        // retrieves morphologies list, different casing allowed
        let l = this.morph(sWord);
        if (sWord[0].gl_isUpperCase()) {
            l = l.concat(this.morph(sWord.toLowerCase()));
            if (sWord.gl_isUpperCase() && sWord.length > 1) {
                l = l.concat(this.morph(sWord.gl_toCapitalize()));
            }
        }
        return l;
    };
    }

    // morph (sWord) {
    //     is defined in constructor
    // };
    // }
    
    // VERSION 1
    _morph1 (sWord) {
        // returns morphologies of sWord
        let iAddr = 0;
        for (let c of sWord) {
            if (!this.dChar.has(c)) {
203
204
205
206
207
208
209
210

211
212
213
214
215
216
217
205
206
207
208
209
210
211

212
213
214
215
216
217
218
219







-
+







                    }
                }
                iAddr = iEndArcAddr + this.nBytesNodeAddress;
            }
            return l;
        }
        return [];
    };
    }

    _stem1 (sWord) {
        // returns stems list of sWord
        let iAddr = 0;
        for (let c of sWord) {
            if (!this.dChar.has(c)) {
                return [];
233
234
235
236
237
238
239
240

241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260

261
262
263
264
265

266
267
268
269

270
271
272
273

274
275
276
277
278
279

280
281
282
283

284
285
286
287

288
289
290
291
292
293
235
236
237
238
239
240
241

242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261

262
263
264
265
266

267
268
269
270

271
272
273
274

275
276
277
278
279
280

281
282
283
284

285
286
287
288

289
290
291
292
293
294
295







-
+



















-
+




-
+



-
+



-
+





-
+



-
+



-
+






                    l.push(this.funcStemming(sWord, this.lArcVal[nArc]));
                }
                iAddr = iEndArcAddr + this.nBytesNodeAddress;
            }
            return l;
        }
        return [];
    };
    }

    _lookupArcNode1 (nVal, iAddr) {
        // looks if nVal is an arc at the node at iAddr, if yes, returns address of next node else None
        while (true) {
            let iEndArcAddr = iAddr+this.nBytesArc;
            let nRawArc = this._convBytesToInteger(this.byDic.slice(iAddr, iEndArcAddr));
            if (nVal == (nRawArc & this._arcMask)) {
                // the value we are looking for 
                // we return the address of the next node
                return this._convBytesToInteger(this.byDic.slice(iEndArcAddr, iEndArcAddr+this.nBytesNodeAddress));
            }
            else {
                // value not found
                if (nRawArc & this._lastArcMask) {
                    return null;
                }
                iAddr = iEndArcAddr + this.nBytesNodeAddress;
            }
        }
    };
    }

    // VERSION 2
    _morph2 (sWord) {
        // to do
    };
    }

    _stem2 (sWord) {
        // to do
    };
    }

    _lookupArcNode2 (nVal, iAddr) {
        // to do
    };
    }


    // VERSION 3
    _morph3 (sWord) {
        // to do
    };
    }

    _stem3 (sWord) {
        // to do
    };
    }

    _lookupArcNode3 (nVal, iAddr) {
        // to do
    };
    }
}


if (typeof(exports) !== 'undefined') {
    exports.IBDAWG = IBDAWG;
}

Modified gc_core/js/jsex_map.js from [985754ee9e] to [ca76af0666].

1
2
3
4
5
6
7
8
9
10
11

12
13
14
15
16
17
18
19

20
21
22
23
24
25
26
27
28

29
30

31
32
33
34
35
36

37
38
39
40
41
42
43
44

45
46
47
1
2
3
4
5
6
7
8
9
10

11
12
13
14
15
16
17
18

19
20
21
22
23
24
25
26
27

28
29

30
31
32
33
34
35

36
37
38
39
40
41
42
43

44
45
46
47










-
+







-
+








-
+

-
+





-
+







-
+




// Map

if (Map.prototype.grammalecte === undefined) {
    Map.prototype.gl_shallowCopy = function () {
        let oNewMap = new Map();
        for (let [key, val] of this.entries()) {
            oNewMap.set(key, val);
        }
        return oNewMap;
    }
    };

    Map.prototype.gl_get = function (key, defaultValue) {
        let res = this.get(key);
        if (res !== undefined) {
            return res;
        }
        return defaultValue;
    }
    };

    Map.prototype.gl_toString = function () {
        // Default .toString() gives nothing useful
        let sRes = "{ ";
        for (let [k, v] of this.entries()) {
            sRes += (typeof k === "string") ? '"' + k + '": ' : k.toString() + ": ";
            sRes += (typeof v === "string") ? '"' + v + '", ' : v.toString() + ", ";
        }
        sRes = sRes.slice(0, -2) + " }"
        sRes = sRes.slice(0, -2) + " }";
        return sRes;
    }
    };

    Map.prototype.gl_update = function (dDict) {
        for (let [k, v] of dDict.entries()) {
            this.set(k, v);
        }
    }
    };

    Map.prototype.gl_updateOnlyExistingKeys = function (dDict) {
        for (let [k, v] of dDict.entries()) {
            if (this.has(k)){
                this.set(k, v);
            }
        }
    }
    };

    Map.prototype.grammalecte = true;
}

Modified gc_core/js/jsex_regex.js from [b8b02d05dd] to [8feeee694f].

38
39
40
41
42
43
44
45

46
47

48
49
50
51
52

53
54
55
56
57

58
59
60
61
62
63
64
38
39
40
41
42
43
44

45
46

47
48
49
50
51

52
53
54
55
56

57
58
59
60
61
62
63
64







-
+

-
+




-
+




-
+







                            m.end.push(m.index + codePos + m[i].length);
                        } else if (codePos === "$") {
                            // at the end of the pattern
                            m.start.push(this.lastIndex - m[i].length);
                            m.end.push(this.lastIndex);
                        } else if (codePos === "w") {
                            // word in the middle of the pattern
                            iPos = m[0].search("[ ’,()«»“”]"+m[i]+"[ ,’()«»“”]") + 1 + m.index
                            iPos = m[0].search("[ ’,()«»“”]"+m[i]+"[ ,’()«»“”]") + 1 + m.index;
                            m.start.push(iPos);
                            m.end.push(iPos + m[i].length)
                            m.end.push(iPos + m[i].length);
                        } else if (codePos === "*") {
                            // anywhere
                            iPos = m[0].indexOf(m[i]) + m.index;
                            m.start.push(iPos);
                            m.end.push(iPos + m[i].length)
                            m.end.push(iPos + m[i].length);
                        } else if (codePos === "**") {
                            // anywhere after previous group
                            iPos = m[0].indexOf(m[i], m.end[i-1]-m.index) + m.index;
                            m.start.push(iPos);
                            m.end.push(iPos + m[i].length)
                            m.end.push(iPos + m[i].length);
                        } else if (codePos.startsWith(">")) {
                            // >x:_
                            // todo: look in substring x
                            iPos = m[0].indexOf(m[i]) + m.index;
                            m.start.push(iPos);
                            m.end.push(iPos + m[i].length);
                        } else {
79
80
81
82
83
84
85
86

87
88
89
79
80
81
82
83
84
85

86
87
88
89







-
+



            if (typeof(helpers) !== "undefined") {
                helpers.logerror(e);
            } else {
                console.error(e);
            }
        }
        return m;
    }
    };

    RegExp.prototype.grammalecte = true;
}

Modified gc_core/js/jsex_string.js from [1e9c89a872] to [86533aa4da].

11
12
13
14
15
16
17
18

19
20
21

22
23
24

25
26
27

28
29
30

31
32
33

34
35
36
37
38
39
40
41

42
43
44
45

46
47
48
49

50
51
52
53
54

55
56
57
11
12
13
14
15
16
17

18
19
20

21
22
23

24
25
26

27
28
29

30
31
32

33
34
35
36
37
38
39
40

41
42
43
44

45
46
47
48

49
50
51
52
53

54
55
56
57







-
+


-
+


-
+


-
+


-
+


-
+







-
+



-
+



-
+




-
+



        let iPos = 0;
        let nStep = (bOverlapping) ? 1 : sSearch.length;
        while ((iPos = this.indexOf(sSearch, iPos)) >= 0) {
            nOccur++;
            iPos += nStep;
        }
        return nOccur;
    }
    };
    String.prototype.gl_isDigit = function () {
        return (this.search(/^[0-9⁰¹²³⁴⁵⁶⁷⁸⁹]+$/) !== -1);
    }
    };
    String.prototype.gl_isLowerCase = function () {
        return (this.search(/^[a-zà-öø-ÿ0-9-]+$/) !== -1);
    }
    };
    String.prototype.gl_isUpperCase = function () {
        return (this.search(/^[A-ZÀ-ÖØ-ߌ0-9-]+$/) !== -1);
    }
    };
    String.prototype.gl_isTitle = function () {
        return (this.search(/^[A-ZÀ-ÖØ-ߌ][a-zà-öø-ÿ'’-]+$/) !== -1);
    }
    };
    String.prototype.gl_toCapitalize = function () {
        return this.slice(0,1).toUpperCase() + this.slice(1).toLowerCase();
    }
    };
    String.prototype.gl_expand = function (oMatch) {
        let sNew = this;
        for (let i = 0; i < oMatch.length ; i++) {
            let z = new RegExp("\\\\"+parseInt(i), "g");
            sNew = sNew.replace(z, oMatch[i]);
        }
        return sNew;
    }
    };
    String.prototype.gl_trimRight = function (sChars) {
        let z = new RegExp("["+sChars+"]+$");
        return this.replace(z, "");
    }
    };
    String.prototype.gl_trimLeft = function (sChars) {
        let z = new RegExp("^["+sChars+"]+");
        return this.replace(z, "");
    }
    };
    String.prototype.gl_trim = function (sChars) {
        let z1 = new RegExp("^["+sChars+"]+");
        let z2 = new RegExp("["+sChars+"]+$");
        return this.replace(z1, "").replace(z2, "");
    }
    };

    String.prototype.grammalecte = true;
}

Modified gc_core/js/lang_core/gc_engine.js from [ce3013cd59] to [2f4877bef2].

1
2


3
4
5
6










7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30

31
32
33
34
35
36
37
38

39
40
41
42
43

44
45











46

47
48
49
50
51
52
53
54
55








56
57
58
59
60
61
62
63
64
65
66










67
68
69
70
71
72
73
74
75
76
77
78
79
80













81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97




















98
99
100
101
102
103
104
105
106
107









108
109
110
111
112
113
114
115







116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174



























































175
176
177
178
179
180
181
182
183
184
185
186





187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235

















































236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259

























260
261
262
263



264
265
266
267



268
269
270
271



272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295





























296
297

298
299
300
301
302
303
304
305
306
307
308















309
310

311
312
313
314




315
316
317
318




319
320
321
322

323
324
325
326



327
328
329
330



331
332
333
334



335
336
337


338
339
340
341
342
343
344
345

346
347
348
349
350
351
352
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28






29







30








31

32
33
34

35
36
37
38
39
40
41
42
43
44
45
46
47
48

49
50








51
52
53
54
55
56
57
58
59










60
61
62
63
64
65
66
67
68
69
70













71
72
73
74
75
76
77
78
79
80
81
82
83
84
















85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105









106
107
108
109
110
111
112
113
114
115







116
117
118
119
120
121
122
123


























































124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188






189
190
191
192
193
194
















































195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244























245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270



271
272
273
274



275
276
277
278



279
280
281
282























283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312

313
314










315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330

331




332
333
334
335
336



337
338
339
340




341
342



343
344
345
346



347
348
349
350



351
352
353
354


355
356

357

358
359
360


361
362
363
364
365
366
367
368


+
+




+
+
+
+
+
+
+
+
+
+










-
-
-
-
-
-

-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
+
-



-
+


+
+
+
+
+
+
+
+
+
+
+
-
+

-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+

-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+

-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+

-
-
-
-
-
-
-
+
+
+
+
+
+
+

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+






-
-
-
-
-
-
+
+
+
+
+

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

-
-
-
+
+
+

-
-
-
+
+
+

-
-
-
+
+
+

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

-
+

-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

-
+
-
-
-
-
+
+
+
+

-
-
-
+
+
+
+
-
-
-
-
+

-
-
-
+
+
+

-
-
-
+
+
+

-
-
-
+
+
+

-
-
+
+
-

-



-
-
+







// Grammar checker engine

"use strict";

${string}
${regex}
${map}


if (typeof(require) !== 'undefined') {
    var helpers = require("resource://grammalecte/helpers.js");
    var echo = require("resource://grammalecte/helpers.js").echo;
    var gc_options = require("resource://grammalecte/${lang}/gc_options.js");
    var gc_rules = require("resource://grammalecte/${lang}/gc_rules.js");
    var cregex = require("resource://grammalecte/${lang}/cregex.js");
    var text = require("resource://grammalecte/text.js");
}


function capitalizeArray (aArray) {
    // can’t map on user defined function??
    let aNew = [];
    for (let i = 0; i < aArray.length; i = i + 1) {
        aNew[i] = aArray[i].gl_toCapitalize();
    }
    return aNew;
}

const ibdawg = require("resource://grammalecte/ibdawg.js");
const helpers = require("resource://grammalecte/helpers.js");
const gc_options = require("resource://grammalecte/${lang}/gc_options.js");
const cr = require("resource://grammalecte/${lang}/cregex.js");
const text = require("resource://grammalecte/text.js");
const echo = require("resource://grammalecte/helpers.js").echo;

const lang = "${lang}";
const locales = ${loc};
const pkg = "${implname}";
const name = "${name}";
const version = "${version}";
const author = "${author}";

// data
// commons regexes
const _zEndOfSentence = new RegExp ('([.?!:;…][ .?!… »”")]*|.$)', "g");
const _zBeginOfParagraph = new RegExp ("^[-  –—.,;?!…]*", "ig");
const _zEndOfParagraph = new RegExp ("[-  .,;?!…–—]*$", "ig");

// grammar rules and dictionary
//const _rules = require("./gc_rules.js");
let _sContext = "";                                 // what software is running
let _sAppContext = "";                                  // what software is running
const _rules = require("resource://grammalecte/${lang}/gc_rules.js");
let _dOptions = null;
let _aIgnoredRules = new Set();
let _oDict = null;
let _dAnalyses = new Map();                         // cache for data from dictionary
let _dAnalyses = new Map();                             // cache for data from dictionary


var gc_engine = {

    //// Informations

    lang: "${lang}",
    locales: ${loc},
    pkg: "${implname}",
    name: "${name}",
    version: "${version}",
    author: "${author}",

///// Parsing
    //// Parsing

function parse (sText, sCountry="${country_default}", bDebug=false, bContext=false) {
    // analyses the paragraph sText and returns list of errors
    let dErrors;
    let errs;
    let sAlt = sText;
    let dDA = new Map();        // Disamnbiguator
    let dPriority = new Map();  // Key = position; value = priority
    let sNew = "";
    parse: function (sText, sCountry="${country_default}", bDebug=false, bContext=false) {
        // analyses the paragraph sText and returns list of errors
        let dErrors;
        let errs;
        let sAlt = sText;
        let dDA = new Map();        // Disamnbiguator
        let dPriority = new Map();  // Key = position; value = priority
        let sNew = "";

    // parse paragraph
    try {
        [sNew, dErrors] = _proofread(sText, sAlt, 0, true, dDA, dPriority, sCountry, bDebug, bContext);
        if (sNew) {
            sText = sNew;
        }
    }
    catch (e) {
        helpers.logerror(e);
    }
        // parse paragraph
        try {
            [sNew, dErrors] = this._proofread(sText, sAlt, 0, true, dDA, dPriority, sCountry, bDebug, bContext);
            if (sNew) {
                sText = sNew;
            }
        }
        catch (e) {
            helpers.logerror(e);
        }

    // cleanup
    if (sText.includes(" ")) {
        sText = sText.replace(/ /g, ' '); // nbsp
    }
    if (sText.includes(" ")) {
        sText = sText.replace(/ /g, ' '); // snbsp
    }
    if (sText.includes("'")) {
        sText = sText.replace(/'/g, "’");
    }
    if (sText.includes("‑")) {
        sText = sText.replace(/‑/g, "-"); // nobreakdash
    }
        // cleanup
        if (sText.includes(" ")) {
            sText = sText.replace(/ /g, ' '); // nbsp
        }
        if (sText.includes(" ")) {
            sText = sText.replace(/ /g, ' '); // snbsp
        }
        if (sText.includes("'")) {
            sText = sText.replace(/'/g, "’");
        }
        if (sText.includes("‑")) {
            sText = sText.replace(/‑/g, "-"); // nobreakdash
        }

    // parse sentence
    for (let [iStart, iEnd] of _getSentenceBoundaries(sText)) {
        if (4 < (iEnd - iStart) < 2000) {
            dDA.clear();
            //echo(sText.slice(iStart, iEnd));
            try {
                [_, errs] = _proofread(sText.slice(iStart, iEnd), sAlt.slice(iStart, iEnd), iStart, false, dDA, dPriority, sCountry, bDebug, bContext);
                dErrors.gl_update(errs);
            }
            catch (e) {
                helpers.logerror(e);
            }
        }
    }
    return Array.from(dErrors.values());
}
        // parse sentence
        for (let [iStart, iEnd] of this._getSentenceBoundaries(sText)) {
            if (4 < (iEnd - iStart) < 2000) {
                dDA.clear();
                //echo(sText.slice(iStart, iEnd));
                try {
                    [, errs] = this._proofread(sText.slice(iStart, iEnd), sAlt.slice(iStart, iEnd), iStart, false, dDA, dPriority, sCountry, bDebug, bContext);
                    dErrors.gl_update(errs);
                }
                catch (e) {
                    helpers.logerror(e);
                }
            }
        }
        return Array.from(dErrors.values());
    },

    _zEndOfSentence: new RegExp ('([.?!:;…][ .?!… »”")]*|.$)', "g"),
    _zBeginOfParagraph: new RegExp ("^[-  –—.,;?!…]*", "ig"),
    _zEndOfParagraph: new RegExp ("[-  .,;?!…–—]*$", "ig"),

function* _getSentenceBoundaries (sText) {
    let mBeginOfSentence = _zBeginOfParagraph.exec(sText)
    let iStart = _zBeginOfParagraph.lastIndex;
    let m;
    while ((m = _zEndOfSentence.exec(sText)) !== null) {
        yield [iStart, _zEndOfSentence.lastIndex];
        iStart = _zEndOfSentence.lastIndex;
    }
}
    _getSentenceBoundaries: function* (sText) {
        let mBeginOfSentence = this._zBeginOfParagraph.exec(sText)
        let iStart = this._zBeginOfParagraph.lastIndex;
        let m;
        while ((m = this._zEndOfSentence.exec(sText)) !== null) {
            yield [iStart, this._zEndOfSentence.lastIndex];
            iStart = this._zEndOfSentence.lastIndex;
        }
    },

function _proofread (s, sx, nOffset, bParagraph, dDA, dPriority, sCountry, bDebug, bContext) {
    let dErrs = new Map();
    let bChange = false;
    let bIdRule = option('idrule');
    let m;
    let bCondMemo;
    let nErrorStart;
    _proofread: function (s, sx, nOffset, bParagraph, dDA, dPriority, sCountry, bDebug, bContext) {
        let dErrs = new Map();
        let bChange = false;
        let bIdRule = option('idrule');
        let m;
        let bCondMemo;
        let nErrorStart;

    for (let [sOption, lRuleGroup] of _getRules(bParagraph)) {
        if (!sOption || option(sOption)) {
            for (let [zRegex, bUppercase, sLineId, sRuleId, nPriority, lActions, lGroups, lNegLookBefore] of lRuleGroup) {
                if (!_aIgnoredRules.has(sRuleId)) {
                    while ((m = zRegex.gl_exec2(s, lGroups, lNegLookBefore)) !== null) {
                        bCondMemo = null;
                        /*if (bDebug) {
                            echo(">>>> Rule # " + sLineId + " - Text: " + s + " opt: "+ sOption);
                        }*/
                        for (let [sFuncCond, cActionType, sWhat, ...eAct] of lActions) {
                        // action in lActions: [ condition, action type, replacement/suggestion/action[, iGroup[, message, URL]] ]
                            try {
                                //echo(oEvalFunc[sFuncCond]);
                                bCondMemo = (!sFuncCond || oEvalFunc[sFuncCond](s, sx, m, dDA, sCountry, bCondMemo))
                                if (bCondMemo) {
                                    switch (cActionType) {
                                        case "-":
                                            // grammar error
                                            //echo("-> error detected in " + sLineId + "\nzRegex: " + zRegex.source);
                                            nErrorStart = nOffset + m.start[eAct[0]];
                                            if (!dErrs.has(nErrorStart) || nPriority > dPriority.get(nErrorStart)) {
                                                dErrs.set(nErrorStart, _createError(s, sx, sWhat, nOffset, m, eAct[0], sLineId, sRuleId, bUppercase, eAct[1], eAct[2], bIdRule, sOption, bContext));
                                                dPriority.set(nErrorStart, nPriority);
                                            }
                                            break;
                                        case "~":
                                            // text processor
                                            //echo("-> text processor by " + sLineId + "\nzRegex: " + zRegex.source);
                                            s = _rewrite(s, sWhat, eAct[0], m, bUppercase);
                                            bChange = true;
                                            if (bDebug) {
                                                echo("~ " + s + "  -- " + m[eAct[0]] + "  # " + sLineId);
                                            }
                                            break;
                                        case "=":
                                            // disambiguation
                                            //echo("-> disambiguation by " + sLineId + "\nzRegex: " + zRegex.source);
                                            oEvalFunc[sWhat](s, m, dDA);
                                            if (bDebug) {
                                                echo("= " + m[0] + "  # " + sLineId + "\nDA: " + dDA.gl_toString());
                                            }
                                            break;
                                        case ">":
                                            // we do nothing, this test is just a condition to apply all following actions
                                            break;
                                        default:
                                            echo("# error: unknown action at " + sLineId);
                                    }
                                } else {
                                    if (cActionType == ">") {
                                        break;
                                    }
                                }
                            }
                            catch (e) {
                                echo(s);
                                echo("# line id: " + sLineId + "\n# rule id: " + sRuleId);
                                helpers.logerror(e);
        for (let [sOption, lRuleGroup] of this._getRules(bParagraph)) {
            if (!sOption || option(sOption)) {
                for (let [zRegex, bUppercase, sLineId, sRuleId, nPriority, lActions, lGroups, lNegLookBefore] of lRuleGroup) {
                    if (!_aIgnoredRules.has(sRuleId)) {
                        while ((m = zRegex.gl_exec2(s, lGroups, lNegLookBefore)) !== null) {
                            bCondMemo = null;
                            /*if (bDebug) {
                                echo(">>>> Rule # " + sLineId + " - Text: " + s + " opt: "+ sOption);
                            }*/
                            for (let [sFuncCond, cActionType, sWhat, ...eAct] of lActions) {
                            // action in lActions: [ condition, action type, replacement/suggestion/action[, iGroup[, message, URL]] ]
                                try {
                                    //echo(oEvalFunc[sFuncCond]);
                                    bCondMemo = (!sFuncCond || oEvalFunc[sFuncCond](s, sx, m, dDA, sCountry, bCondMemo))
                                    if (bCondMemo) {
                                        switch (cActionType) {
                                            case "-":
                                                // grammar error
                                                //echo("-> error detected in " + sLineId + "\nzRegex: " + zRegex.source);
                                                nErrorStart = nOffset + m.start[eAct[0]];
                                                if (!dErrs.has(nErrorStart) || nPriority > dPriority.get(nErrorStart)) {
                                                    dErrs.set(nErrorStart, this._createError(s, sx, sWhat, nOffset, m, eAct[0], sLineId, sRuleId, bUppercase, eAct[1], eAct[2], bIdRule, sOption, bContext));
                                                    dPriority.set(nErrorStart, nPriority);
                                                }
                                                break;
                                            case "~":
                                                // text processor
                                                //echo("-> text processor by " + sLineId + "\nzRegex: " + zRegex.source);
                                                s = this._rewrite(s, sWhat, eAct[0], m, bUppercase);
                                                bChange = true;
                                                if (bDebug) {
                                                    echo("~ " + s + "  -- " + m[eAct[0]] + "  # " + sLineId);
                                                }
                                                break;
                                            case "=":
                                                // disambiguation
                                                //echo("-> disambiguation by " + sLineId + "\nzRegex: " + zRegex.source);
                                                oEvalFunc[sWhat](s, m, dDA);
                                                if (bDebug) {
                                                    echo("= " + m[0] + "  # " + sLineId + "\nDA: " + dDA.gl_toString());
                                                }
                                                break;
                                            case ">":
                                                // we do nothing, this test is just a condition to apply all following actions
                                                break;
                                            default:
                                                echo("# error: unknown action at " + sLineId);
                                        }
                                    } else {
                                        if (cActionType == ">") {
                                            break;
                                        }
                                    }
                                }
                                catch (e) {
                                    echo(s);
                                    echo("# line id: " + sLineId + "\n# rule id: " + sRuleId);
                                    helpers.logerror(e);
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    if (bChange) {
        return [s, dErrs];
    }
    return [false, dErrs];
}
        if (bChange) {
            return [s, dErrs];
        }
        return [false, dErrs];
    },

function _createError (s, sx, sRepl, nOffset, m, iGroup, sLineId, sRuleId, bUppercase, sMsg, sURL, bIdRule, sOption, bContext) {
    let oErr = {};
    oErr["nStart"] = nOffset + m.start[iGroup];
    oErr["nEnd"] = nOffset + m.end[iGroup];
    oErr["sLineId"] = sLineId;
    oErr["sRuleId"] = sRuleId;
    oErr["sType"] = (sOption) ? sOption : "notype";
    // suggestions
    if (sRepl[0] === "=") {
        let sugg = oEvalFunc[sRepl.slice(1)](s, m);
        if (sugg) {
            if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) {
                oErr["aSuggestions"] = capitalizeArray(sugg.split("|"));
            } else {
                oErr["aSuggestions"] = sugg.split("|");
            }
        } else {
            oErr["aSuggestions"] = [];
        }
    } else if (sRepl == "_") {
        oErr["aSuggestions"] = [];
    } else {
        if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) {
            oErr["aSuggestions"] = capitalizeArray(sRepl.gl_expand(m).split("|"));
        } else {
            oErr["aSuggestions"] = sRepl.gl_expand(m).split("|");
        }
    }
    // Message
    if (sMsg[0] === "=") {
        sMessage = oEvalFunc[sMsg.slice(1)](s, m)
    } else {
        sMessage = sMsg.gl_expand(m);
    }
    if (bIdRule) {
        sMessage += " ##" + sLineId + " #" + sRuleId;
    }
    oErr["sMessage"] = sMessage;
    // URL
    oErr["URL"] = sURL || "";
    // Context
    if (bContext) {
        oErr["sUnderlined"] = sx.slice(m.start[iGroup], m.end[iGroup]);
        oErr["sBefore"] = sx.slice(Math.max(0, m.start[iGroup]-80), m.start[iGroup]);
        oErr["sAfter"] = sx.slice(m.end[iGroup], m.end[iGroup]+80);
    }
    return oErr;
}
    _createError: function (s, sx, sRepl, nOffset, m, iGroup, sLineId, sRuleId, bUppercase, sMsg, sURL, bIdRule, sOption, bContext) {
        let oErr = {};
        oErr["nStart"] = nOffset + m.start[iGroup];
        oErr["nEnd"] = nOffset + m.end[iGroup];
        oErr["sLineId"] = sLineId;
        oErr["sRuleId"] = sRuleId;
        oErr["sType"] = (sOption) ? sOption : "notype";
        // suggestions
        if (sRepl.slice(0,1) === "=") {
            let sugg = oEvalFunc[sRepl.slice(1)](s, m);
            if (sugg) {
                if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) {
                    oErr["aSuggestions"] = capitalizeArray(sugg.split("|"));
                } else {
                    oErr["aSuggestions"] = sugg.split("|");
                }
            } else {
                oErr["aSuggestions"] = [];
            }
        } else if (sRepl == "_") {
            oErr["aSuggestions"] = [];
        } else {
            if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) {
                oErr["aSuggestions"] = capitalizeArray(sRepl.gl_expand(m).split("|"));
            } else {
                oErr["aSuggestions"] = sRepl.gl_expand(m).split("|");
            }
        }
        // Message
        let sMessage = "";
        if (sMsg.slice(0,1) === "=") {
            sMessage = oEvalFunc[sMsg.slice(1)](s, m)
        } else {
            sMessage = sMsg.gl_expand(m);
        }
        if (bIdRule) {
            sMessage += " ##" + sLineId + " #" + sRuleId;
        }
        oErr["sMessage"] = sMessage;
        // URL
        oErr["URL"] = sURL || "";
        // Context
        if (bContext) {
            oErr["sUnderlined"] = sx.slice(m.start[iGroup], m.end[iGroup]);
            oErr["sBefore"] = sx.slice(Math.max(0, m.start[iGroup]-80), m.start[iGroup]);
            oErr["sAfter"] = sx.slice(m.end[iGroup], m.end[iGroup]+80);
        }
        return oErr;
    },

function _rewrite (s, sRepl, iGroup, m, bUppercase) {
    // text processor: write sRepl in s at iGroup position"
    let ln = m.end[iGroup] - m.start[iGroup];
    let sNew = "";
    if (sRepl === "*") {
        sNew = " ".repeat(ln);
    } else if (sRepl === ">" || sRepl === "_" || sRepl === "~") {
        sNew = sRepl + " ".repeat(ln-1);
    } else if (sRepl === "@") {
        sNew = "@".repeat(ln);
    } else if (sRepl.slice(0,1) === "=") {
        sNew = oEvalFunc[sRepl.slice(1)](s, m);
        sNew = sNew + " ".repeat(ln-sNew.length);
        if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) {
            sNew = sNew.gl_toCapitalize();
        }
    } else {
        sNew = sRepl.gl_expand(m);
        sNew = sNew + " ".repeat(ln-sNew.length);
    }
    //echo("\n"+s+"\nstart: "+m.start[iGroup]+" end:"+m.end[iGroup])
    return s.slice(0, m.start[iGroup]) + sNew + s.slice(m.end[iGroup]);
}
    _rewrite: function (s, sRepl, iGroup, m, bUppercase) {
        // text processor: write sRepl in s at iGroup position"
        let ln = m.end[iGroup] - m.start[iGroup];
        let sNew = "";
        if (sRepl === "*") {
            sNew = " ".repeat(ln);
        } else if (sRepl === ">" || sRepl === "_" || sRepl === "~") {
            sNew = sRepl + " ".repeat(ln-1);
        } else if (sRepl === "@") {
            sNew = "@".repeat(ln);
        } else if (sRepl.slice(0,1) === "=") {
            sNew = oEvalFunc[sRepl.slice(1)](s, m);
            sNew = sNew + " ".repeat(ln-sNew.length);
            if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) {
                sNew = sNew.gl_toCapitalize();
            }
        } else {
            sNew = sRepl.gl_expand(m);
            sNew = sNew + " ".repeat(ln-sNew.length);
        }
        //echo("\n"+s+"\nstart: "+m.start[iGroup]+" end:"+m.end[iGroup])
        return s.slice(0, m.start[iGroup]) + sNew + s.slice(m.end[iGroup]);
    },

    // Actions on rules

function ignoreRule (sRuleId) {
    _aIgnoredRules.add(sRuleId);
}
    ignoreRule: function (sRuleId) {
        _aIgnoredRules.add(sRuleId);
    },

function resetIgnoreRules () {
    _aIgnoredRules.clear();
}
    resetIgnoreRules: function () {
        _aIgnoredRules.clear();
    },

function reactivateRule (sRuleId) {
    _aIgnoredRules.delete(sRuleId);
}
    reactivateRule: function (sRuleId) {
        _aIgnoredRules.delete(sRuleId);
    },

function listRules (sFilter=null) {
    // generator: returns tuple (sOption, sLineId, sRuleId)
    try {
        for ([sOption, lRuleGroup] of _getRules(true)) {
            for ([_, _, sLineId, sRuleId, _, _] of lRuleGroup) {
                if (!sFilter || sRuleId.test(sFilter)) {
                    yield [sOption, sLineId, sRuleId];
                }
            }
        }
        for ([sOption, lRuleGroup] of _getRules(false)) {
            for ([_, _, sLineId, sRuleId, _, _] of lRuleGroup) {
                if (!sFilter || sRuleId.test(sFilter)) {
                    yield [sOption, sLineId, sRuleId];
                }
            }
        }
    }
    catch (e) {
        helpers.logerror(e);
    }
}

    listRules: function* (sFilter=null) {
        // generator: returns tuple (sOption, sLineId, sRuleId)
        try {
            for (let [sOption, lRuleGroup] of this._getRules(true)) {
                for (let [,, sLineId, sRuleId,,] of lRuleGroup) {
                    if (!sFilter || sRuleId.test(sFilter)) {
                        yield [sOption, sLineId, sRuleId];
                    }
                }
            }
            for (let [sOption, lRuleGroup] of this._getRules(false)) {
                for (let [,, sLineId, sRuleId,,] of lRuleGroup) {
                    if (!sFilter || sRuleId.test(sFilter)) {
                        yield [sOption, sLineId, sRuleId];
                    }
                }
            }
        }
        catch (e) {
            helpers.logerror(e);
        }
    },

    _getRules: function (bParagraph) {
        if (!bParagraph) {
            return gc_rules.lSentenceRules;
        }
        return gc_rules.lParagraphRules;
    },

//////// init
    //// Initialization

function load (sContext="JavaScript") {
    try {
        _oDict = new ibdawg.IBDAWG("${dic_name}.json");
        _sContext = sContext;
        _dOptions = gc_options.getOptions(sContext).gl_shallowCopy();     // duplication necessary, to be able to reset to default
    }
    catch (e) {
        helpers.logerror(e);
    }
}
    load: function (sContext="JavaScript", sPath="") {
        try {
            if (typeof(require) !== 'undefined') {
                var ibdawg = require("resource://grammalecte/ibdawg.js");
                _oDict = new ibdawg.IBDAWG("${dic_name}.json");
            } else {
                _oDict = new IBDAWG("${dic_name}.json", sPath);
            }
            _sAppContext = sContext;
            _dOptions = gc_options.getOptions(sContext).gl_shallowCopy();     // duplication necessary, to be able to reset to default
        }
        catch (e) {
            helpers.logerror(e);
        }
    },

function setOption (sOpt, bVal) {
    getDictionary: function () {
    if (_dOptions.has(sOpt)) {
        _dOptions.set(sOpt, bVal);
    }
}
        return _oDict;
    },

    //// Options

function setOptions (dOpt) {
    _dOptions.gl_updateOnlyExistingKeys(dOpt);
}
    setOption: function (sOpt, bVal) {
        if (_dOptions.has(sOpt)) {
            _dOptions.set(sOpt, bVal);
        }

function getOptions () {
    return _dOptions;
}
    },

function getDefaultOptions () {
    return gc_options.getOptions(_sContext).gl_shallowCopy();
}
    setOptions: function (dOpt) {
        _dOptions.gl_updateOnlyExistingKeys(dOpt);
    },

function resetOptions () {
    _dOptions = gc_options.getOptions(_sContext).gl_shallowCopy();
}
    getOptions: function () {
        return _dOptions;
    },

function getDictionary () {
    return _oDict;
}
    getDefaultOptions: function () {
        return gc_options.getOptions(_sAppContext).gl_shallowCopy();
    },

function _getRules (bParagraph) {
    if (!bParagraph) {
    resetOptions: function () {
        _dOptions = gc_options.getOptions(_sAppContext).gl_shallowCopy();
        return _rules.lSentenceRules;
    }
    return _rules.lParagraphRules;
}



//////// common functions
//////// Common functions

function option (sOpt) {
    // return true if option sOpt is active
    return _dOptions.get(sOpt);
}

function displayInfo (dDA, aWord) {
457
458
459
460
461
462
463
464

465
466
467
468
469

470
471
472
473
474

475
476
477
478
479
480
481
482
483
484
485
486

487
488
489
490
491

492


493
494
495
496
497
498
499
473
474
475
476
477
478
479

480
481
482
483
484

485
486
487
488
489

490
491
492
493
494
495
496
497



498
499
500
501
502
503
504

505
506
507
508
509
510
511
512
513
514
515







-
+




-
+




-
+







-
-
-


+




-
+

+
+








//// functions to get text outside pattern scope

// warning: check compile_rules.py to understand how it works

function nextword (s, iStart, n) {
    // get the nth word of the input string or empty string
    let z = new RegExp("^( +[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st%_-]+){" + (n-1).toString() + "} +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st%_-]+)", "i");
    let z = new RegExp("^(?: +[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st%_-]+){" + (n-1).toString() + "} +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st%_-]+)", "ig");
    let m = z.exec(s.slice(iStart));
    if (!m) {
        return null;
    }
    return [iStart + RegExp.lastIndex - m[2].length, m[2]];
    return [iStart + z.lastIndex - m[1].length, m[1]];
}

function prevword (s, iEnd, n) {
    // get the (-)nth word of the input string or empty string
    let z = new RegExp("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st%_-]+) +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st%_-]+ +){" + (n-1).toString() + "}$", "i");
    let z = new RegExp("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st%_-]+) +(?:[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st%_-]+ +){" + (n-1).toString() + "}$", "i");
    let m = z.exec(s.slice(0, iEnd));
    if (!m) {
        return null;
    }
    return [m.index, m[1]];
}

const _zNextWord = new RegExp ("^ +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_-]*)", "i");
const _zPrevWord = new RegExp ("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_-]*) +$", "i");

function nextword1 (s, iStart) {
    // get next word (optimization)
    let _zNextWord = new RegExp ("^ +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_-]*)", "ig");
    let m = _zNextWord.exec(s.slice(iStart));
    if (!m) {
        return null;
    }
    return [iStart + RegExp.lastIndex - m[1].length, m[1]];
    return [iStart + _zNextWord.lastIndex - m[1].length, m[1]];
}

const _zPrevWord = new RegExp ("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_-]*) +$", "i");

function prevword1 (s, iEnd) {
    // get previous word (optimization)
    //echo("prev1, s:"+s);
    //echo("prev1, s.slice(0, iEnd):"+s.slice(0, iEnd));
    let m = _zPrevWord.exec(s.slice(0, iEnd));
    //echo("prev1, m:"+m);
592
593
594
595
596
597
598

599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619


























620
621
622
623
624
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626










627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652




653







+











-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-

    return true;
}

function define (dDA, nPos, lMorph) {
    dDA.set(nPos, lMorph);
    return true;
}


//////// GRAMMAR CHECKER PLUGINS

${pluginsJS}


${callablesJS}



if (typeof(exports) !== 'undefined') {
    exports.load = load;
    exports.parse = parse;
    exports.lang = lang;
    exports.version = version;
    exports.getDictionary = getDictionary;
    exports.setOption = setOption;
    exports.setOptions = setOptions;
    exports.getOptions = getOptions;
    exports.getDefaultOptions = getDefaultOptions;
    exports.resetOptions = resetOptions;
    exports.lang = gc_engine.lang;
    exports.locales = gc_engine.locales;
    exports.pkg = gc_engine.pkg;
    exports.name = gc_engine.name;
    exports.version = gc_engine.version;
    exports.author = gc_engine.author;
    exports.parse = gc_engine.parse;
    exports._zEndOfSentence = gc_engine._zEndOfSentence;
    exports._zBeginOfParagraph = gc_engine._zBeginOfParagraph;
    exports._zEndOfParagraph = gc_engine._zEndOfParagraph;
    exports._getSentenceBoundaries = gc_engine._getSentenceBoundaries;
    exports._proofread = gc_engine._proofread;
    exports._createError = gc_engine._createError;
    exports._rewrite = gc_engine._rewrite;
    exports.ignoreRule = gc_engine.ignoreRule;
    exports.resetIgnoreRules = gc_engine.resetIgnoreRules;
    exports.reactivateRule = gc_engine.reactivateRule;
    exports.listRules = gc_engine.listRules;
    exports._getRules = gc_engine._getRules;
    exports.load = gc_engine.load;
    exports.getDictionary = gc_engine.getDictionary;
    exports.setOption = gc_engine.setOption;
    exports.setOptions = gc_engine.setOptions;
    exports.getOptions = gc_engine.getOptions;
    exports.getDefaultOptions = gc_engine.getDefaultOptions;
    exports.resetOptions = gc_engine.resetOptions;
    exports.ignoreRule = ignoreRule;
    exports.reactivateRule = reactivateRule;
    exports.resetIgnoreRules = resetIgnoreRules;
    exports.listRules = listRules;
}

Modified gc_core/js/lang_core/gc_options.js from [6e45d077d4] to [ba36100a98].

1
2
3
4


5
6
7
8
9
10






11
12

13
14
15
16
17
18





19
20
21


22

23
24
25
26




27
1
2
3
4
5
6






7
8
9
10
11
12
13

14
15





16
17
18
19
20
21


22
23
24
25
26



27
28
29
30
31




+
+
-
-
-
-
-
-
+
+
+
+
+
+

-
+

-
-
-
-
-
+
+
+
+
+

-
-
+
+

+

-
-
-
+
+
+
+

// Options for Grammalecte

${map}


var gc_options = {
function getOptions (sContext="JavaScript") {
    if (dOpt.hasOwnProperty(sContext)) {
        return dOpt[sContext];
    }
    return dOpt["JavaScript"];
}
    getOptions: function (sContext="JavaScript") {
        if (this.dOpt.hasOwnProperty(sContext)) {
            return this.dOpt[sContext];
        }
        return this.dOpt["JavaScript"];
    },

const lStructOpt = ${lStructOpt};
    lStructOpt: ${lStructOpt},

const dOpt = {
    "JavaScript": new Map (${dOptJavaScript}),
    "Firefox": new Map (${dOptFirefox}),
    "Thunderbird": new Map (${dOptThunderbird}),
}
    dOpt: {
        "JavaScript": new Map (${dOptJavaScript}),
        "Firefox": new Map (${dOptFirefox}),
        "Thunderbird": new Map (${dOptThunderbird}),
    },

const dOptLabel = ${dOptLabel};

    dOptLabel: ${dOptLabel}
}


if (typeof(exports) !== 'undefined') {
	exports.getOptions = getOptions;
	exports.lStructOpt = lStructOpt;
	exports.dOptLabel = dOptLabel;
	exports.getOptions = gc_options.getOptions;
	exports.lStructOpt = gc_options.lStructOpt;
    exports.dOpt = gc_options.dOpt;
	exports.dOptLabel = gc_options.dOptLabel;
}

Modified gc_core/js/lang_core/gc_rules.js from [03bc540fb7] to [02fc1d6f94].

1
2
3
4
5
6

7

8
9


10
11
12
13
14


15
1
2
3
4
5
6
7

8
9

10
11
12
13
14


15
16
17






+
-
+

-
+
+



-
-
+
+

// Grammar checker rules
"use strict";

${string}
${regex}

var gc_rules = {
const lParagraphRules = ${paragraph_rules_JS};
    lParagraphRules: ${paragraph_rules_JS},

const lSentenceRules = ${sentence_rules_JS};
    lSentenceRules: ${sentence_rules_JS}
}


if (typeof(exports) !== 'undefined') {
	exports.lParagraphRules = lParagraphRules;
	exports.lSentenceRules = lSentenceRules;
    exports.lParagraphRules = gc_rules.lParagraphRules;
    exports.lSentenceRules = gc_rules.lSentenceRules;
}

Modified gc_core/js/str_transform.js from [0fafeda9a5] to [3f33d76266].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34


35

36
37
38
39
40
41
42
43








44
45
46
47
48
49
50
51
52
53
54











55
56
57
58
59


60
1
2






























3
4
5
6

7








8
9
10
11
12
13
14
15











16
17
18
19
20
21
22
23
24
25
26
27
28
29


30
31
32


-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-


+
+
-
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+



-
-
+
+

//// STRING TRANSFORMATION

var dSimilarChars = new Map ([
    ["a", "aàâáä"],
    ["à", "aàâáä"],
    ["â", "aàâáä"],
    ["á", "aàâáä"],
    ["ä", "aàâáä"],
    ["c", "cç"],
    ["ç", "cç"],
    ["e", "eéêèë"],
    ["é", "eéêèë"],
    ["ê", "eéêèë"],
    ["è", "eéêèë"],
    ["ë", "eéêèë"],
    ["i", "iîïíì"],
    ["î", "iîïíì"],
    ["ï", "iîïíì"],
    ["í", "iîïíì"],
    ["ì", "iîïíì"],
    ["o", "oôóòö"],
    ["ô", "oôóòö"],
    ["ó", "oôóòö"],
    ["ò", "oôóòö"],
    ["ö", "oôóòö"],
    ["u", "uûùüú"],
    ["û", "uûùüú"],
    ["ù", "uûùüú"],
    ["ü", "uûùüú"],
    ["ú", "uûùüú"]
]);

// Note: 48 is the ASCII code for "0"

var str_transform = {
    getStemFromSuffixCode: function (sFlex, sSfxCode) {
// Suffix only
        // Suffix only
function getStemFromSuffixCode (sFlex, sSfxCode) {
    if (sSfxCode == "0") {
        return sFlex;
    }
    return sSfxCode[0] == '0' ? sFlex + sSfxCode.slice(1) : sFlex.slice(0, -(sSfxCode.charCodeAt(0)-48)) + sSfxCode.slice(1);
}

// Prefix and suffix
        if (sSfxCode == "0") {
            return sFlex;
        }
        return sSfxCode[0] == '0' ? sFlex + sSfxCode.slice(1) : sFlex.slice(0, -(sSfxCode.charCodeAt(0)-48)) + sSfxCode.slice(1);
    },
    
    getStemFromAffixCode: function (sFlex, sAffCode) {
        // Prefix and suffix
function getStemFromAffixCode (sFlex, sAffCode) {
    if (sAffCode == "0") {
        return sFlex;
    }
    if (!sAffCode.includes("/")) {
        return "# error #";
    }
    var [sPfxCode, sSfxCode] = sAffCode.split('/');
    sFlex = sPfxCode.slice(1) + sFlex.slice(sPfxCode.charCodeAt(0)-48);
    return sSfxCode[0] == '0' ? sFlex + sSfxCode.slice(1) : sFlex.slice(0, -(sSfxCode.charCodeAt(0)-48)) + sSfxCode.slice(1);
}
        if (sAffCode == "0") {
            return sFlex;
        }
        if (!sAffCode.includes("/")) {
            return "# error #";
        }
        let [sPfxCode, sSfxCode] = sAffCode.split('/');
        sFlex = sPfxCode.slice(1) + sFlex.slice(sPfxCode.charCodeAt(0)-48);
        return sSfxCode[0] == '0' ? sFlex + sSfxCode.slice(1) : sFlex.slice(0, -(sSfxCode.charCodeAt(0)-48)) + sSfxCode.slice(1);
    }
};


if (typeof(exports) !== 'undefined') {
    exports.getStemFromSuffixCode = getStemFromSuffixCode;
    exports.getStemFromAffixCode = getStemFromAffixCode;
    exports.getStemFromSuffixCode = str_transform.getStemFromSuffixCode;
    exports.getStemFromAffixCode = str_transform.getStemFromAffixCode;
}

Modified gc_core/js/tests.js from [f2f737b523] to [abe05a3485].

1
2
3
4
5

6


7
8
9
10
11

12

13
14

15
16
17

18

19
20
21


22
23
24
25
26
27
28
1
2
3
4
5
6

7
8
9
10
11
12

13
14
15
16

17
18
19
20
21

22



23
24
25
26
27
28
29
30
31





+
-
+
+




-
+

+

-
+



+
-
+
-
-
-
+
+







// JavaScript

"use strict";


if (typeof(require) !== 'undefined') {
const helpers = require("resource://grammalecte/helpers.js");
    var helpers = require("resource://grammalecte/helpers.js");
}


class TestGrammarChecking {

    constructor (gce) {
    constructor (gce, spfTests="") {
        this.gce = gce;
        this.spfTests = spfTests;
        this._aRuleTested = new Set();
    };
    }

    * testParse (bDebug=false) {
        const t0 = Date.now();
        let sURL = (this.spfTests !== "") ? this.spfTests : "resource://grammalecte/"+this.gce.lang+"/tests_data.json";
        const aData = JSON.parse(helpers.loadFile("resource://grammalecte/"+this.gce.lang+"/tests_data.json")).aData;
        const aData = JSON.parse(helpers.loadFile(sURL)).aData;
        //const aData = require("resource://grammalecte/"+this.gce.lang+"/tests_data.js").aData;
        let nInvalid = 0
        let nTotal = 0
        let nInvalid = 0;
        let nTotal = 0;
        let sErrorText;
        let sSugg;
        let sExpectedErrors;
        let sTextToCheck;
        let sFoundErrors;
        let sListErr;
        let sLineNum;
57
58
59
60
61
62
63
64

65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81

82
83
84
85
86
87
88
89
90
91
92
93
94
95

96
97
98
99
100
101
102
60
61
62
63
64
65
66

67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83

84
85
86
87
88
89
90
91
92
93
94
95
96
97

98
99
100
101
102
103
104
105







-
+
















-
+













-
+







                    if (sExpectedErrors !== sFoundErrors) {
                        yield "\n" + i.toString() +
                              "\n# Line num: " + sLineNum +
                              "\n> to check: " + sTextToCheck +
                              "\n  expected: " + sExpectedErrors +
                              "\n  found:    " + sFoundErrors +
                              "\n  errors:   \n" + sListErr;
                        nInvalid = nInvalid + 1
                        nInvalid = nInvalid + 1;
                    }
                    nTotal = nTotal + 1;
                }
                i = i + 1;
                if (i % 1000 === 0) {
                    yield i.toString();
                }
            }
            bShowUntested = true;
        }
        catch (e) {
            helpers.logerror(e);
        }

        if (bShowUntested) {
            i = 0;
            for (let [sOpt, sLineId, sRuleId] of gce.listRules()) {
            for (let [sOpt, sLineId, sRuleId] of this.gce.listRules()) {
                if (!this._aRuleTested.has(sLineId) && !/^[0-9]+[sp]$|^[pd]_/.test(sRuleId)) {
                    sUntestedRules += sRuleId + ", ";
                    i += 1;
                }
            }
            if (i > 0) {
                yield sUntestedRules + "\n[" + i.toString() + " untested rules]";
            }
        }

        const t1 = Date.now();
        yield "Tests parse finished in " + ((t1-t0)/1000).toString()
            + " s\nTotal errors: " + nInvalid.toString() + " / " + nTotal.toString();
    };
    }

    _getExpectedErrors (sLine) {
        try {
            let sRes = " ".repeat(sLine.length);
            let z = /\{\{.+?\}\}/g;
            let m;
            let i = 0;
114
115
116
117
118
119
120
121

122
123
124
125
126
127

128
129

130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146

147
148
149
150
151
152
153
117
118
119
120
121
122
123

124
125
126
127
128
129

130
131

132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148

149
150
151
152
153
154
155
156







-
+





-
+

-
+
















-
+







            }
            return sRes;
        }
        catch (e) {
            helpers.logerror(e);
        }
        return " ".repeat(sLine.length);
    };
    }

    _getFoundErrors (sLine, bDebug, sOption) {
        try {
            let aErrs = [];
            if (sOption) {
                gce.setOption(sOption, true);
                this.gce.setOption(sOption, true);
                aErrs = this.gce.parse(sLine, "FR", bDebug);
                gce.setOption(sOption, false);
                this.gce.setOption(sOption, false);
            } else {
                aErrs = this.gce.parse(sLine, "FR", bDebug);
            }
            let sRes = " ".repeat(sLine.length);
            let sListErr = "";
            for (let dErr of aErrs) {
                sRes = sRes.slice(0, dErr["nStart"]) + "~".repeat(dErr["nEnd"] - dErr["nStart"]) + sRes.slice(dErr["nEnd"]);
                sListErr += "    * {" + dErr['sLineId'] + " / " + dErr['sRuleId'] + "}  at  " + dErr['nStart'] + ":" + dErr['nEnd'] + "\n";
                this._aRuleTested.add(dErr["sLineId"]);
            }
            return [sRes, sListErr];
        }
        catch (e) {
            helpers.logerror(e);
        }
        return [" ".repeat(sLine.length), ""];
    };
    }

}


if (typeof(exports) !== 'undefined') {
    exports.TestGrammarChecking = TestGrammarChecking;
}

Modified gc_core/js/text.js from [beffb97d58] to [46a1749c2b].

1
2
3
4


5
6


7


8
9
10
11
12
13
14
15
16
17
18











19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37


















38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57




















58
59
60
61
62
63



64
1
2
3
4
5
6


7
8
9
10
11











12
13
14
15
16
17
18
19
20
21
22
23


















24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42



















43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65



66
67
68
69




+
+
-
-
+
+

+
+
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+



-
-
-
+
+
+

// JavaScript

"use strict";


if (typeof(exports) !== 'undefined') {
const helpers = require("resource://grammalecte/helpers.js");

    var helpers = require("resource://grammalecte/helpers.js");
}


var text = {
function* getParagraph (sText) {
    // generator: returns paragraphs of text
    let iStart = 0;
    let iEnd = 0;
    sText = sText.replace("\r", "");
    while ((iEnd = sText.indexOf("\n", iStart)) !== -1) {
        yield sText.slice(iStart, iEnd);
        iStart = iEnd + 1;
    }
    yield sText.slice(iStart);
}
    getParagraph: function* (sText) {
        // generator: returns paragraphs of text
        let iStart = 0;
        let iEnd = 0;
        sText = sText.replace("\r", "");
        while ((iEnd = sText.indexOf("\n", iStart)) !== -1) {
            yield sText.slice(iStart, iEnd);
            iStart = iEnd + 1;
        }
        yield sText.slice(iStart);
    },

function* wrap (sText, nWidth=80) {
    // generator: returns text line by line
    while (sText) {
        if (sText.length >= nWidth) {
            let nEnd = sText.lastIndexOf(" ", nWidth) + 1;
            if (nEnd > 0) {
                yield sText.slice(0, nEnd);
                sText = sText.slice(nEnd);
            } else {
                yield sText.slice(0, nWidth);
                sText = sText.slice(nWidth);
            }
        } else {
            break;
        }
    }
    yield sText;
}
    wrap: function* (sText, nWidth=80) {
        // generator: returns text line by line
        while (sText) {
            if (sText.length >= nWidth) {
                let nEnd = sText.lastIndexOf(" ", nWidth) + 1;
                if (nEnd > 0) {
                    yield sText.slice(0, nEnd);
                    sText = sText.slice(nEnd);
                } else {
                    yield sText.slice(0, nWidth);
                    sText = sText.slice(nWidth);
                }
            } else {
                break;
            }
        }
        yield sText;
    },

function getReadableError (oErr) {
    // Returns an error oErr as a readable error
    try {
        let sResult = "\n* " + oErr['nStart'] + ":" + oErr['nEnd'] 
                    + "  # " + oErr['sLineId'] + "  # " + oErr['sRuleId'] + ":\n";
        sResult += "  " + oErr["sMessage"];
        if (oErr["aSuggestions"].length > 0) {
            sResult += "\n  > Suggestions : " + oErr["aSuggestions"].join(" | ");
        }
        if (oErr["URL"] !== "") {
            sResult += "\n  > URL: " + oErr["URL"];
        }
        return sResult;
    }
    catch (e) {
        helpers.logerror(e);
        return "\n# Error. Data: " + oErr.toString();
    }
}
    getReadableError: function (oErr) {
        // Returns an error oErr as a readable error
        try {
            let sResult = "\n* " + oErr['nStart'] + ":" + oErr['nEnd'] 
                        + "  # " + oErr['sLineId'] + "  # " + oErr['sRuleId'] + ":\n";
            sResult += "  " + oErr["sMessage"];
            if (oErr["aSuggestions"].length > 0) {
                sResult += "\n  > Suggestions : " + oErr["aSuggestions"].join(" | ");
            }
            if (oErr["URL"] !== "") {
                sResult += "\n  > URL: " + oErr["URL"];
            }
            return sResult;
        }
        catch (e) {
            helpers.logerror(e);
            return "\n# Error. Data: " + oErr.toString();
        }
    }
};


if (typeof(exports) !== 'undefined') {
    exports.getParagraph = getParagraph;
    exports.wrap = wrap;
    exports.getReadableError = getReadableError;
    exports.getParagraph = text.getParagraph;
    exports.wrap = text.wrap;
    exports.getReadableError = text.getReadableError;
}

Modified gc_core/js/tokenizer.js from [a6594366c3] to [fcd058bf6a].

1
2
3
4
5


6
7
8





9
10
11
12
13
14
15
1
2
3
4
5
6
7



8
9
10
11
12
13
14
15
16
17
18
19





+
+
-
-
-
+
+
+
+
+







// JavaScript
// Very simple tokenizer

"use strict";


if (typeof(exports) !== 'undefined') {
const helpers = require("resource://grammalecte/helpers.js");

const aPatterns = {
    var helpers = require("resource://grammalecte/helpers.js");
}


const aTkzPatterns = {
    // All regexps must start with ^.
    "default":
        [
            [/^[   \t]+/, 'SPACE'],
            [/^[,.;:!?…«»“”‘’"(){}\[\]/·–—]+/, 'SEPARATOR'],
            [/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_]+[@.])[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.\/?&!%=+*"'@$#-]+/, 'LINK'],
            [/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+/, 'TAG'],
31
32
33
34
35
36
37
38

39
40
41
42
43
44
45

46
47
48

49

50
51
52
53
54
55
56
35
36
37
38
39
40
41

42
43
44
45
46
47
48

49
50
51

52

53
54
55
56
57
58
59
60







-
+






-
+


-
+
-
+







            [/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
            [/^(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`]/i, 'ELPFX'],
            [/^\d\d?[hm]\d\d\b/, 'HOUR'],
            [/^\d+(?:er|nd|e|de|ième|ème|eme)s?\b/, 'ORDINAL'],
            [/^-?\d+(?:[.,]\d+|)/, 'NUM'],
            [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+)*/, 'WORD']
        ]
}
};


class Tokenizer {

    constructor (sLang) {
        this.sLang = sLang;
        if (!aPatterns.hasOwnProperty(sLang)) {
        if (!aTkzPatterns.hasOwnProperty(sLang)) {
            this.sLang = "default";
        }
        this.aRules = aPatterns[this.sLang];
        this.aRules = aTkzPatterns[this.sLang];
    };
    }

    * genTokens (sText) {
        let m;
        let i = 0;
        while (sText) {
            let nCut = 1;
            for (let [zRegex, sType] of this.aRules) {
70
71
72
73
74
75
76
77

78
79
80
81
82
83
84
74
75
76
77
78
79
80

81
82
83
84
85
86
87
88







-
+







                catch (e) {
                    helpers.logerror(e);
                }
            }
            i += nCut;
            sText = sText.slice(nCut);
        }
    };
    }

    getSpellingErrors (sText, oDict) {
        let aSpellErr = [];
        for (let oToken of this.genTokens(sText)) {
            if (oToken.sType === 'WORD' && !oDict.isValidToken(oToken.sValue)) {
                aSpellErr.push(oToken);
            }