Grammalecte  Check-in [b08f2ef338]

Overview
Comment:[core][js] ajout des ; oubliés et ajustement des ; en trop (commit erroné)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | dead | webext2_illusion
Files: files | file ages | folders
SHA3-256: b08f2ef338ece05406fc86b516e1d8d97e880b6c6f8751e501d8c526a0f340d9
User & Date: IllusionPerdu on 2017-08-06 00:44:03
Original Comment: [FIX] (gc_core/js) ajout des ; oubliés et ajustement des ; en trop
Other Links: branch diff | manifest | tags
Context
2017-08-06
00:44
[core][js] ajout des ; oubliés et ajustement des ; en trop (commit erroné) Closed-Leaf check-in: b08f2ef338 user: IllusionPerdu tags: dead, webext2_illusion
2017-08-05
19:50
[fr] màj: confusion <à/a> dans pronom +à l’air +adj check-in: e918b25b89 user: olr tags: trunk, fr
Changes

Modified gc_core/js/helpers.js from [da0905c944] to [8fee48eb89].

1
2
3
4
5
6
7
8
9


10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87

88
89
90
91
92
93
94
95
96
97
98

// HELPERS

"use strict";

// In Firefox, there is no console.log in PromiseWorker, but there is worker.log.
// In Thunderbird, you can’t access to console directly. So it’s required to pass a log function.
let funcOutput = null;



// Registers func as the log sink used by echo() and logerror().
// While funcOutput is null, those functions write to the console instead.
function setLogOutput (func) {
    funcOutput = func;
}

// Sends obj to the log sink registered with setLogOutput(), or to console.log
// when no sink is registered. Always returns true.
function echo (obj) {
    if (funcOutput === null) {
        console.log(obj);
        return true;
    }
    funcOutput(obj);
    return true;
}

// Formats the error e (fileName, name, lineNumber, message, plus the stack when
// bStack is true) and sends the text to the registered log sink, or to
// console.error when no sink is registered.
function logerror (e, bStack=false) {
    const sHead = "\n" + e.fileName + "\n" + e.name + "\nline: " + e.lineNumber + "\n" + e.message;
    // e.stack is only read when the caller asked for it
    const sMsg = (bStack) ? sHead + "\n--- Stack ---\n" + e.stack : sHead;
    if (funcOutput === null) {
        console.error(sMsg);
        return;
    }
    funcOutput(sMsg);
}

// Debug helper: echoes the type of o, then one "name: value" line for each
// property enumerated by for...in, then the JSON serialization of o.
function inspect (o) {
    let sMsg = "__inspect__: " + typeof o;
    for (let sParam in o) {
        // bracket access is required: o.sParam would read a property literally
        // named "sParam" and print "undefined" for every entry
        sMsg += "\n" + sParam + ": " + o[sParam];
    }
    sMsg += "\n" + JSON.stringify(o) + "\n__end__";
    echo(sMsg);
}


// Loads resources in workers (suggested by Mozilla extensions reviewers).
// For more options have a look here: https://gist.github.com/Noitidart/ec1e6b9a593ec7e3efed
// If not in workers, use sdk/data.load() instead.
//
// Performs a synchronous GET on spf and returns the response text.
// Any exception is passed to logerror() and null is returned instead.
function loadFile (spf) {
    try {
        let xRequest;
        if (typeof XMLHttpRequest !== "undefined") {
            xRequest = new XMLHttpRequest();
        }
        else {
            // no global XMLHttpRequest: build one through the "chrome" module
            // (necessary for Thunderbird according to the original author)
            let { Cc, Ci } = require("chrome");
            xRequest = Cc["@mozilla.org/xmlextras/xmlhttprequest;1"].createInstance();
            xRequest.QueryInterface(Ci.nsIXMLHttpRequest);
        }
        xRequest.open('GET', spf, false); // 3rd arg is false for synchronous, sync is acceptable in workers
        xRequest.send();
        return xRequest.responseText;
    }
    catch (e) {
        logerror(e);
        return null;
    }
}


// conversions
// Returns a new Map holding the own enumerable string-keyed properties of obj,
// in property order. Inherited properties are ignored, so properties added to a
// prototype never leak into the Map. null or undefined yields an empty Map.
function objectToMap (obj) {
    const m = new Map();
    if (obj === null || obj === undefined) {
        return m;
    }
    for (const sKey of Object.keys(obj)) {
        m.set(sKey, obj[sKey]);
    }
    return m;
}

// Builds a plain object from the [key, value] pairs yielded when iterating m.
function mapToObject (m) {
    const oResult = {};
    for (const aPair of m) {
        const [xKey, xValue] = aPair;
        oResult[xKey] = xValue;
    }
    return oResult;
}



if (typeof(exports) !== 'undefined') {
    exports.setLogOutput = setLogOutput;
    exports.echo = echo;
    exports.logerror = logerror;
    exports.inspect = inspect;
    exports.loadFile = loadFile;
    exports.objectToMap = objectToMap;
    exports.mapToObject = mapToObject;
}









>
>
|
|
|

|
|
|
|
|
|
|
|

|
|
|
|
|
|
|
|
|
|
|

|
|
|
|
|
|
|
|

|
|
|
|
<
|
|
|
|
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

<
|
|
|
|
|
|
|
|
|

|
|
|
|
|
|
|
>



|
|
|
|
|
|
|

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49

50
51
52
53

54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69

70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98

// HELPERS

"use strict";

// In Firefox, there is no console.log in PromiseWorker, but there is worker.log.
// In Thunderbird, you can’t access to console directly. So it’s required to pass a log function.
let funcOutput = null;

// Logging, file loading and conversion helpers.
// Methods call each other through `helpers` rather than `this`, so they keep
// working when detached from the object (e.g. exports.inspect = helpers.inspect).
var helpers = {

    // Registers func as the log sink used by echo() and logerror().
    // While funcOutput is null, those methods write to the console instead.
    setLogOutput: function (func) {
        funcOutput = func;
    },

    // Sends obj to the log sink, or to console.log when none is set. Returns true.
    echo: function (obj) {
        if (funcOutput !== null) {
            funcOutput(obj);
        } else {
            console.log(obj);
        }
        return true;
    },

    // Formats the error e (fileName, name, lineNumber, message, plus the stack
    // when bStack is true) and sends it to the log sink, or to console.error.
    logerror: function (e, bStack=false) {
        let sMsg = "\n" + e.fileName + "\n" + e.name + "\nline: " + e.lineNumber + "\n" + e.message;
        if (bStack) {
            sMsg += "\n--- Stack ---\n" + e.stack;
        }
        if (funcOutput !== null) {
            funcOutput(sMsg);
        } else {
            console.error(sMsg);
        }
    },

    // Debug helper: echoes the type of o, one "name: value" line for each
    // property enumerated by for...in, then the JSON serialization of o.
    inspect: function (o) {
        let sMsg = "__inspect__: " + typeof o;
        for (let sParam in o) {
            // bracket access is required: o.sParam would read a property
            // literally named "sParam" and print "undefined" for every entry
            sMsg += "\n" + sParam + ": " + o[sParam];
        }
        sMsg += "\n" + JSON.stringify(o) + "\n__end__";
        helpers.echo(sMsg);
    },

    // Performs a synchronous GET on spf and returns the response text.
    // Any exception is passed to logerror() and null is returned instead.
    loadFile: function (spf) {
        // load resources in workers (suggested by Mozilla extensions reviewers)
        // for more options have a look here: https://gist.github.com/Noitidart/ec1e6b9a593ec7e3efed
        // if not in workers, use sdk/data.load() instead

        try {
            let xRequest;
            if (typeof XMLHttpRequest !== "undefined") {
                xRequest = new XMLHttpRequest();

            } else {
                // no global XMLHttpRequest: build one through the "chrome" module
                // (necessary for Thunderbird according to the original author)
                let { Cc, Ci } = require("chrome");
                xRequest = Cc["@mozilla.org/xmlextras/xmlhttprequest;1"].createInstance();
                xRequest.QueryInterface(Ci.nsIXMLHttpRequest);
            }
            xRequest.open('GET', spf, false); // 3rd arg is false for synchronous, sync is acceptable in workers
            xRequest.send();
            return xRequest.responseText;
        }
        catch (e) {
            helpers.logerror(e);
            return null;
        }
    },


    // conversions

    // Returns a new Map holding the own enumerable string-keyed properties of
    // obj (inherited properties are ignored). null or undefined yields an empty Map.
    objectToMap: function (obj) {
        let m = new Map();
        if (obj === null || obj === undefined) {
            return m;
        }
        for (let param of Object.keys(obj)) {
            m.set(param, obj[param]);
        }
        return m;
    },

    // Builds a plain object from the [key, value] pairs yielded when iterating m.
    mapToObject: function (m) {
        let obj = {};
        for (let [k, v] of m) {
            obj[k] = v;
        }
        return obj;
    }
};


if (typeof(exports) !== 'undefined') {
    exports.setLogOutput = helpers.setLogOutput;
    exports.echo = helpers.echo;
    exports.logerror = helpers.logerror;
    exports.inspect = helpers.inspect;
    exports.loadFile = helpers.loadFile;
    exports.objectToMap = helpers.objectToMap;
    exports.mapToObject = helpers.mapToObject;
}

Modified gc_core/js/ibdawg.js from [e748c8288e] to [cdddd20c84].

1
2
3
4


5
6

7
8
9
10
11
12
13
14
15
16
17
18
19

20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
//// IBDAWG

"use strict";



const st = require("resource://grammalecte/str_transform.js");
const helpers = require("resource://grammalecte/helpers.js");



// String
// Don’t remove. Necessary in TB.
${string}



class IBDAWG {
    // INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH

    constructor (sDicName) {
        try {

            const dict = JSON.parse(helpers.loadFile("resource://grammalecte/_dictionaries/"+sDicName));
            Object.assign(this, dict);
            //const dict = require("resource://grammalecte/"+sLang+"/dictionary.js");
            //Object.assign(this, dict.dictionary);
        }
        catch (e) {
            throw Error("# Error. File not found or not loadable.\n" + e.message + "\n");
        }
        /*
            Properties:
            sName, nVersion, sHeader, lArcVal, nArcVal, byDic, sLang, nChar, nBytesArc, nBytesNodeAddress,
            nEntries, nNode, nArc, nAff, cStemming, nTag, dChar, _arcMask, _finalNodeMask, _lastArcMask, _addrBitMask, nBytesOffset,
        */
        if (!this.sHeader.startsWith("/pyfsa/")) {
            throw TypeError("# Error. Not a pyfsa binary dictionary. Header: " + this.sHeader);
        }
        if (!(this.nVersion == "1" || this.nVersion == "2" || this.nVersion == "3")) {
            throw RangeError("# Error. Unknown dictionary version: " + this.nVersion);
        }

        this.dChar = helpers.objectToMap(this.dChar);
        //this.byDic = new Uint8Array(this.byDic);  // not quicker, even slower

        if (this.cStemming == "S") {
            this.funcStemming = st.getStemFromSuffixCode;
        } else if (this.cStemming == "A") {
            this.funcStemming = st.getStemFromAffixCode;
        } else {
            this.funcStemming = st.noStemming;
        }

        // Configuring DAWG functions according to nVersion
        switch (this.nVersion) {
            case 1:
                this.morph = this._morph1;
                this.stem = this._stem1;




>
>
|
|
>











|

>
|

<
<




















|

|

|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25


26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
//// IBDAWG

"use strict";


if (typeof(require) !== 'undefined') {
    var str_transform = require("resource://grammalecte/str_transform.js");
    var helpers = require("resource://grammalecte/helpers.js");
}


// String
// Don’t remove. Necessary in TB.
${string}



class IBDAWG {
    // INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH

    constructor (sDicName, sPath="") {
        try {
            let sURL = (sPath !== "") ? sPath + "/" + sDicName : "resource://grammalecte/_dictionaries/"+sDicName;
            const dict = JSON.parse(helpers.loadFile(sURL));
            Object.assign(this, dict);


        }
        catch (e) {
            throw Error("# Error. File not found or not loadable.\n" + e.message + "\n");
        }
        /*
            Properties:
            sName, nVersion, sHeader, lArcVal, nArcVal, byDic, sLang, nChar, nBytesArc, nBytesNodeAddress,
            nEntries, nNode, nArc, nAff, cStemming, nTag, dChar, _arcMask, _finalNodeMask, _lastArcMask, _addrBitMask, nBytesOffset,
        */
        if (!this.sHeader.startsWith("/pyfsa/")) {
            throw TypeError("# Error. Not a pyfsa binary dictionary. Header: " + this.sHeader);
        }
        if (!(this.nVersion == "1" || this.nVersion == "2" || this.nVersion == "3")) {
            throw RangeError("# Error. Unknown dictionary version: " + this.nVersion);
        }

        this.dChar = helpers.objectToMap(this.dChar);
        //this.byDic = new Uint8Array(this.byDic);  // not quicker, even slower

        if (this.cStemming == "S") {
            this.funcStemming = str_transform.getStemFromSuffixCode;
        } else if (this.cStemming == "A") {
            this.funcStemming = str_transform.getStemFromAffixCode;
        } else {
            this.funcStemming = str_transform.noStemming;
        }

        // Configuring DAWG functions according to nVersion
        switch (this.nVersion) {
            case 1:
                this.morph = this._morph1;
                this.stem = this._stem1;
70
71
72
73
74
75
76
77

78
79
80
81
82
83
84

85
86
87
88
89
90
91
92
93
94
95
96
97
98

99
100
101
102
103
104
105
                break;
            default:
                throw ValueError("# Error: unknown code: " + this.nVersion);
        }
        //console.log(this.getInfo());
        this.bOptNumSigle = true;
        this.bOptNumAtLast = false;
    };


    getInfo () {
        return  `  Language: ${this.sLang}      Version: ${this.nVersion}      Stemming: ${this.cStemming}FX\n` +
                `  Arcs values:  ${this.nArcVal} = ${this.nChar} characters,  ${this.nAff} affixes,  ${this.nTag} tags\n` +
                `  Dictionary: ${this.nEntries} entries,    ${this.nNode} nodes,   ${this.nArc} arcs\n` +
                `  Address size: ${this.nBytesNodeAddress} bytes,  Arc size: ${this.nBytesArc} bytes\n`;
    };


    isValidToken (sToken) {
        // checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)
        if (this.isValid(sToken)) {
            return true;
        }
        if (sToken.includes("-")) {
            if (sToken.gl_count("-") > 4) {
                return true;
            }
            return sToken.split("-").every(sWord  =>  this.isValid(sWord)); 
        }
        return false;
    };


    isValid (sWord) {
        // checks if sWord is valid (different casing tested if the first letter is a capital)
        if (!sWord) {
            return null;
        }
        if (sWord.includes("’")) { // ugly hack







<
>






<
>













<
>







72
73
74
75
76
77
78

79
80
81
82
83
84
85

86
87
88
89
90
91
92
93
94
95
96
97
98
99

100
101
102
103
104
105
106
107
                break;
            default:
                throw ValueError("# Error: unknown code: " + this.nVersion);
        }
        //console.log(this.getInfo());
        this.bOptNumSigle = true;
        this.bOptNumAtLast = false;

    }

    getInfo () {
        return  `  Language: ${this.sLang}      Version: ${this.nVersion}      Stemming: ${this.cStemming}FX\n` +
                `  Arcs values:  ${this.nArcVal} = ${this.nChar} characters,  ${this.nAff} affixes,  ${this.nTag} tags\n` +
                `  Dictionary: ${this.nEntries} entries,    ${this.nNode} nodes,   ${this.nArc} arcs\n` +
                `  Address size: ${this.nBytesNodeAddress} bytes,  Arc size: ${this.nBytesArc} bytes\n`;

    }

    isValidToken (sToken) {
        // checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)
        if (this.isValid(sToken)) {
            return true;
        }
        if (sToken.includes("-")) {
            if (sToken.gl_count("-") > 4) {
                return true;
            }
            return sToken.split("-").every(sWord  =>  this.isValid(sWord)); 
        }
        return false;

    }

    isValid (sWord) {
        // checks if sWord is valid (different casing tested if the first letter is a capital)
        if (!sWord) {
            return null;
        }
        if (sWord.includes("’")) { // ugly hack
121
122
123
124
125
126
127
128

129
130
131
132
133
134
135
136
137
138
139

140
141
142
143
144
145
146
147
148
149
150
151
152
153
154

155
156
157
158
159
160
161
162
163
164
165
166

167
168
169
170
171
172
173
174
175
176
177
                }
                return !!this.lookup(sWord.slice(0, 1).toLowerCase() + sWord.slice(1));
            } else {
                return !!this.lookup(sWord.toLowerCase());
            }
        }
        return false;
    };


    _convBytesToInteger (aBytes) {
        // Byte order = Big Endian (bigger first)
        let nVal = 0;
        let nWeight = (aBytes.length - 1) * 8;
        for (let n of aBytes) {
            nVal += n << nWeight;
            nWeight = nWeight - 8;
        }
        return nVal;
    };


    lookup (sWord) {
        // returns true if sWord in dictionary (strict verification)
        let iAddr = 0;
        for (let c of sWord) {
            if (!this.dChar.has(c)) {
                return false;
            }
            iAddr = this._lookupArcNode(this.dChar.get(c), iAddr);
            if (iAddr === null) {
                return false;
            }
        }
        return Boolean(this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask);
    };


    getMorph (sWord) {
        // retrieves morphologies list, different casing allowed
        let l = this.morph(sWord);
        if (sWord[0].gl_isUpperCase()) {
            l = l.concat(this.morph(sWord.toLowerCase()));
            if (sWord.gl_isUpperCase() && sWord.length > 1) {
                l = l.concat(this.morph(sWord.gl_toCapitalize()));
            }
        }
        return l;
    };


    // morph (sWord) {
    //     is defined in constructor
    // };
    
    // VERSION 1
    _morph1 (sWord) {
        // returns morphologies of sWord
        let iAddr = 0;
        for (let c of sWord) {
            if (!this.dChar.has(c)) {







<
>










<
>














<
>











<
>



|







123
124
125
126
127
128
129

130
131
132
133
134
135
136
137
138
139
140

141
142
143
144
145
146
147
148
149
150
151
152
153
154
155

156
157
158
159
160
161
162
163
164
165
166
167

168
169
170
171
172
173
174
175
176
177
178
179
                }
                return !!this.lookup(sWord.slice(0, 1).toLowerCase() + sWord.slice(1));
            } else {
                return !!this.lookup(sWord.toLowerCase());
            }
        }
        return false;

    }

    _convBytesToInteger (aBytes) {
        // Byte order = Big Endian (bigger first)
        let nVal = 0;
        let nWeight = (aBytes.length - 1) * 8;
        for (let n of aBytes) {
            nVal += n << nWeight;
            nWeight = nWeight - 8;
        }
        return nVal;

    }

    lookup (sWord) {
        // returns true if sWord in dictionary (strict verification)
        let iAddr = 0;
        for (let c of sWord) {
            if (!this.dChar.has(c)) {
                return false;
            }
            iAddr = this._lookupArcNode(this.dChar.get(c), iAddr);
            if (iAddr === null) {
                return false;
            }
        }
        return Boolean(this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask);

    }

    getMorph (sWord) {
        // retrieves morphologies list, different casing allowed
        let l = this.morph(sWord);
        if (sWord[0].gl_isUpperCase()) {
            l = l.concat(this.morph(sWord.toLowerCase()));
            if (sWord.gl_isUpperCase() && sWord.length > 1) {
                l = l.concat(this.morph(sWord.gl_toCapitalize()));
            }
        }
        return l;

    }

    // morph (sWord) {
    //     is defined in constructor
    // }
    
    // VERSION 1
    _morph1 (sWord) {
        // returns morphologies of sWord
        let iAddr = 0;
        for (let c of sWord) {
            if (!this.dChar.has(c)) {
203
204
205
206
207
208
209
210

211
212
213
214
215
216
217
                    }
                }
                iAddr = iEndArcAddr + this.nBytesNodeAddress;
            }
            return l;
        }
        return [];
    };


    _stem1 (sWord) {
        // returns stems list of sWord
        let iAddr = 0;
        for (let c of sWord) {
            if (!this.dChar.has(c)) {
                return [];







<
>







205
206
207
208
209
210
211

212
213
214
215
216
217
218
219
                    }
                }
                iAddr = iEndArcAddr + this.nBytesNodeAddress;
            }
            return l;
        }
        return [];

    }

    _stem1 (sWord) {
        // returns stems list of sWord
        let iAddr = 0;
        for (let c of sWord) {
            if (!this.dChar.has(c)) {
                return [];
233
234
235
236
237
238
239
240

241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260

261
262
263
264
265

266
267
268
269

270
271
272
273

274
275
276
277
278
279

280
281
282
283

284
285
286
287

288
289
290
291
292
293
                    l.push(this.funcStemming(sWord, this.lArcVal[nArc]));
                }
                iAddr = iEndArcAddr + this.nBytesNodeAddress;
            }
            return l;
        }
        return [];
    };


    _lookupArcNode1 (nVal, iAddr) {
        // looks if nVal is an arc at the node at iAddr, if yes, returns address of next node else None
        while (true) {
            let iEndArcAddr = iAddr+this.nBytesArc;
            let nRawArc = this._convBytesToInteger(this.byDic.slice(iAddr, iEndArcAddr));
            if (nVal == (nRawArc & this._arcMask)) {
                // the value we are looking for 
                // we return the address of the next node
                return this._convBytesToInteger(this.byDic.slice(iEndArcAddr, iEndArcAddr+this.nBytesNodeAddress));
            }
            else {
                // value not found
                if (nRawArc & this._lastArcMask) {
                    return null;
                }
                iAddr = iEndArcAddr + this.nBytesNodeAddress;
            }
        }
    };


    // VERSION 2
    // Placeholders for dictionary version 2: nothing is implemented yet,
    // each method has an empty body and therefore returns undefined.
    _morph2 (sWord) {
        // TODO: not implemented
    };


    _stem2 (sWord) {
        // TODO: not implemented
    };


    _lookupArcNode2 (nVal, iAddr) {
        // TODO: not implemented
    };



    // VERSION 3
    // Placeholders for dictionary version 3: nothing is implemented yet,
    // each method has an empty body and therefore returns undefined.
    _morph3 (sWord) {
        // TODO: not implemented
    };


    _stem3 (sWord) {
        // TODO: not implemented
    };


    _lookupArcNode3 (nVal, iAddr) {
        // TODO: not implemented
    };

}


if (typeof(exports) !== 'undefined') {
    exports.IBDAWG = IBDAWG;
}







<
>



















<
>




<
>



<
>



<
>





<
>



<
>



<
>






235
236
237
238
239
240
241

242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261

262
263
264
265
266

267
268
269
270

271
272
273
274

275
276
277
278
279
280

281
282
283
284

285
286
287
288

289
290
291
292
293
294
295
                    l.push(this.funcStemming(sWord, this.lArcVal[nArc]));
                }
                iAddr = iEndArcAddr + this.nBytesNodeAddress;
            }
            return l;
        }
        return [];

    }

    _lookupArcNode1 (nVal, iAddr) {
        // looks if nVal is an arc at the node at iAddr, if yes, returns address of next node else None
        while (true) {
            let iEndArcAddr = iAddr+this.nBytesArc;
            let nRawArc = this._convBytesToInteger(this.byDic.slice(iAddr, iEndArcAddr));
            if (nVal == (nRawArc & this._arcMask)) {
                // the value we are looking for 
                // we return the address of the next node
                return this._convBytesToInteger(this.byDic.slice(iEndArcAddr, iEndArcAddr+this.nBytesNodeAddress));
            }
            else {
                // value not found
                if (nRawArc & this._lastArcMask) {
                    return null;
                }
                iAddr = iEndArcAddr + this.nBytesNodeAddress;
            }
        }

    }

    // VERSION 2
    // Placeholders for dictionary version 2: nothing is implemented yet,
    // each method has an empty body and therefore returns undefined.
    _morph2 (sWord) {
        // TODO: not implemented

    }

    _stem2 (sWord) {
        // TODO: not implemented

    }

    _lookupArcNode2 (nVal, iAddr) {
        // TODO: not implemented

    }


    // VERSION 3
    // Placeholders for dictionary version 3: nothing is implemented yet,
    // each method has an empty body and therefore returns undefined.
    _morph3 (sWord) {
        // TODO: not implemented

    }

    _stem3 (sWord) {
        // TODO: not implemented

    }

    _lookupArcNode3 (nVal, iAddr) {
        // TODO: not implemented

    }
}


if (typeof(exports) !== 'undefined') {
    exports.IBDAWG = IBDAWG;
}

Modified gc_core/js/jsex_map.js from [985754ee9e] to [ca76af0666].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47

// Map

// Grammalecte extensions of Map, installed once: Map.prototype.grammalecte is
// the marker preventing a second installation.
if (Map.prototype.grammalecte === undefined) {
    // Returns a new Map with the same entries (keys and values are not cloned).
    Map.prototype.gl_shallowCopy = function () {
        let oNewMap = new Map();
        for (let [key, val] of this.entries()) {
            oNewMap.set(key, val);
        }
        return oNewMap;
    };

    // Returns the value stored for key, or defaultValue when get() gives
    // undefined (missing key, or key explicitly mapped to undefined).
    Map.prototype.gl_get = function (key, defaultValue) {
        let res = this.get(key);
        if (res !== undefined) {
            return res;
        }
        return defaultValue;
    };

    // Default .toString() gives nothing useful.
    // Returns '{ k: v, k2: v2 }' with string keys and values double-quoted;
    // an empty Map gives '{ }'.
    Map.prototype.gl_toString = function () {
        const aItems = [];
        for (let [k, v] of this.entries()) {
            // String() instead of .toString(): null and undefined have no methods
            const sKey = (typeof k === "string") ? '"' + k + '"' : String(k);
            const sVal = (typeof v === "string") ? '"' + v + '"' : String(v);
            aItems.push(sKey + ": " + sVal);
        }
        if (aItems.length === 0) {
            // slicing the trailing ", " off an empty list used to eat the opening brace
            return "{ }";
        }
        return "{ " + aItems.join(", ") + " }";
    };

    // Copies every entry of dDict into this Map, overwriting existing keys.
    Map.prototype.gl_update = function (dDict) {
        for (let [k, v] of dDict.entries()) {
            this.set(k, v);
        }
    };

    // Copies from dDict only the entries whose key already exists in this Map.
    Map.prototype.gl_updateOnlyExistingKeys = function (dDict) {
        for (let [k, v] of dDict.entries()) {
            if (this.has(k)){
                this.set(k, v);
            }
        }
    };

    Map.prototype.grammalecte = true;
}










|







|








|

|





|







|



1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47

// Map

// Grammalecte extensions of Map, installed once: Map.prototype.grammalecte is
// the marker preventing a second installation.
if (Map.prototype.grammalecte === undefined) {
    // Returns a new Map with the same entries (keys and values are not cloned).
    Map.prototype.gl_shallowCopy = function () {
        let oNewMap = new Map();
        for (let [key, val] of this.entries()) {
            oNewMap.set(key, val);
        }
        return oNewMap;
    };

    // Returns the value stored for key, or defaultValue when get() gives
    // undefined (missing key, or key explicitly mapped to undefined).
    Map.prototype.gl_get = function (key, defaultValue) {
        let res = this.get(key);
        if (res !== undefined) {
            return res;
        }
        return defaultValue;
    };

    // Default .toString() gives nothing useful.
    // Returns '{ k: v, k2: v2 }' with string keys and values double-quoted;
    // an empty Map gives '{ }'.
    Map.prototype.gl_toString = function () {
        const aItems = [];
        for (let [k, v] of this.entries()) {
            // String() instead of .toString(): null and undefined have no methods
            const sKey = (typeof k === "string") ? '"' + k + '"' : String(k);
            const sVal = (typeof v === "string") ? '"' + v + '"' : String(v);
            aItems.push(sKey + ": " + sVal);
        }
        if (aItems.length === 0) {
            // slicing the trailing ", " off an empty list used to eat the opening brace
            return "{ }";
        }
        return "{ " + aItems.join(", ") + " }";
    };

    // Copies every entry of dDict into this Map, overwriting existing keys.
    Map.prototype.gl_update = function (dDict) {
        for (let [k, v] of dDict.entries()) {
            this.set(k, v);
        }
    };

    // Copies from dDict only the entries whose key already exists in this Map.
    Map.prototype.gl_updateOnlyExistingKeys = function (dDict) {
        for (let [k, v] of dDict.entries()) {
            if (this.has(k)){
                this.set(k, v);
            }
        }
    };

    Map.prototype.grammalecte = true;
}

Modified gc_core/js/jsex_regex.js from [b8b02d05dd] to [8feeee694f].

38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
                            m.end.push(m.index + codePos + m[i].length);
                        } else if (codePos === "$") {
                            // at the end of the pattern
                            m.start.push(this.lastIndex - m[i].length);
                            m.end.push(this.lastIndex);
                        } else if (codePos === "w") {
                            // word in the middle of the pattern
                            iPos = m[0].search("[ ’,()«»“”]"+m[i]+"[ ,’()«»“”]") + 1 + m.index
                            m.start.push(iPos);
                            m.end.push(iPos + m[i].length)
                        } else if (codePos === "*") {
                            // anywhere
                            iPos = m[0].indexOf(m[i]) + m.index;
                            m.start.push(iPos);
                            m.end.push(iPos + m[i].length)
                        } else if (codePos === "**") {
                            // anywhere after previous group
                            iPos = m[0].indexOf(m[i], m.end[i-1]-m.index) + m.index;
                            m.start.push(iPos);
                            m.end.push(iPos + m[i].length)
                        } else if (codePos.startsWith(">")) {
                            // >x:_
                            // todo: look in substring x
                            iPos = m[0].indexOf(m[i]) + m.index;
                            m.start.push(iPos);
                            m.end.push(iPos + m[i].length);
                        } else {







|

|




|




|







38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
                            m.end.push(m.index + codePos + m[i].length);
                        } else if (codePos === "$") {
                            // at the end of the pattern
                            m.start.push(this.lastIndex - m[i].length);
                            m.end.push(this.lastIndex);
                        } else if (codePos === "w") {
                            // word in the middle of the pattern
                            iPos = m[0].search("[ ’,()«»“”]"+m[i]+"[ ,’()«»“”]") + 1 + m.index;
                            m.start.push(iPos);
                            m.end.push(iPos + m[i].length);
                        } else if (codePos === "*") {
                            // anywhere
                            iPos = m[0].indexOf(m[i]) + m.index;
                            m.start.push(iPos);
                            m.end.push(iPos + m[i].length);
                        } else if (codePos === "**") {
                            // anywhere after previous group
                            iPos = m[0].indexOf(m[i], m.end[i-1]-m.index) + m.index;
                            m.start.push(iPos);
                            m.end.push(iPos + m[i].length);
                        } else if (codePos.startsWith(">")) {
                            // >x:_
                            // todo: look in substring x
                            iPos = m[0].indexOf(m[i]) + m.index;
                            m.start.push(iPos);
                            m.end.push(iPos + m[i].length);
                        } else {
79
80
81
82
83
84
85
86
87
88
89
            if (typeof(helpers) !== "undefined") {
                helpers.logerror(e);
            } else {
                console.error(e);
            }
        }
        return m;
    }

    RegExp.prototype.grammalecte = true;
}







|



79
80
81
82
83
84
85
86
87
88
89
            if (typeof(helpers) !== "undefined") {
                helpers.logerror(e);
            } else {
                console.error(e);
            }
        }
        return m;
    };

    RegExp.prototype.grammalecte = true;
}

Modified gc_core/js/jsex_string.js from [1e9c89a872] to [86533aa4da].

11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
        let iPos = 0;
        let nStep = (bOverlapping) ? 1 : sSearch.length;
        while ((iPos = this.indexOf(sSearch, iPos)) >= 0) {
            nOccur++;
            iPos += nStep;
        }
        return nOccur;
    }
    // true if the string is made only of digits 0-9 and superscript digits
    String.prototype.gl_isDigit = function () {
        return /^[0-9⁰¹²³⁴⁵⁶⁷⁸⁹]+$/.test(this);
    };
    // true if the string is made only of lowercase letters, digits and hyphens
    String.prototype.gl_isLowerCase = function () {
        return /^[a-zà-öø-ÿ0-9-]+$/.test(this);
    };
    // true if the string is made only of uppercase letters, digits and hyphens
    String.prototype.gl_isUpperCase = function () {
        return /^[A-ZÀ-ÖØ-ߌ0-9-]+$/.test(this);
    };
    // true if the string is one uppercase letter followed by at least one
    // lowercase letter, apostrophe or hyphen
    String.prototype.gl_isTitle = function () {
        return /^[A-ZÀ-ÖØ-ߌ][a-zà-öø-ÿ'’-]+$/.test(this);
    };
    // first character uppercased, rest lowercased
    String.prototype.gl_toCapitalize = function () {
        return this.slice(0,1).toUpperCase() + this.slice(1).toLowerCase();
    };
    // Replaces every "\N" (backslash followed by the index N) by oMatch[N],
    // for N from 0 to oMatch.length - 1, in that order.
    String.prototype.gl_expand = function (oMatch) {
        let sNew = this;
        for (let i = 0; i < oMatch.length ; i++) {
            let z = new RegExp("\\\\" + i, "g");
            // function replacer: the group text is inserted verbatim, whereas a
            // replacement string would interpret "$&", "$1", "$$"… found in it
            sNew = sNew.replace(z, () => oMatch[i]);
        }
        return sNew;
    };
    // Removes trailing characters belonging to sChars.
    // NOTE(review): sChars is inserted unescaped in a character class, so "]", "\"
    // or a leading "^" in it change the pattern — confirm callers never pass them.
    String.prototype.gl_trimRight = function (sChars) {
        let z = new RegExp("["+sChars+"]+$");
        return this.replace(z, "");
    };
    // Removes leading characters belonging to sChars (same caveat on sChars).
    String.prototype.gl_trimLeft = function (sChars) {
        let z = new RegExp("^["+sChars+"]+");
        return this.replace(z, "");
    };
    // Removes leading and trailing characters belonging to sChars (same caveat on sChars).
    String.prototype.gl_trim = function (sChars) {
        let z1 = new RegExp("^["+sChars+"]+");
        let z2 = new RegExp("["+sChars+"]+$");
        return this.replace(z1, "").replace(z2, "");
    };

    String.prototype.grammalecte = true;
}







|


|


|


|


|


|







|



|



|




|



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
        let iPos = 0;
        let nStep = (bOverlapping) ? 1 : sSearch.length;
        while ((iPos = this.indexOf(sSearch, iPos)) >= 0) {
            nOccur++;
            iPos += nStep;
        }
        return nOccur;
    };
    // true if the string is made only of digits 0-9 and superscript digits
    String.prototype.gl_isDigit = function () {
        return (this.search(/^[0-9⁰¹²³⁴⁵⁶⁷⁸⁹]+$/) !== -1);
    };
    // true if the string is made only of lowercase letters, digits and hyphens
    String.prototype.gl_isLowerCase = function () {
        return (this.search(/^[a-zà-öø-ÿ0-9-]+$/) !== -1);
    };
    // true if the string is made only of uppercase letters, digits and hyphens
    String.prototype.gl_isUpperCase = function () {
        return (this.search(/^[A-ZÀ-ÖØ-ߌ0-9-]+$/) !== -1);
    };
    // true if the string is one uppercase letter followed by at least one
    // lowercase letter, apostrophe or hyphen
    String.prototype.gl_isTitle = function () {
        return (this.search(/^[A-ZÀ-ÖØ-ߌ][a-zà-öø-ÿ'’-]+$/) !== -1);
    };
    // first character uppercased, rest lowercased
    String.prototype.gl_toCapitalize = function () {
        return this.slice(0,1).toUpperCase() + this.slice(1).toLowerCase();
    };
    // Replaces every "\N" (backslash followed by the index N) by oMatch[N],
    // for N from 0 to oMatch.length - 1, in that order.
    String.prototype.gl_expand = function (oMatch) {
        let sNew = this;
        for (let i = 0; i < oMatch.length ; i++) {
            let z = new RegExp("\\\\" + i, "g");
            // function replacer: the group text is inserted verbatim, whereas a
            // replacement string would interpret "$&", "$1", "$$"… found in it
            sNew = sNew.replace(z, () => oMatch[i]);
        }
        return sNew;
    };
    // Removes trailing characters belonging to sChars.
    // NOTE(review): sChars is inserted unescaped in a character class, so "]", "\"
    // or a leading "^" in it change the pattern — confirm callers never pass them.
    String.prototype.gl_trimRight = function (sChars) {
        let z = new RegExp("["+sChars+"]+$");
        return this.replace(z, "");
    };
    // Removes leading characters belonging to sChars (same caveat on sChars).
    String.prototype.gl_trimLeft = function (sChars) {
        let z = new RegExp("^["+sChars+"]+");
        return this.replace(z, "");
    };
    // Removes leading and trailing characters belonging to sChars (same caveat on sChars).
    String.prototype.gl_trim = function (sChars) {
        let z1 = new RegExp("^["+sChars+"]+");
        let z2 = new RegExp("["+sChars+"]+$");
        return this.replace(z1, "").replace(z2, "");
    };

    String.prototype.grammalecte = true;
}

Modified gc_core/js/lang_core/gc_engine.js from [ce3013cd59] to [2f4877bef2].

1
2


3
4
5
6










7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45











46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96

97



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174

175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216

217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258

259

260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293

294



295


296
297
298
299
300


301



302
303
304
305
306
307
308
309
310
311
312
313
314

315
316
317

318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
// Grammar checker engine



${string}
${regex}
${map}












function capitalizeArray (aArray) {
    // Returns a new array holding each string of <aArray> passed through gl_toCapitalize().
    // (explicit loop kept on purpose: the original note questions mapping on user defined functions)
    const aCapitalized = [];
    for (const sItem of aArray) {
        aCapitalized.push(sItem.gl_toCapitalize());
    }
    return aCapitalized;
}

const ibdawg = require("resource://grammalecte/ibdawg.js");
const helpers = require("resource://grammalecte/helpers.js");
const gc_options = require("resource://grammalecte/${lang}/gc_options.js");
const cr = require("resource://grammalecte/${lang}/cregex.js");
const text = require("resource://grammalecte/text.js");
const echo = require("resource://grammalecte/helpers.js").echo;

const lang = "${lang}";
const locales = ${loc};
const pkg = "${implname}";
const name = "${name}";
const version = "${version}";
const author = "${author}";

// commons regexes
const _zEndOfSentence = new RegExp ('([.?!:;…][ .?!… »”")]*|.$)', "g");
const _zBeginOfParagraph = new RegExp ("^[-  –—.,;?!…]*", "ig");
const _zEndOfParagraph = new RegExp ("[-  .,;?!…–—]*$", "ig");

// grammar rules and dictionary
//const _rules = require("./gc_rules.js");
let _sContext = "";                                 // what software is running
const _rules = require("resource://grammalecte/${lang}/gc_rules.js");
let _dOptions = null;
let _aIgnoredRules = new Set();
let _oDict = null;
let _dAnalyses = new Map();                         // cache for data from dictionary













///// Parsing

function parse (sText, sCountry="${country_default}", bDebug=false, bContext=false) {
    // Analyses the paragraph <sText> and returns the list of detected errors.
    //   sCountry: passed through to _proofread
    //   bDebug:   passed through to _proofread
    //   bContext: passed through to _proofread
    // Returns an array with the values of the error map (one entry per error position).
    let dErrors = new Map();    // stays a valid (empty) map when the paragraph pass throws
    let errs;
    let sAlt = sText;           // untouched copy of the paragraph
    let dDA = new Map();        // Disambiguator
    let dPriority = new Map();  // Key = position; value = priority
    let sNew = "";

    // parse paragraph
    try {
        [sNew, dErrors] = _proofread(sText, sAlt, 0, true, dDA, dPriority, sCountry, bDebug, bContext);
        if (sNew) {
            sText = sNew;
        }
    }
    catch (e) {
        helpers.logerror(e);
    }

    // cleanup (characters written as escapes so that they cannot be confused with a plain space)
    sText = sText.replace(/\u00A0/g, " ")   // nbsp
                 .replace(/\u202F/g, " ")   // snbsp
                 .replace(/'/g, "’")
                 .replace(/\u2011/g, "-");  // nobreakdash

    // parse sentence
    for (let [iStart, iEnd] of _getSentenceBoundaries(sText)) {
        // a chained comparison (4 < x < 2000) is always true in JavaScript: both bounds must be tested explicitly
        let nLength = iEnd - iStart;
        if (4 < nLength && nLength < 2000) {
            dDA.clear();
            //echo(sText.slice(iStart, iEnd));
            try {
                [, errs] = _proofread(sText.slice(iStart, iEnd), sAlt.slice(iStart, iEnd), iStart, false, dDA, dPriority, sCountry, bDebug, bContext);
                dErrors.gl_update(errs);
            }
            catch (e) {
                helpers.logerror(e);
            }
        }
    }
    return Array.from(dErrors.values());
}




function* _getSentenceBoundaries (sText) {
    // Generator: yields [iStart, iEnd] for each sentence found in <sText>.
    // The first sentence starts after the characters matched by _zBeginOfParagraph;
    // each sentence ends where _zEndOfSentence stops matching.
    // Both regexes are shared and global: their lastIndex survives between calls,
    // so it must be reset, otherwise the search may start in the middle of <sText>.
    _zBeginOfParagraph.lastIndex = 0;
    _zEndOfSentence.lastIndex = 0;
    _zBeginOfParagraph.exec(sText);
    let iStart = _zBeginOfParagraph.lastIndex;
    while (_zEndOfSentence.exec(sText) !== null) {
        yield [iStart, _zEndOfSentence.lastIndex];
        iStart = _zEndOfSentence.lastIndex;
    }
}

// Applies the rules returned by _getRules(bParagraph) to the text <s>.
//   s:          text to check; it is replaced by the result of each "~" (text processor) action
//   sx:         text passed unchanged to the condition functions and to _createError
//   nOffset:    value added to the match positions to compute the error positions
//   bParagraph: selects the rule list (see _getRules)
//   dDA:        map passed to the condition and disambiguation functions
//   dPriority:  map position -> priority of the error already recorded at this position
//   sCountry:   passed to the condition functions
//   bDebug:     if true, text processor and disambiguation actions are echoed
//   bContext:   passed to _createError
// Returns [s, dErrs] if the text has been modified by a text processor, [false, dErrs] otherwise,
// where dErrs is a Map: error start position -> error object.
function _proofread (s, sx, nOffset, bParagraph, dDA, dPriority, sCountry, bDebug, bContext) {
    let dErrs = new Map();
    let bChange = false;
    let bIdRule = option('idrule');
    let m;
    let bCondMemo;
    let nErrorStart;

    for (let [sOption, lRuleGroup] of _getRules(bParagraph)) {
        // a rule group is applied if it has no option or if its option is active
        if (!sOption || option(sOption)) {
            for (let [zRegex, bUppercase, sLineId, sRuleId, nPriority, lActions, lGroups, lNegLookBefore] of lRuleGroup) {
                if (!_aIgnoredRules.has(sRuleId)) {
                    // loop on every match of the rule (gl_exec2 returns null when there is no more match)
                    while ((m = zRegex.gl_exec2(s, lGroups, lNegLookBefore)) !== null) {
                        bCondMemo = null;
                        /*if (bDebug) {
                            echo(">>>> Rule # " + sLineId + " - Text: " + s + " opt: "+ sOption);
                        }*/
                        for (let [sFuncCond, cActionType, sWhat, ...eAct] of lActions) {
                        // action in lActions: [ condition, action type, replacement/suggestion/action[, iGroup[, message, URL]] ]
                            try {
                                //echo(oEvalFunc[sFuncCond]);
                                // no condition means true; the previous result is given to the next condition function
                                bCondMemo = (!sFuncCond || oEvalFunc[sFuncCond](s, sx, m, dDA, sCountry, bCondMemo))
                                if (bCondMemo) {
                                    switch (cActionType) {
                                        case "-":
                                            // grammar error
                                            //echo("-> error detected in " + sLineId + "\nzRegex: " + zRegex.source);
                                            nErrorStart = nOffset + m.start[eAct[0]];
                                            // recorded if there is no error yet at this position in dErrs,
                                            // or if this rule has a higher priority than the one stored in dPriority
                                            if (!dErrs.has(nErrorStart) || nPriority > dPriority.get(nErrorStart)) {
                                                dErrs.set(nErrorStart, _createError(s, sx, sWhat, nOffset, m, eAct[0], sLineId, sRuleId, bUppercase, eAct[1], eAct[2], bIdRule, sOption, bContext));
                                                dPriority.set(nErrorStart, nPriority);
                                            }
                                            break;
                                        case "~":
                                            // text processor
                                            //echo("-> text processor by " + sLineId + "\nzRegex: " + zRegex.source);
                                            s = _rewrite(s, sWhat, eAct[0], m, bUppercase);
                                            bChange = true;
                                            if (bDebug) {
                                                echo("~ " + s + "  -- " + m[eAct[0]] + "  # " + sLineId);
                                            }
                                            break;
                                        case "=":
                                            // disambiguation
                                            //echo("-> disambiguation by " + sLineId + "\nzRegex: " + zRegex.source);
                                            oEvalFunc[sWhat](s, m, dDA);
                                            if (bDebug) {
                                                echo("= " + m[0] + "  # " + sLineId + "\nDA: " + dDA.gl_toString());
                                            }
                                            break;
                                        case ">":
                                            // we do nothing, this test is just a condition to apply all following actions
                                            break;
                                        default:
                                            echo("# error: unknown action at " + sLineId);
                                    }
                                } else {
                                    // a false ">" condition cancels the remaining actions for this match
                                    if (cActionType == ">") {
                                        break;
                                    }
                                }
                            }
                            catch (e) {
                                // an exception in one action is logged; the loop goes on with the next action
                                echo(s);
                                echo("# line id: " + sLineId + "\n# rule id: " + sRuleId);
                                helpers.logerror(e);

                            }
                        }
                    }
                }
            }
        }
    }
    if (bChange) {
        return [s, dErrs];
    }
    return [false, dErrs];
}

function _createError (s, sx, sRepl, nOffset, m, iGroup, sLineId, sRuleId, bUppercase, sMsg, sURL, bIdRule, sOption, bContext) {
    // Builds and returns the error object for the group <iGroup> of the match <m>.
    //   sRepl: "=name" -> suggestions computed by oEvalFunc[name](s, m) ("|"-separated string),
    //          "_"     -> no suggestion,
    //          other   -> sRepl.gl_expand(m) split on "|"
    //   sMsg:  "=name" -> message computed by oEvalFunc[name](s, m), else sMsg.gl_expand(m)
    //   bUppercase: if true and the matched group begins with an uppercase letter, suggestions are capitalized
    //   bIdRule:    if true, line id and rule id are appended to the message
    //   bContext:   if true, the underlined text and up to 80 characters before/after (taken from <sx>) are added
    let oErr = {};
    oErr["nStart"] = nOffset + m.start[iGroup];
    oErr["nEnd"] = nOffset + m.end[iGroup];
    oErr["sLineId"] = sLineId;
    oErr["sRuleId"] = sRuleId;
    oErr["sType"] = (sOption) ? sOption : "notype";
    // suggestions
    if (sRepl.slice(0,1) === "=") {
        let sugg = oEvalFunc[sRepl.slice(1)](s, m);
        if (sugg) {
            if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) {
                oErr["aSuggestions"] = capitalizeArray(sugg.split("|"));
            } else {
                oErr["aSuggestions"] = sugg.split("|");
            }
        } else {
            oErr["aSuggestions"] = [];
        }
    } else if (sRepl == "_") {
        oErr["aSuggestions"] = [];
    } else {
        if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) {
            oErr["aSuggestions"] = capitalizeArray(sRepl.gl_expand(m).split("|"));
        } else {
            oErr["aSuggestions"] = sRepl.gl_expand(m).split("|");
        }
    }
    // Message
    // declared locally: it used to be assigned without declaration (implicit global, ReferenceError in strict mode)
    let sMessage = "";
    if (sMsg.slice(0,1) === "=") {
        sMessage = oEvalFunc[sMsg.slice(1)](s, m);
    } else {
        sMessage = sMsg.gl_expand(m);
    }
    if (bIdRule) {
        sMessage += " ##" + sLineId + " #" + sRuleId;
    }
    oErr["sMessage"] = sMessage;
    // URL
    oErr["URL"] = sURL || "";
    // Context
    if (bContext) {
        oErr["sUnderlined"] = sx.slice(m.start[iGroup], m.end[iGroup]);
        oErr["sBefore"] = sx.slice(Math.max(0, m.start[iGroup]-80), m.start[iGroup]);
        oErr["sAfter"] = sx.slice(m.end[iGroup], m.end[iGroup]+80);
    }
    return oErr;
}

function _rewrite (s, sRepl, iGroup, m, bUppercase) {
    // Text processor: returns <s> where the group <iGroup> of the match <m> is overwritten.
    //   "*"            -> spaces only
    //   ">", "_", "~"  -> this character followed by spaces
    //   "@"            -> "@" repeated
    //   "=name"        -> result of oEvalFunc[name](s, m), padded with spaces (capitalized if required)
    //   other          -> sRepl.gl_expand(m), padded with spaces
    const nFrom = m.start[iGroup];
    const nTo = m.end[iGroup];
    const nWidth = nTo - nFrom;
    let sPatch = "";
    switch (sRepl) {
        case "*":
            sPatch = " ".repeat(nWidth);
            break;
        case ">":
        case "_":
        case "~":
            sPatch = sRepl + " ".repeat(nWidth-1);
            break;
        case "@":
            sPatch = "@".repeat(nWidth);
            break;
        default:
            if (sRepl.slice(0,1) === "=") {
                sPatch = oEvalFunc[sRepl.slice(1)](s, m);
                sPatch = sPatch + " ".repeat(nWidth-sPatch.length);
                if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) {
                    sPatch = sPatch.gl_toCapitalize();
                }
            } else {
                sPatch = sRepl.gl_expand(m);
                sPatch = sPatch + " ".repeat(nWidth-sPatch.length);
            }
    }
    return s.slice(0, nFrom) + sPatch + s.slice(nTo);
}


// Adds <sRuleId> to the set of ignored rules (_aIgnoredRules); _proofread skips the rules found in this set.
function ignoreRule (sRuleId) {
    _aIgnoredRules.add(sRuleId);
}

// Empties the set of ignored rules (_aIgnoredRules).
function resetIgnoreRules () {
    _aIgnoredRules.clear();
}

// Removes <sRuleId> from the set of ignored rules (_aIgnoredRules).
function reactivateRule (sRuleId) {
    _aIgnoredRules.delete(sRuleId);
}

function* listRules (sFilter=null) {
    // Generator: yields [sOption, sLineId, sRuleId] for each paragraph rule, then for each sentence rule.
    //   sFilter: optional regular expression (pattern string or RegExp); when given,
    //            only the rules whose identifier matches it are yielded.
    // Any exception (including an invalid filter pattern) is logged and ends the listing.
    try {
        let zFilter = null;
        if (sFilter) {
            // the filter is the regex, the rule id is the tested string (strings have no test() method);
            // "g" and "y" flags are dropped so that test() is stateless
            zFilter = (sFilter instanceof RegExp)
                ? new RegExp(sFilter.source, sFilter.flags.replace(/[gy]/g, ""))
                : new RegExp(sFilter);
        }
        for (const bParagraph of [true, false]) {
            for (const [sOption, lRuleGroup] of _getRules(bParagraph)) {
                for (const [,, sLineId, sRuleId] of lRuleGroup) {
                    if (!zFilter || zFilter.test(sRuleId)) {
                        yield [sOption, sLineId, sRuleId];
                    }
                }
            }
        }
    }
    catch (e) {
        helpers.logerror(e);
    }
}







//////// init

// Initializes the engine: creates the dictionary object (_oDict), stores <sContext>
// and sets the current options to a copy of the default options for this context.
// Exceptions are logged with helpers.logerror and not rethrown.
function load (sContext="JavaScript") {
    try {


        _oDict = new ibdawg.IBDAWG("${dic_name}.json");



        _sContext = sContext;
        _dOptions = gc_options.getOptions(sContext).gl_shallowCopy();     // duplication necessary, to be able to reset to default
    }
    catch (e) {
        helpers.logerror(e);
    }
}

// Sets the option <sOpt> to <bVal>; unknown options (keys absent from _dOptions) are ignored.
// NOTE(review): _dOptions is null until it is assigned in load() -- presumably load() must be called first.
function setOption (sOpt, bVal) {
    if (_dOptions.has(sOpt)) {
        _dOptions.set(sOpt, bVal);
    }
}


// Updates the current options from <dOpt> via gl_updateOnlyExistingKeys
// (presumably only the keys already present in _dOptions are updated, as the helper name suggests).
function setOptions (dOpt) {
    _dOptions.gl_updateOnlyExistingKeys(dOpt);

}

// Returns the current options object itself (not a copy).
function getOptions () {
    return _dOptions;
}

// Returns a shallow copy of the default options for the context stored by load().
function getDefaultOptions () {
    return gc_options.getOptions(_sContext).gl_shallowCopy();
}

// Replaces the current options with a shallow copy of the default options for the stored context.
function resetOptions () {
    _dOptions = gc_options.getOptions(_sContext).gl_shallowCopy();
}

// Returns the dictionary object created by load() (null if load() has not assigned it).
function getDictionary () {
    return _oDict;
}

function _getRules (bParagraph) {
    // Returns the paragraph rules if <bParagraph> is truthy, the sentence rules otherwise.
    return (bParagraph) ? _rules.lParagraphRules : _rules.lSentenceRules;
}



//////// common functions

function option (sOpt) {
    // returns the value stored for option sOpt in the current options (undefined if the option is unknown)
    return _dOptions.get(sOpt);
}

function displayInfo (dDA, aWord) {


>
>




>
>
>
>
>
>
>
>
>
>










<
<
<
<
<
<

<
<
<
<
<
<
|
<
<
<
<
<
<
<
|
<



|


>
>
>
>
>
>
>
>
>
>
>
|

|
|
|
|
|
|
|
|

|
|
|
|
|
|
|
|
|
|

|
|
|
|
|
|
|
|
|
|
|
|
|

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
|
>
>
>

|
|
|
|
|
|
|
|
|

|
|
|
|
|
|
|

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>






<
|
|
|
|
|

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
|
>

|
|
|

|
|
|

|
|
|

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
|
>
>
>
|
>
>

|

|
|
>
>
|
>
>
>
|
|
|
|
|
|
|

|
<
|
|
|
>

|
|
>
|
<
<
<
|

|
|
|

|
|
|

|
|
|

|
|
<

<



<
|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28






29






30







31

32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188

189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331

332
333
334
335
336
337
338
339
340



341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356

357

358
359
360

361
362
363
364
365
366
367
368
// Grammar checker engine

"use strict";

${string}
${regex}
${map}


if (typeof(require) !== 'undefined') {
    var helpers = require("resource://grammalecte/helpers.js");
    var echo = require("resource://grammalecte/helpers.js").echo;
    var gc_options = require("resource://grammalecte/${lang}/gc_options.js");
    var gc_rules = require("resource://grammalecte/${lang}/gc_rules.js");
    var cregex = require("resource://grammalecte/${lang}/cregex.js");
    var text = require("resource://grammalecte/text.js");
}


function capitalizeArray (aArray) {
    // Returns a new array holding each string of <aArray> passed through gl_toCapitalize().
    // (explicit loop kept on purpose: the original note questions mapping on user defined functions)
    const aCapitalized = [];
    for (const sItem of aArray) {
        aCapitalized.push(sItem.gl_toCapitalize());
    }
    return aCapitalized;
}














// data







let _sAppContext = "";                                  // what software is running

let _dOptions = null;
let _aIgnoredRules = new Set();
let _oDict = null;
let _dAnalyses = new Map();                             // cache for data from dictionary


var gc_engine = {

    //// Information

    lang: "${lang}",
    locales: ${loc},
    pkg: "${implname}",
    name: "${name}",
    version: "${version}",
    author: "${author}",

    //// Parsing

    parse: function (sText, sCountry="${country_default}", bDebug=false, bContext=false) {
        // analyses the paragraph sText and returns list of errors
        let dErrors;
        let errs;
        let sAlt = sText;
        let dDA = new Map();        // Disamnbiguator
        let dPriority = new Map();  // Key = position; value = priority
        let sNew = "";

        // parse paragraph
        try {
            [sNew, dErrors] = this._proofread(sText, sAlt, 0, true, dDA, dPriority, sCountry, bDebug, bContext);
            if (sNew) {
                sText = sNew;
            }
        }
        catch (e) {
            helpers.logerror(e);
        }

        // cleanup
        if (sText.includes(" ")) {
            sText = sText.replace(/ /g, ' '); // nbsp
        }
        if (sText.includes(" ")) {
            sText = sText.replace(/ /g, ' '); // snbsp
        }
        if (sText.includes("'")) {
            sText = sText.replace(/'/g, "’");
        }
        if (sText.includes("‑")) {
            sText = sText.replace(/‑/g, "-"); // nobreakdash
        }

        // parse sentence
        for (let [iStart, iEnd] of this._getSentenceBoundaries(sText)) {
            if (4 < (iEnd - iStart) < 2000) {
                dDA.clear();
                //echo(sText.slice(iStart, iEnd));
                try {
                    [, errs] = this._proofread(sText.slice(iStart, iEnd), sAlt.slice(iStart, iEnd), iStart, false, dDA, dPriority, sCountry, bDebug, bContext);
                    dErrors.gl_update(errs);
                }
                catch (e) {
                    helpers.logerror(e);
                }
            }
        }
        return Array.from(dErrors.values());
    },

    _zEndOfSentence: new RegExp ('([.?!:;…][ .?!… »”")]*|.$)', "g"),
    _zBeginOfParagraph: new RegExp ("^[-  –—.,;?!…]*", "ig"),
    _zEndOfParagraph: new RegExp ("[-  .,;?!…–—]*$", "ig"),

    _getSentenceBoundaries: function* (sText) {
        let mBeginOfSentence = this._zBeginOfParagraph.exec(sText)
        let iStart = this._zBeginOfParagraph.lastIndex;
        let m;
        while ((m = this._zEndOfSentence.exec(sText)) !== null) {
            yield [iStart, this._zEndOfSentence.lastIndex];
            iStart = this._zEndOfSentence.lastIndex;
        }
    },

    // Applies the rules returned by this._getRules(bParagraph) to the text <s>.
    //   s:          text to check; it is replaced by the result of each "~" (text processor) action
    //   sx:         text passed unchanged to the condition functions and to _createError
    //   nOffset:    value added to the match positions to compute the error positions
    //   bParagraph: selects the rule list (see _getRules)
    //   dDA:        map passed to the condition and disambiguation functions
    //   dPriority:  map position -> priority of the error already recorded at this position
    //   sCountry:   passed to the condition functions
    //   bDebug:     if true, text processor and disambiguation actions are echoed
    //   bContext:   passed to _createError
    // Returns [s, dErrs] if the text has been modified by a text processor, [false, dErrs] otherwise,
    // where dErrs is a Map: error start position -> error object.
    _proofread: function (s, sx, nOffset, bParagraph, dDA, dPriority, sCountry, bDebug, bContext) {
        let dErrs = new Map();
        let bChange = false;
        let bIdRule = option('idrule');
        let m;
        let bCondMemo;
        let nErrorStart;

        for (let [sOption, lRuleGroup] of this._getRules(bParagraph)) {
            // a rule group is applied if it has no option or if its option is active
            if (!sOption || option(sOption)) {
                for (let [zRegex, bUppercase, sLineId, sRuleId, nPriority, lActions, lGroups, lNegLookBefore] of lRuleGroup) {
                    if (!_aIgnoredRules.has(sRuleId)) {
                        // loop on every match of the rule (gl_exec2 returns null when there is no more match)
                        while ((m = zRegex.gl_exec2(s, lGroups, lNegLookBefore)) !== null) {
                            bCondMemo = null;
                            /*if (bDebug) {
                                echo(">>>> Rule # " + sLineId + " - Text: " + s + " opt: "+ sOption);
                            }*/
                            for (let [sFuncCond, cActionType, sWhat, ...eAct] of lActions) {
                            // action in lActions: [ condition, action type, replacement/suggestion/action[, iGroup[, message, URL]] ]
                                try {
                                    //echo(oEvalFunc[sFuncCond]);
                                    // no condition means true; the previous result is given to the next condition function
                                    bCondMemo = (!sFuncCond || oEvalFunc[sFuncCond](s, sx, m, dDA, sCountry, bCondMemo))
                                    if (bCondMemo) {
                                        switch (cActionType) {
                                            case "-":
                                                // grammar error
                                                //echo("-> error detected in " + sLineId + "\nzRegex: " + zRegex.source);
                                                nErrorStart = nOffset + m.start[eAct[0]];
                                                // recorded if there is no error yet at this position in dErrs,
                                                // or if this rule has a higher priority than the one stored in dPriority
                                                if (!dErrs.has(nErrorStart) || nPriority > dPriority.get(nErrorStart)) {
                                                    dErrs.set(nErrorStart, this._createError(s, sx, sWhat, nOffset, m, eAct[0], sLineId, sRuleId, bUppercase, eAct[1], eAct[2], bIdRule, sOption, bContext));
                                                    dPriority.set(nErrorStart, nPriority);
                                                }
                                                break;
                                            case "~":
                                                // text processor
                                                //echo("-> text processor by " + sLineId + "\nzRegex: " + zRegex.source);
                                                s = this._rewrite(s, sWhat, eAct[0], m, bUppercase);
                                                bChange = true;
                                                if (bDebug) {
                                                    echo("~ " + s + "  -- " + m[eAct[0]] + "  # " + sLineId);
                                                }
                                                break;
                                            case "=":
                                                // disambiguation
                                                //echo("-> disambiguation by " + sLineId + "\nzRegex: " + zRegex.source);
                                                oEvalFunc[sWhat](s, m, dDA);
                                                if (bDebug) {
                                                    echo("= " + m[0] + "  # " + sLineId + "\nDA: " + dDA.gl_toString());
                                                }
                                                break;
                                            case ">":
                                                // we do nothing, this test is just a condition to apply all following actions
                                                break;
                                            default:
                                                echo("# error: unknown action at " + sLineId);
                                        }
                                    } else {
                                        // a false ">" condition cancels the remaining actions for this match
                                        if (cActionType == ">") {
                                            break;
                                        }
                                    }
                                }
                                catch (e) {
                                    // an exception in one action is logged; the loop goes on with the next action
                                    echo(s);
                                    echo("# line id: " + sLineId + "\n# rule id: " + sRuleId);
                                    helpers.logerror(e);
                                }
                            }
                        }
                    }
                }
            }
        }

        if (bChange) {
            return [s, dErrs];
        }
        return [false, dErrs];
    },

    _createError: function (s, sx, sRepl, nOffset, m, iGroup, sLineId, sRuleId, bUppercase, sMsg, sURL, bIdRule, sOption, bContext) {
        let oErr = {};
        oErr["nStart"] = nOffset + m.start[iGroup];
        oErr["nEnd"] = nOffset + m.end[iGroup];
        oErr["sLineId"] = sLineId;
        oErr["sRuleId"] = sRuleId;
        oErr["sType"] = (sOption) ? sOption : "notype";
        // suggestions
        if (sRepl.slice(0,1) === "=") {
            let sugg = oEvalFunc[sRepl.slice(1)](s, m);
            if (sugg) {
                if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) {
                    oErr["aSuggestions"] = capitalizeArray(sugg.split("|"));
                } else {
                    oErr["aSuggestions"] = sugg.split("|");
                }
            } else {
                oErr["aSuggestions"] = [];
            }
        } else if (sRepl == "_") {
            oErr["aSuggestions"] = [];
        } else {
            if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) {
                oErr["aSuggestions"] = capitalizeArray(sRepl.gl_expand(m).split("|"));
            } else {
                oErr["aSuggestions"] = sRepl.gl_expand(m).split("|");
            }
        }
        // Message
        let sMessage = "";
        if (sMsg.slice(0,1) === "=") {
            sMessage = oEvalFunc[sMsg.slice(1)](s, m)
        } else {
            sMessage = sMsg.gl_expand(m);
        }
        if (bIdRule) {
            sMessage += " ##" + sLineId + " #" + sRuleId;
        }
        oErr["sMessage"] = sMessage;
        // URL
        oErr["URL"] = sURL || "";
        // Context
        if (bContext) {
            oErr["sUnderlined"] = sx.slice(m.start[iGroup], m.end[iGroup]);
            oErr["sBefore"] = sx.slice(Math.max(0, m.start[iGroup]-80), m.start[iGroup]);
            oErr["sAfter"] = sx.slice(m.end[iGroup], m.end[iGroup]+80);
        }
        return oErr;
    },

    _rewrite: function (s, sRepl, iGroup, m, bUppercase) {
        // text processor: write sRepl in s at iGroup position"
        let ln = m.end[iGroup] - m.start[iGroup];
        let sNew = "";
        if (sRepl === "*") {
            sNew = " ".repeat(ln);
        } else if (sRepl === ">" || sRepl === "_" || sRepl === "~") {
            sNew = sRepl + " ".repeat(ln-1);
        } else if (sRepl === "@") {
            sNew = "@".repeat(ln);
        } else if (sRepl.slice(0,1) === "=") {
            sNew = oEvalFunc[sRepl.slice(1)](s, m);
            sNew = sNew + " ".repeat(ln-sNew.length);
            if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) {
                sNew = sNew.gl_toCapitalize();
            }
        } else {
            sNew = sRepl.gl_expand(m);
            sNew = sNew + " ".repeat(ln-sNew.length);
        }
        //echo("\n"+s+"\nstart: "+m.start[iGroup]+" end:"+m.end[iGroup])
        return s.slice(0, m.start[iGroup]) + sNew + s.slice(m.end[iGroup]);
    },

    // Actions on rules

    // Adds <sRuleId> to the set of ignored rules (_aIgnoredRules); _proofread skips the rules found in this set.
    ignoreRule: function (sRuleId) {
        _aIgnoredRules.add(sRuleId);
    },

    // Empties the set of ignored rules (_aIgnoredRules).
    resetIgnoreRules: function () {
        _aIgnoredRules.clear();
    },

    // Removes <sRuleId> from the set of ignored rules (_aIgnoredRules).
    reactivateRule: function (sRuleId) {
        _aIgnoredRules.delete(sRuleId);
    },

    listRules: function* (sFilter=null) {
        // generator: returns tuple (sOption, sLineId, sRuleId)
        try {
            for (let [sOption, lRuleGroup] of this._getRules(true)) {
                for (let [,, sLineId, sRuleId,,] of lRuleGroup) {
                    if (!sFilter || sRuleId.test(sFilter)) {
                        yield [sOption, sLineId, sRuleId];
                    }
                }
            }
            for (let [sOption, lRuleGroup] of this._getRules(false)) {
                for (let [,, sLineId, sRuleId,,] of lRuleGroup) {
                    if (!sFilter || sRuleId.test(sFilter)) {
                        yield [sOption, sLineId, sRuleId];
                    }
                }
            }
        }
        catch (e) {
            helpers.logerror(e);
        }
    },

    _getRules: function (bParagraph) {
        if (!bParagraph) {
            return gc_rules.lSentenceRules;
        }
        return gc_rules.lParagraphRules;
    },

    //// Initialization

    // Initializes the engine: creates the dictionary object (_oDict), stores <sContext>
    // and sets the current options to a copy of the default options for this context.
    //   sPath: only used when require() is not available; it is passed to the IBDAWG constructor.
    // Exceptions are logged with helpers.logerror and not rethrown.
    load: function (sContext="JavaScript", sPath="") {
        try {
            if (typeof(require) !== 'undefined') {
                // module environment: the dictionary class is loaded on demand
                var ibdawg = require("resource://grammalecte/ibdawg.js");
                _oDict = new ibdawg.IBDAWG("${dic_name}.json");
            } else {
                // no require(): IBDAWG is expected to be already defined in the current scope
                _oDict = new IBDAWG("${dic_name}.json", sPath);
            }
            _sAppContext = sContext;
            _dOptions = gc_options.getOptions(sContext).gl_shallowCopy();     // duplication necessary, to be able to reset to default
        }
        catch (e) {
            helpers.logerror(e);
        }
    },

    // Returns the dictionary object created by load() (null if load() has not assigned it).
    getDictionary: function () {

        return _oDict;
    },

    //// Options

    // Sets the option <sOpt> to <bVal>; unknown options (keys absent from _dOptions) are ignored.
    // NOTE(review): _dOptions is null until it is assigned in load() -- presumably load() must be called first.
    setOption: function (sOpt, bVal) {
        if (_dOptions.has(sOpt)) {
            _dOptions.set(sOpt, bVal);
        }



    },

    // Updates the current options from <dOpt> via gl_updateOnlyExistingKeys
    // (presumably only the keys already present in _dOptions are updated, as the helper name suggests).
    setOptions: function (dOpt) {
        _dOptions.gl_updateOnlyExistingKeys(dOpt);
    },

    // Returns the current options object itself (not a copy).
    getOptions: function () {
        return _dOptions;
    },

    // Returns a shallow copy of the default options for the context stored by load().
    getDefaultOptions: function () {
        return gc_options.getOptions(_sAppContext).gl_shallowCopy();
    },

    // Replaces the current options with a shallow copy of the default options for the stored context.
    resetOptions: function () {
        _dOptions = gc_options.getOptions(_sAppContext).gl_shallowCopy();

    }

}



//////// Common functions

function option (sOpt) {
    // returns the value stored for option sOpt in the current options (undefined if the option is unknown)
    return _dOptions.get(sOpt);
}

function displayInfo (dDA, aWord) {
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486

487
488
489
490
491
492


493
494
495
496
497
498
499

//// functions to get text outside pattern scope

// warning: check compile_rules.py to understand how it works

function nextword (s, iStart, n) {
    // Gets the nth word following position <iStart> in <s>.
    // Returns [position of the word in s, word], or null if there is no such word.
    let z = new RegExp("^( +[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st%_-]+){" + (n-1).toString() + "} +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st%_-]+)", "i");
    let m = z.exec(s.slice(iStart));
    if (!m) {
        return null;
    }
    // The pattern is anchored at the beginning of the slice, so the length of the whole match
    // is the offset of the end of the word (RegExp has no static lastIndex property).
    return [iStart + m[0].length - m[2].length, m[2]];
}

function prevword (s, iEnd, n) {
    // Gets the nth word preceding position <iEnd> in <s>.
    // Returns [position of the word in s, word], or null if there is no such word.
    const sWord = "[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st%_-]+";
    const zPrev = new RegExp("(" + sWord + ") +(" + sWord + " +){" + (n-1).toString() + "}$", "i");
    const mFound = zPrev.exec(s.slice(0, iEnd));
    return (mFound) ? [mFound.index, mFound[1]] : null;
}

const _zNextWord = new RegExp ("^ +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_-]*)", "i");
const _zPrevWord = new RegExp ("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_-]*) +$", "i");

function nextword1 (s, iStart) {
    // Gets the word following position <iStart> in <s> (optimization of nextword for n = 1).
    // Returns [position of the word in s, word], or null if there is no such word.
    let m = _zNextWord.exec(s.slice(iStart));
    if (!m) {
        return null;
    }
    // The pattern is anchored at the beginning of the slice, so the length of the whole match
    // is the offset of the end of the word (RegExp has no static lastIndex property).
    return [iStart + m[0].length - m[1].length, m[1]];
}



function prevword1 (s, iEnd) {
    // get previous word (optimization)
    //echo("prev1, s:"+s);
    //echo("prev1, s.slice(0, iEnd):"+s.slice(0, iEnd));
    let m = _zPrevWord.exec(s.slice(0, iEnd));
    //echo("prev1, m:"+m);







|




|




|







<
<
<


>




|

>
>







473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497



498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515

//// functions to get text outside pattern scope

// warning: check compile_rules.py to understand how it works

function nextword (s, iStart, n) {
    // Gets the nth word following position <iStart> in <s>.
    // Returns [position of the word in s, word], or null if there is no such word.
    const sWord = "[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st%_-]+";
    const zNext = new RegExp("^(?: +" + sWord + "){" + (n-1).toString() + "} +(" + sWord + ")", "ig");
    const mFound = zNext.exec(s.slice(iStart));
    if (mFound === null) {
        return null;
    }
    // lastIndex is the end of the match inside the slice (the regex is global and freshly created)
    const sFound = mFound[1];
    return [iStart + zNext.lastIndex - sFound.length, sFound];
}

function prevword (s, iEnd, n) {
    // Gets the nth word preceding position <iEnd> in <s>.
    // Returns [position of the word in s, word], or null if there is no such word.
    const sWord = "[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st%_-]+";
    const zPrev = new RegExp("(" + sWord + ") +(?:" + sWord + " +){" + (n-1).toString() + "}$", "i");
    const mFound = zPrev.exec(s.slice(0, iEnd));
    return (mFound) ? [mFound.index, mFound[1]] : null;
}




function nextword1 (s, iStart) {
    // Gets the word following position <iStart> in <s> (optimization of nextword for n = 1).
    // Returns [position of the word in s, word], or null if there is no such word.
    // The regex is created at each call: it is global, so its lastIndex must not be shared between calls.
    const zNext = new RegExp ("^ +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_-]*)", "ig");
    const mFound = zNext.exec(s.slice(iStart));
    if (mFound === null) {
        return null;
    }
    const sFound = mFound[1];
    return [iStart + zNext.lastIndex - sFound.length, sFound];
}

const _zPrevWord = new RegExp ("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_-]*) +$", "i");

function prevword1 (s, iEnd) {
    // get previous word (optimization)
    //echo("prev1, s:"+s);
    //echo("prev1, s.slice(0, iEnd):"+s.slice(0, iEnd));
    let m = _zPrevWord.exec(s.slice(0, iEnd));
    //echo("prev1, m:"+m);
592
593
594
595
596
597
598

599
600
601
602
603
604
605
606
607
608
609
610

611
612
613















614
615
616
617
618
619
620
621
622
623
624
    return true;
}

function define (dDA, nPos, lMorph) {
    // Store <lMorph> under the key <nPos> in <dDA> (through its set() method).
    // Always returns true.
    // NOTE(review): presumably <dDA> is the disambiguation map and <lMorph> a list of morphologies; confirm against callers.
    dDA.set(nPos, lMorph);
    return true;
}


//////// GRAMMAR CHECKER PLUGINS

${pluginsJS}


${callablesJS}



if (typeof(exports) !== 'undefined') {
    exports.load = load;

    exports.parse = parse;
    exports.lang = lang;
    exports.version = version;















    exports.getDictionary = getDictionary;
    exports.setOption = setOption;
    exports.setOptions = setOptions;
    exports.getOptions = getOptions;
    exports.getDefaultOptions = getDefaultOptions;
    exports.resetOptions = resetOptions;
    exports.ignoreRule = ignoreRule;
    exports.reactivateRule = reactivateRule;
    exports.resetIgnoreRules = resetIgnoreRules;
    exports.listRules = listRules;
}







>











|
>
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|
|
|
|
<
<
<
<

608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652




653
    return true;
}

function define (dDA, nPos, lMorph) {
    // Store <lMorph> under the key <nPos> in <dDA> (through its set() method).
    // Always returns true.
    // NOTE(review): presumably <dDA> is the disambiguation map and <lMorph> a list of morphologies; confirm against callers.
    dDA.set(nPos, lMorph);
    return true;
}


//////// GRAMMAR CHECKER PLUGINS

${pluginsJS}


${callablesJS}



if (typeof(exports) !== 'undefined') {
    exports.lang = gc_engine.lang;
    exports.locales = gc_engine.locales;
    exports.pkg = gc_engine.pkg;
    exports.name = gc_engine.name;
    exports.version = gc_engine.version;
    exports.author = gc_engine.author;
    exports.parse = gc_engine.parse;
    exports._zEndOfSentence = gc_engine._zEndOfSentence;
    exports._zBeginOfParagraph = gc_engine._zBeginOfParagraph;
    exports._zEndOfParagraph = gc_engine._zEndOfParagraph;
    exports._getSentenceBoundaries = gc_engine._getSentenceBoundaries;
    exports._proofread = gc_engine._proofread;
    exports._createError = gc_engine._createError;
    exports._rewrite = gc_engine._rewrite;
    exports.ignoreRule = gc_engine.ignoreRule;
    exports.resetIgnoreRules = gc_engine.resetIgnoreRules;
    exports.reactivateRule = gc_engine.reactivateRule;
    exports.listRules = gc_engine.listRules;
    exports._getRules = gc_engine._getRules;
    exports.load = gc_engine.load;
    exports.getDictionary = gc_engine.getDictionary;
    exports.setOption = gc_engine.setOption;
    exports.setOptions = gc_engine.setOptions;
    exports.getOptions = gc_engine.getOptions;
    exports.getDefaultOptions = gc_engine.getDefaultOptions;
    exports.resetOptions = gc_engine.resetOptions;




}

Modified gc_core/js/lang_core/gc_options.js from [6e45d077d4] to [ba36100a98].

1
2
3
4


5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22

23
24
25

26
27
// Options for Grammalecte

${map}



function getOptions (sContext="JavaScript") {
    // Returns the options registered in dOpt for <sContext>.
    // A context which is not an own key of dOpt gets the "JavaScript" options.
    const sKey = dOpt.hasOwnProperty(sContext) ? sContext : "JavaScript";
    return dOpt[sKey];
}

const lStructOpt = ${lStructOpt};

const dOpt = {
    "JavaScript": new Map (${dOptJavaScript}),
    "Firefox": new Map (${dOptFirefox}),
    "Thunderbird": new Map (${dOptThunderbird}),
}

const dOptLabel = ${dOptLabel};



if (typeof(exports) !== 'undefined') {
	exports.getOptions = getOptions;
	exports.lStructOpt = lStructOpt;

	exports.dOptLabel = dOptLabel;
}




>
>
|
|
|
|
|
|

|

|
|
|
|
|

|
|

>

|
|
>
|

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
// Options for Grammalecte

${map}


// Options of the grammar checker, grouped in a single object.
// NOTE(review): the placeholders used as values below look like build-time template fields; confirm how they are filled before editing them.
var gc_options = {
    // Returns the options registered in dOpt for <sContext>.
    // A context which is not an own key of dOpt gets the "JavaScript" options.
    // dOpt is read through <this>: the function must be called as a method of an object owning dOpt.
    getOptions: function (sContext="JavaScript") {
        if (this.dOpt.hasOwnProperty(sContext)) {
            return this.dOpt[sContext];
        }
        return this.dOpt["JavaScript"];
    },

    lStructOpt: ${lStructOpt},

    // one Map of options per context
    dOpt: {
        "JavaScript": new Map (${dOptJavaScript}),
        "Firefox": new Map (${dOptFirefox}),
        "Thunderbird": new Map (${dOptThunderbird}),
    },

    dOptLabel: ${dOptLabel}
}


if (typeof(exports) !== 'undefined') {
	exports.getOptions = gc_options.getOptions;
	exports.lStructOpt = gc_options.lStructOpt;
    exports.dOpt = gc_options.dOpt;
	exports.dOptLabel = gc_options.dOptLabel;
}

Modified gc_core/js/lang_core/gc_rules.js from [03bc540fb7] to [02fc1d6f94].

1
2
3
4
5
6

7
8
9

10
11
12
13
14
15
// Grammar checker rules
"use strict";

${string}
${regex}


const lParagraphRules = ${paragraph_rules_JS};

const lSentenceRules = ${sentence_rules_JS};



if (typeof(exports) !== 'undefined') {
	exports.lParagraphRules = lParagraphRules;
	exports.lSentenceRules = lSentenceRules;
}






>
|

|
>



|
|

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// Grammar checker rules
"use strict";

${string}
${regex}

// Rules of the grammar checker, grouped in a single object.
// NOTE(review): the placeholders used as values below look like build-time template fields; confirm how they are filled before editing them.
var gc_rules = {
    lParagraphRules: ${paragraph_rules_JS},

    lSentenceRules: ${sentence_rules_JS}
}


if (typeof(exports) !== 'undefined') {
    exports.lParagraphRules = gc_rules.lParagraphRules;
    exports.lSentenceRules = gc_rules.lSentenceRules;
}

Modified gc_core/js/str_transform.js from [0fafeda9a5] to [3f33d76266].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34


35
36
37
38
39
40
41
42

43
44
45
46
47
48
49
50
51
52
53
54

55
56
57
58
59
60
//// STRING TRANSFORMATION

// Maps each letter to the string of letters considered similar to it
// (the letter itself and its accented variants).
var dSimilarChars = new Map();
for (let sGroup of ["aàâáä", "cç", "eéêèë", "iîïíì", "oôóòö", "uûùüú"]) {
    // every letter of a group is associated with the whole group
    for (let cLetter of sGroup) {
        dSimilarChars.set(cLetter, sGroup);
    }
}

// Note: 48 is the ASCII code for "0"



// Suffix only
function getStemFromSuffixCode (sFlex, sSfxCode) {
    // Rebuild a stem from the inflected form <sFlex> and the suffix code <sSfxCode>.
    // The first character of the code is a digit character giving the number of
    // characters to remove at the end of <sFlex> (char code minus 48);
    // the rest of the code is the text to append.
    // The code "0" means that the stem is the inflected form itself.
    if (sSfxCode == "0") {
        return sFlex;
    }
    const sAdded = sSfxCode.slice(1);
    if (sSfxCode[0] == '0') {
        // nothing to remove (and slice(0, -0) would return an empty string)
        return sFlex + sAdded;
    }
    const nCut = sSfxCode.charCodeAt(0) - 48;
    return sFlex.slice(0, -nCut) + sAdded;
}


// Prefix and suffix
function getStemFromAffixCode (sFlex, sAffCode) {
    // Rebuild a stem from the inflected form <sFlex> and the affix code <sAffCode>.
    // The code has the form "<prefix code>/<suffix code>". In each part, the first
    // character is a digit character giving the number of characters to remove
    // (char code minus 48) and the rest is the text to add.
    // The code "0" means that the stem is the inflected form itself.
    // Returns "# error #" if the code contains no "/".
    if (sAffCode == "0") {
        return sFlex;
    }
    if (!sAffCode.includes("/")) {
        return "# error #";
    }
    const aCodes = sAffCode.split('/');
    const sPfxCode = aCodes[0];
    const sSfxCode = aCodes[1];
    // prefix: remove the beginning of the word, then prepend the replacement text
    const sUnprefixed = sPfxCode.slice(1) + sFlex.slice(sPfxCode.charCodeAt(0) - 48);
    // suffix: remove the end of the word, then append the replacement text
    const sAdded = sSfxCode.slice(1);
    if (sSfxCode[0] == '0') {
        return sUnprefixed + sAdded;
    }
    const nCut = sSfxCode.charCodeAt(0) - 48;
    return sUnprefixed.slice(0, -nCut) + sAdded;
}



if (typeof(exports) !== 'undefined') {
    exports.getStemFromSuffixCode = getStemFromSuffixCode;
    exports.getStemFromAffixCode = getStemFromAffixCode;
}


<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<


>
>
|
<
|
|
|
|
|
|
>
|
<
|
|
|
|
|
|
|
|
|
|
>



|
|

1
2






























3
4
5
6
7

8
9
10
11
12
13
14
15

16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
//// STRING TRANSFORMATION































// Note: 48 is the ASCII code for "0"

// Functions rebuilding a stem from an inflected form and a transformation code.
// In a code, the first character is a digit character giving the number of
// characters to remove (char code minus 48) and the rest is the text to add.
var str_transform = {
    getStemFromSuffixCode: function (sFlex, sSfxCode) {
        // Suffix only.
        // The code "0" means that the stem is the inflected form itself.
        if (sSfxCode == "0") {
            return sFlex;
        }
        const sAdded = sSfxCode.slice(1);
        if (sSfxCode[0] == '0') {
            // nothing to remove (and slice(0, -0) would return an empty string)
            return sFlex + sAdded;
        }
        const nCut = sSfxCode.charCodeAt(0) - 48;
        return sFlex.slice(0, -nCut) + sAdded;
    },

    getStemFromAffixCode: function (sFlex, sAffCode) {
        // Prefix and suffix: the code has the form "<prefix code>/<suffix code>".
        // The code "0" means that the stem is the inflected form itself.
        // Returns "# error #" if the code contains no "/".
        if (sAffCode == "0") {
            return sFlex;
        }
        if (!sAffCode.includes("/")) {
            return "# error #";
        }
        const aCodes = sAffCode.split('/');
        const sPfxCode = aCodes[0];
        const sSfxCode = aCodes[1];
        // prefix: remove the beginning of the word, then prepend the replacement text
        const sUnprefixed = sPfxCode.slice(1) + sFlex.slice(sPfxCode.charCodeAt(0) - 48);
        // suffix: remove the end of the word, then append the replacement text
        const sAdded = sSfxCode.slice(1);
        if (sSfxCode[0] == '0') {
            return sUnprefixed + sAdded;
        }
        const nCut = sSfxCode.charCodeAt(0) - 48;
        return sUnprefixed.slice(0, -nCut) + sAdded;
    }
};


if (typeof(exports) !== 'undefined') {
    exports.getStemFromSuffixCode = str_transform.getStemFromSuffixCode;
    exports.getStemFromAffixCode = str_transform.getStemFromAffixCode;
}

Modified gc_core/js/tests.js from [f2f737b523] to [abe05a3485].

1
2
3
4
5

6

7
8
9
10
11
12

13
14

15
16
17

18
19
20
21
22
23
24
25
26
27
28
// JavaScript

"use strict";



const helpers = require("resource://grammalecte/helpers.js");



class TestGrammarChecking {

    constructor (gce) {
        this.gce = gce;

        this._aRuleTested = new Set();
    };


    * testParse (bDebug=false) {
        const t0 = Date.now();

        const aData = JSON.parse(helpers.loadFile("resource://grammalecte/"+this.gce.lang+"/tests_data.json")).aData;
        //const aData = require("resource://grammalecte/"+this.gce.lang+"/tests_data.js").aData;
        let nInvalid = 0
        let nTotal = 0
        let sErrorText;
        let sSugg;
        let sExpectedErrors;
        let sTextToCheck;
        let sFoundErrors;
        let sListErr;
        let sLineNum;





>
|
>




|

>

<
>



>
|
<
|
|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16

17
18
19
20
21
22

23
24
25
26
27
28
29
30
31
// JavaScript

"use strict";


if (typeof(require) !== 'undefined') {
    var helpers = require("resource://grammalecte/helpers.js");
}


class TestGrammarChecking {

    constructor (gce, spfTests="") {
        this.gce = gce;
        this.spfTests = spfTests;
        this._aRuleTested = new Set();

    }

    * testParse (bDebug=false) {
        const t0 = Date.now();
        let sURL = (this.spfTests !== "") ? this.spfTests : "resource://grammalecte/"+this.gce.lang+"/tests_data.json";
        const aData = JSON.parse(helpers.loadFile(sURL)).aData;

        let nInvalid = 0;
        let nTotal = 0;
        let sErrorText;
        let sSugg;
        let sExpectedErrors;
        let sTextToCheck;
        let sFoundErrors;
        let sListErr;
        let sLineNum;
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95

96
97
98
99
100
101
102
                    if (sExpectedErrors !== sFoundErrors) {
                        yield "\n" + i.toString() +
                              "\n# Line num: " + sLineNum +
                              "\n> to check: " + sTextToCheck +
                              "\n  expected: " + sExpectedErrors +
                              "\n  found:    " + sFoundErrors +
                              "\n  errors:   \n" + sListErr;
                        nInvalid = nInvalid + 1
                    }
                    nTotal = nTotal + 1;
                }
                i = i + 1;
                if (i % 1000 === 0) {
                    yield i.toString();
                }
            }
            bShowUntested = true;
        }
        catch (e) {
            helpers.logerror(e);
        }

        if (bShowUntested) {
            i = 0;
            for (let [sOpt, sLineId, sRuleId] of gce.listRules()) {
                if (!this._aRuleTested.has(sLineId) && !/^[0-9]+[sp]$|^[pd]_/.test(sRuleId)) {
                    sUntestedRules += sRuleId + ", ";
                    i += 1;
                }
            }
            if (i > 0) {
                yield sUntestedRules + "\n[" + i.toString() + " untested rules]";
            }
        }

        const t1 = Date.now();
        yield "Tests parse finished in " + ((t1-t0)/1000).toString()
            + " s\nTotal errors: " + nInvalid.toString() + " / " + nTotal.toString();
    };


    _getExpectedErrors (sLine) {
        try {
            let sRes = " ".repeat(sLine.length);
            let z = /\{\{.+?\}\}/g;
            let m;
            let i = 0;







|
















|













<
>







60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97

98
99
100
101
102
103
104
105
                    if (sExpectedErrors !== sFoundErrors) {
                        yield "\n" + i.toString() +
                              "\n# Line num: " + sLineNum +
                              "\n> to check: " + sTextToCheck +
                              "\n  expected: " + sExpectedErrors +
                              "\n  found:    " + sFoundErrors +
                              "\n  errors:   \n" + sListErr;
                        nInvalid = nInvalid + 1;
                    }
                    nTotal = nTotal + 1;
                }
                i = i + 1;
                if (i % 1000 === 0) {
                    yield i.toString();
                }
            }
            bShowUntested = true;
        }
        catch (e) {
            helpers.logerror(e);
        }

        if (bShowUntested) {
            i = 0;
            for (let [sOpt, sLineId, sRuleId] of this.gce.listRules()) {
                if (!this._aRuleTested.has(sLineId) && !/^[0-9]+[sp]$|^[pd]_/.test(sRuleId)) {
                    sUntestedRules += sRuleId + ", ";
                    i += 1;
                }
            }
            if (i > 0) {
                yield sUntestedRules + "\n[" + i.toString() + " untested rules]";
            }
        }

        const t1 = Date.now();
        yield "Tests parse finished in " + ((t1-t0)/1000).toString()
            + " s\nTotal errors: " + nInvalid.toString() + " / " + nTotal.toString();

    }

    _getExpectedErrors (sLine) {
        try {
            let sRes = " ".repeat(sLine.length);
            let z = /\{\{.+?\}\}/g;
            let m;
            let i = 0;
114
115
116
117
118
119
120
121

122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146

147
148
149
150
151
152
153
            }
            return sRes;
        }
        catch (e) {
            helpers.logerror(e);
        }
        return " ".repeat(sLine.length);
    };


    _getFoundErrors (sLine, bDebug, sOption) {
        try {
            let aErrs = [];
            if (sOption) {
                gce.setOption(sOption, true);
                aErrs = this.gce.parse(sLine, "FR", bDebug);
                gce.setOption(sOption, false);
            } else {
                aErrs = this.gce.parse(sLine, "FR", bDebug);
            }
            let sRes = " ".repeat(sLine.length);
            let sListErr = "";
            for (let dErr of aErrs) {
                sRes = sRes.slice(0, dErr["nStart"]) + "~".repeat(dErr["nEnd"] - dErr["nStart"]) + sRes.slice(dErr["nEnd"]);
                sListErr += "    * {" + dErr['sLineId'] + " / " + dErr['sRuleId'] + "}  at  " + dErr['nStart'] + ":" + dErr['nEnd'] + "\n";
                this._aRuleTested.add(dErr["sLineId"]);
            }
            return [sRes, sListErr];
        }
        catch (e) {
            helpers.logerror(e);
        }
        return [" ".repeat(sLine.length), ""];
    };


}


if (typeof(exports) !== 'undefined') {
    exports.TestGrammarChecking = TestGrammarChecking;
}







<
>





|

|
















<
>







117
118
119
120
121
122
123

124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148

149
150
151
152
153
154
155
156
            }
            return sRes;
        }
        catch (e) {
            helpers.logerror(e);
        }
        return " ".repeat(sLine.length);

    }

    _getFoundErrors (sLine, bDebug, sOption) {
        try {
            let aErrs = [];
            if (sOption) {
                this.gce.setOption(sOption, true);
                aErrs = this.gce.parse(sLine, "FR", bDebug);
                this.gce.setOption(sOption, false);
            } else {
                aErrs = this.gce.parse(sLine, "FR", bDebug);
            }
            let sRes = " ".repeat(sLine.length);
            let sListErr = "";
            for (let dErr of aErrs) {
                sRes = sRes.slice(0, dErr["nStart"]) + "~".repeat(dErr["nEnd"] - dErr["nStart"]) + sRes.slice(dErr["nEnd"]);
                sListErr += "    * {" + dErr['sLineId'] + " / " + dErr['sRuleId'] + "}  at  " + dErr['nStart'] + ":" + dErr['nEnd'] + "\n";
                this._aRuleTested.add(dErr["sLineId"]);
            }
            return [sRes, sListErr];
        }
        catch (e) {
            helpers.logerror(e);
        }
        return [" ".repeat(sLine.length), ""];

    }

}


if (typeof(exports) !== 'undefined') {
    exports.TestGrammarChecking = TestGrammarChecking;
}

Modified gc_core/js/text.js from [beffb97d58] to [46a1749c2b].

1
2
3
4


5
6
7


8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57

58
59
60
61
62
63
64
// JavaScript

"use strict";



const helpers = require("resource://grammalecte/helpers.js");




function* getParagraph (sText) {
    // generator: yields the paragraphs of <sText> one by one.
    // Paragraphs are separated by "\n"; carriage returns are dropped.
    // A text without "\n" (even an empty one) yields a single paragraph.

    // A string pattern replaces the first occurrence only:
    // a global regex is required to remove every carriage return.
    const sClean = sText.replace(/\r/g, "");
    let iStart = 0;
    let iEnd = 0;
    while ((iEnd = sClean.indexOf("\n", iStart)) !== -1) {
        yield sClean.slice(iStart, iEnd);
        iStart = iEnd + 1;
    }
    // what follows the last line break
    yield sClean.slice(iStart);
}

function* wrap (sText, nWidth=80) {
    // generator: yields <sText> line by line.
    // While the remaining text has at least <nWidth> characters, a line is cut
    // just after the last space found at position <nWidth> or before;
    // if there is no such space, the line is cut after <nWidth> characters.
    // What remains (possibly an empty string) is yielded as the last line.
    let sRest = sText;
    while (sRest && sRest.length >= nWidth) {
        const iSpace = sRest.lastIndexOf(" ", nWidth);
        const nCut = (iSpace >= 0) ? iSpace + 1 : nWidth;
        yield sRest.slice(0, nCut);
        sRest = sRest.slice(nCut);
    }
    yield sRest;
}

function getReadableError (oErr) {
    // Returns the error <oErr> as a readable text:
    // position, line id and rule id, then the message,
    // then the suggestions (if any) and the URL (if not empty).
    // If <oErr> cannot be read, the exception is logged and a fallback text is returned.
    try {
        const { nStart, nEnd, sLineId, sRuleId, sMessage, aSuggestions, URL: sURL } = oErr;
        const sHeader = "\n* " + nStart + ":" + nEnd + "  # " + sLineId + "  # " + sRuleId + ":\n";
        const sSuggestions = (aSuggestions.length > 0) ? "\n  > Suggestions : " + aSuggestions.join(" | ") : "";
        const sLink = (sURL !== "") ? "\n  > URL: " + sURL : "";
        return sHeader + "  " + sMessage + sSuggestions + sLink;
    }
    catch (e) {
        helpers.logerror(e);
        return "\n# Error. Data: " + oErr.toString();
    }
}



if (typeof(exports) !== 'undefined') {
    exports.getParagraph = getParagraph;
    exports.wrap = wrap;
    exports.getReadableError = getReadableError;
}




>
>
|
|

>
>
|
|
|
|
|
|
|
|
|
|
|

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>



|
|
|

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
// JavaScript

"use strict";


// Load the helpers with require() only where a module loader is available.
// The existence of <exports> does not guarantee the existence of require():
// both are checked, so that this block never throws a ReferenceError.
// NOTE(review): elsewhere, <helpers> is presumably provided by a script loaded beforehand; confirm.
if (typeof(exports) !== 'undefined' && typeof(require) !== 'undefined') {
    var helpers = require("resource://grammalecte/helpers.js");
}


// Functions to handle texts: splitting in paragraphs, wrapping, display of errors.
var text = {
    getParagraph: function* (sText) {
        // generator: yields the paragraphs of <sText> one by one.
        // Paragraphs are separated by "\n"; carriage returns are dropped.
        // A text without "\n" (even an empty one) yields a single paragraph.

        // A string pattern replaces the first occurrence only:
        // a global regex is required to remove every carriage return.
        const sClean = sText.replace(/\r/g, "");
        let iStart = 0;
        let iEnd = 0;
        while ((iEnd = sClean.indexOf("\n", iStart)) !== -1) {
            yield sClean.slice(iStart, iEnd);
            iStart = iEnd + 1;
        }
        // what follows the last line break
        yield sClean.slice(iStart);
    },

    wrap: function* (sText, nWidth=80) {
        // generator: yields <sText> line by line.
        // While the remaining text has at least <nWidth> characters, a line is cut
        // just after the last space found at position <nWidth> or before;
        // if there is no such space, the line is cut after <nWidth> characters.
        // What remains (possibly an empty string) is yielded as the last line.
        while (sText) {
            if (sText.length >= nWidth) {
                let nEnd = sText.lastIndexOf(" ", nWidth) + 1;
                if (nEnd > 0) {
                    yield sText.slice(0, nEnd);
                    sText = sText.slice(nEnd);
                } else {
                    yield sText.slice(0, nWidth);
                    sText = sText.slice(nWidth);
                }
            } else {
                break;
            }
        }
        yield sText;
    },

    getReadableError: function (oErr) {
        // Returns the error <oErr> as a readable text:
        // position, line id and rule id, then the message,
        // then the suggestions (if any) and the URL (if not empty).
        // If <oErr> cannot be read, the exception is logged and a fallback text is returned.
        try {
            let sResult = "\n* " + oErr['nStart'] + ":" + oErr['nEnd']
                        + "  # " + oErr['sLineId'] + "  # " + oErr['sRuleId'] + ":\n";
            sResult += "  " + oErr["sMessage"];
            if (oErr["aSuggestions"].length > 0) {
                sResult += "\n  > Suggestions : " + oErr["aSuggestions"].join(" | ");
            }
            if (oErr["URL"] !== "") {
                sResult += "\n  > URL: " + oErr["URL"];
            }
            return sResult;
        }
        catch (e) {
            helpers.logerror(e);
            return "\n# Error. Data: " + oErr.toString();
        }
    }
};


if (typeof(exports) !== 'undefined') {
    exports.getParagraph = text.getParagraph;
    exports.wrap = text.wrap;
    exports.getReadableError = text.getReadableError;
}

Modified gc_core/js/tokenizer.js from [a6594366c3] to [fcd058bf6a].

1
2
3
4
5


6
7


8
9
10
11
12
13
14
15
// JavaScript
// Very simple tokenizer

"use strict";



const helpers = require("resource://grammalecte/helpers.js");



const aPatterns = {
    // All regexps must start with ^.
    "default":
        [
            [/^[   \t]+/, 'SPACE'],
            [/^[,.;:!?…«»“”‘’"(){}\[\]/·–—]+/, 'SEPARATOR'],
            [/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_]+[@.])[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.\/?&!%=+*"'@$#-]+/, 'LINK'],
            [/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+/, 'TAG'],





>
>
|
|
>
>
|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
// JavaScript
// Very simple tokenizer

"use strict";


// Load the helpers with require() only where a module loader is available.
// The existence of <exports> does not guarantee the existence of require():
// both are checked, so that this block never throws a ReferenceError.
// NOTE(review): elsewhere, <helpers> is presumably provided by a script loaded beforehand; confirm.
if (typeof(exports) !== 'undefined' && typeof(require) !== 'undefined') {
    var helpers = require("resource://grammalecte/helpers.js");
}


const aTkzPatterns = {
    // All regexps must start with ^.
    "default":
        [
            [/^[   \t]+/, 'SPACE'],
            [/^[,.;:!?…«»“”‘’"(){}\[\]/·–—]+/, 'SEPARATOR'],
            [/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_]+[@.])[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.\/?&!%=+*"'@$#-]+/, 'LINK'],
            [/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+/, 'TAG'],
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49

50
51
52
53
54
55
56
            [/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
            [/^(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`]/i, 'ELPFX'],
            [/^\d\d?[hm]\d\d\b/, 'HOUR'],
            [/^\d+(?:er|nd|e|de|ième|ème|eme)s?\b/, 'ORDINAL'],
            [/^-?\d+(?:[.,]\d+|)/, 'NUM'],
            [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+)*/, 'WORD']
        ]
}


class Tokenizer {

    constructor (sLang) {
        this.sLang = sLang;
        if (!aPatterns.hasOwnProperty(sLang)) {
            this.sLang = "default";
        }
        this.aRules = aPatterns[this.sLang];
    };


    * genTokens (sText) {
        let m;
        let i = 0;
        while (sText) {
            let nCut = 1;
            for (let [zRegex, sType] of this.aRules) {







|






|


|
<
>







35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52

53
54
55
56
57
58
59
60
            [/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
            [/^(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`]/i, 'ELPFX'],
            [/^\d\d?[hm]\d\d\b/, 'HOUR'],
            [/^\d+(?:er|nd|e|de|ième|ème|eme)s?\b/, 'ORDINAL'],
            [/^-?\d+(?:[.,]\d+|)/, 'NUM'],
            [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+)*/, 'WORD']
        ]
};


class Tokenizer {

    constructor (sLang) {
        this.sLang = sLang;
        if (!aTkzPatterns.hasOwnProperty(sLang)) {
            this.sLang = "default";
        }
        this.aRules = aTkzPatterns[this.sLang];

    }

    * genTokens (sText) {
        let m;
        let i = 0;
        while (sText) {
            let nCut = 1;
            for (let [zRegex, sType] of this.aRules) {
70
71
72
73
74
75
76
77

78
79
80
81
82
83
84
                catch (e) {
                    helpers.logerror(e);
                }
            }
            i += nCut;
            sText = sText.slice(nCut);
        }
    };


    getSpellingErrors (sText, oDict) {
        let aSpellErr = [];
        for (let oToken of this.genTokens(sText)) {
            if (oToken.sType === 'WORD' && !oDict.isValidToken(oToken.sValue)) {
                aSpellErr.push(oToken);
            }







<
>







74
75
76
77
78
79
80

81
82
83
84
85
86
87
88
                catch (e) {
                    helpers.logerror(e);
                }
            }
            i += nCut;
            sText = sText.slice(nCut);
        }

    }

    getSpellingErrors (sText, oDict) {
        let aSpellErr = [];
        for (let oToken of this.genTokens(sText)) {
            if (oToken.sType === 'WORD' && !oDict.isValidToken(oToken.sValue)) {
                aSpellErr.push(oToken);
            }