Grammalecte  Diff

Differences From Artifact [7ee1350cd7]:

To Artifact [ab7d9a98c9]:


1
2
3
4
5
6
7
8
9
10
11
12
13

14
15
16
17
18
19
20
21
22

23
24
25
26
27
28
29
30
31
32
1
2
3
4
5
6
7
8
9
10
11
12

13
14
15
16
17

18



19



20
21
22
23
24
25
26












-
+




-

-
-
-
+
-
-
-







// Grammar checker engine
/*jslint esversion: 6*/
/*global console,require,exports*/

"use strict";

${string}
${regex}
${map}


// Environment bootstrap: load the engine's dependencies when a `require` function exists,
// and make sure an `echo` function is defined in every environment.
// `var` is used on purpose: the declarations are hoisted out of the `if` branches, so the
// names stay visible to the rest of the file.
// NOTE(review): `${lang}` inside the paths looks like a build-time placeholder — confirm against the build script.
if (typeof(require) !== 'undefined') {
    var helpers = require("resource://grammalecte/graphspell/helpers.js");
    //var helpers = require("resource://grammalecte/graphspell/helpers.js");
    var gc_options = require("resource://grammalecte/${lang}/gc_options.js");
    var gc_rules = require("resource://grammalecte/${lang}/gc_rules.js");
    var cregex = require("resource://grammalecte/${lang}/cregex.js");
    var text = require("resource://grammalecte/text.js");
    // with a module loader available, echo is simply the helpers implementation
    var echo = helpers.echo;
}
else if (typeof(console) !== "undefined") {
    // no module loader, but a console exists: log the value and return true
    var echo = function (o) { console.log(o); return true; };
}

else {
    // neither `require` nor `console`: echo is a no-op that still returns true
    var echo = function () { return true; }
}

function capitalizeArray (aArray) {
    // can’t map on user defined function??
    let aNew = [];
    for (let i = 0; i < aArray.length; i = i + 1) {
        aNew[i] = aArray[i].gl_toCapitalize();
    }
68
69
70
71
72
73
74
75

76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102

103
104
105
106
107
108
109
62
63
64
65
66
67
68

69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89

90
91
92
93
94

95
96
97
98
99
100
101
102







-
+




















-





-
+







        try {
            [sNew, dErrors] = this._proofread(sText, sAlt, 0, true, dDA, dPriority, sCountry, bDebug, bContext);
            if (sNew) {
                sText = sNew;
            }
        }
        catch (e) {
            helpers.logerror(e);
            console.error(e);
        }

        // cleanup
        if (sText.includes(" ")) {
            sText = sText.replace(/ /g, ' '); // nbsp
        }
        if (sText.includes(" ")) {
            sText = sText.replace(/ /g, ' '); // snbsp
        }
        if (sText.includes("'")) {
            sText = sText.replace(/'/g, "’");
        }
        if (sText.includes("‑")) {
            sText = sText.replace(/‑/g, "-"); // nobreakdash
        }

        // parse sentence
        // Proofread each sentence separately; boundaries come from _getSentenceBoundaries().
        for (let [iStart, iEnd] of this._getSentenceBoundaries(sText)) {
            const nSentenceLength = iEnd - iStart;
            // JavaScript has no chained comparisons: `4 < n < 2000` is evaluated as
            // `(4 < n) < 2000`, i.e. a boolean compared with 2000, which is always true.
            // Both bounds must be tested explicitly so that very short and very long
            // spans are actually skipped.
            if (nSentenceLength > 4 && nSentenceLength < 2000) {
                // disambiguation data is reset for every sentence
                dDA.clear();
                try {
                    // only the error map (second element) of the result is used here
                    [, errs] = this._proofread(sText.slice(iStart, iEnd), sAlt.slice(iStart, iEnd), iStart, false, dDA, dPriority, sCountry, bDebug, bContext);
                    dErrors.gl_update(errs);
                }
                catch (e) {
                    // a failure on one sentence is reported and must not abort the others
                    helpers.logerror(e);
                    console.error(e);
                }
            }
        }
        return Array.from(dErrors.values());
    },

    _zEndOfSentence: new RegExp ('([.?!:;…][ .?!… »”")]*|.$)', "g"),
131
132
133
134
135
136
137
138

139
140
141
142
143

144
145
146
147
148
149

150
151
152
153
154
155
156
157
158

159
160
161
162

163
164
165
166
167

168
169
170

171
172
173
174
175
176
177

178
179
180
181
182
183
184
185
186
187
188



189
190
191
192
193
194
195
124
125
126
127
128
129
130

131
132
133
134
135

136
137
138
139
140
141

142
143
144
145
146
147
148
149
150

151
152
153
154

155
156
157
158
159

160
161
162

163
164
165
166
167
168
169

170
171
172
173
174
175
176
177
178



179
180
181
182
183
184
185
186
187
188







-
+




-
+





-
+








-
+



-
+




-
+


-
+






-
+








-
-
-
+
+
+







        for (let [sOption, lRuleGroup] of this._getRules(bParagraph)) {
            if (!sOption || option(sOption)) {
                for (let [zRegex, bUppercase, sLineId, sRuleId, nPriority, lActions, lGroups, lNegLookBefore] of lRuleGroup) {
                    if (!_aIgnoredRules.has(sRuleId)) {
                        while ((m = zRegex.gl_exec2(s, lGroups, lNegLookBefore)) !== null) {
                            bCondMemo = null;
                            /*if (bDebug) {
                                helpers.echo(">>>> Rule # " + sLineId + " - Text: " + s + " opt: "+ sOption);
                                console.log(">>>> Rule # " + sLineId + " - Text: " + s + " opt: "+ sOption);
                            }*/
                            for (let [sFuncCond, cActionType, sWhat, ...eAct] of lActions) {
                            // action in lActions: [ condition, action type, replacement/suggestion/action[, iGroup[, message, URL]] ]
                                try {
                                    //helpers.echo(oEvalFunc[sFuncCond]);
                                    //console.log(oEvalFunc[sFuncCond]);
                                    bCondMemo = (!sFuncCond || oEvalFunc[sFuncCond](s, sx, m, dDA, sCountry, bCondMemo));
                                    if (bCondMemo) {
                                        switch (cActionType) {
                                            case "-":
                                                // grammar error
                                                //helpers.echo("-> error detected in " + sLineId + "\nzRegex: " + zRegex.source);
                                                //console.log("-> error detected in " + sLineId + "\nzRegex: " + zRegex.source);
                                                nErrorStart = nOffset + m.start[eAct[0]];
                                                if (!dErrs.has(nErrorStart) || nPriority > dPriority.get(nErrorStart)) {
                                                    dErrs.set(nErrorStart, this._createError(s, sx, sWhat, nOffset, m, eAct[0], sLineId, sRuleId, bUppercase, eAct[1], eAct[2], bIdRule, sOption, bContext));
                                                    dPriority.set(nErrorStart, nPriority);
                                                }
                                                break;
                                            case "~":
                                                // text processor
                                                //helpers.echo("-> text processor by " + sLineId + "\nzRegex: " + zRegex.source);
                                                //console.log("-> text processor by " + sLineId + "\nzRegex: " + zRegex.source);
                                                s = this._rewrite(s, sWhat, eAct[0], m, bUppercase);
                                                bChange = true;
                                                if (bDebug) {
                                                    helpers.echo("~ " + s + "  -- " + m[eAct[0]] + "  # " + sLineId);
                                                    console.log("~ " + s + "  -- " + m[eAct[0]] + "  # " + sLineId);
                                                }
                                                break;
                                            case "=":
                                                // disambiguation
                                                //helpers.echo("-> disambiguation by " + sLineId + "\nzRegex: " + zRegex.source);
                                                //console.log("-> disambiguation by " + sLineId + "\nzRegex: " + zRegex.source);
                                                oEvalFunc[sWhat](s, m, dDA);
                                                if (bDebug) {
                                                    helpers.echo("= " + m[0] + "  # " + sLineId + "\nDA: " + dDA.gl_toString());
                                                    console.log("= " + m[0] + "  # " + sLineId + "\nDA: " + dDA.gl_toString());
                                                }
                                                break;
                                            case ">":
                                                // we do nothing, this test is just a condition to apply all following actions
                                                break;
                                            default:
                                                helpers.echo("# error: unknown action at " + sLineId);
                                                console.log("# error: unknown action at " + sLineId);
                                        }
                                    } else {
                                        if (cActionType == ">") {
                                            break;
                                        }
                                    }
                                }
                                catch (e) {
                                    helpers.echo(s);
                                    helpers.echo("# line id: " + sLineId + "\n# rule id: " + sRuleId);
                                    helpers.logerror(e);
                                    console.log(s);
                                    console.log("# line id: " + sLineId + "\n# rule id: " + sRuleId);
                                    console.error(e);
                                }
                            }
                        }
                    }
                }
            }
        }
265
266
267
268
269
270
271
272

273
274
275
276
277
278
279
258
259
260
261
262
263
264

265
266
267
268
269
270
271
272







-
+







            if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) {
                sNew = sNew.gl_toCapitalize();
            }
        } else {
            sNew = sRepl.gl_expand(m);
            sNew = sNew + " ".repeat(ln-sNew.length);
        }
        //helpers.echo("\n"+s+"\nstart: "+m.start[iGroup]+" end:"+m.end[iGroup])
        //console.log("\n"+s+"\nstart: "+m.start[iGroup]+" end:"+m.end[iGroup])
        return s.slice(0, m.start[iGroup]) + sNew + s.slice(m.end[iGroup]);
    },

    // Actions on rules

    ignoreRule: function (sRuleId) {
        _aIgnoredRules.add(sRuleId);
302
303
304
305
306
307
308
309

310
311
312
313
314
315
316
295
296
297
298
299
300
301

302
303
304
305
306
307
308
309







-
+







                    if (!sFilter || sRuleId.test(sFilter)) {
                        yield [sOption, sLineId, sRuleId];
                    }
                }
            }
        }
        catch (e) {
            helpers.logerror(e);
            console.error(e);
        }
    },

    _getRules: function (bParagraph) {
        if (!bParagraph) {
            return gc_rules.lSentenceRules;
        }
327
328
329
330
331
332
333
334

335
336
337
338
339
340
341
320
321
322
323
324
325
326

327
328
329
330
331
332
333
334







-
+







            } else {
                _oSpellChecker = new SpellChecker("${lang}", sPath, "${dic_main_filename_js}", "${dic_extended_filename_js}", "${dic_community_filename_js}", "${dic_personal_filename_js}");
            }
            _sAppContext = sContext;
            _dOptions = gc_options.getOptions(sContext).gl_shallowCopy();     // duplication necessary, to be able to reset to default
        }
        catch (e) {
            helpers.logerror(e);
            console.error(e);
        }
    },

    // Accessor: returns the shared _oSpellChecker variable as-is
    // (no copy is made and nothing checks that it has been initialised).
    getSpellChecker: function () {
        return _oSpellChecker;
    },

371
372
373
374
375
376
377
378

379
380
381
382

383
384
385
386

387
388

389
390
391
392
393
394

395
396
397
398
399
400
401
402

403
404
405

406
407
408
409
410

411
412
413
414

415
416
417
418
419
420
421
422
423
424

425
426
427

428
429
430
431
432

433
434
435
436

437
438
439
440
441
442
443
364
365
366
367
368
369
370

371
372
373
374

375
376
377
378

379
380

381
382
383
384
385
386

387
388
389
390
391
392
393
394

395
396
397

398
399
400
401
402

403
404
405
406

407
408
409
410
411
412
413
414
415
416

417
418
419

420
421
422
423
424

425
426
427
428

429
430
431
432
433
434
435
436







-
+



-
+



-
+

-
+





-
+







-
+


-
+




-
+



-
+









-
+


-
+




-
+



-
+







    // return true if option sOpt is active
    return _dOptions.get(sOpt);
}

function displayInfo (dDA, aWord) {
    // Debugging aid: reports what is known about a word, always returns true.
    //   dDA   — map of position -> morphologies chosen by the disambiguator
    //   aWord — pair [position, word]; a falsy value means "no word"
    // Every message is sent to helpers.echo first, then to console.log.
    const fReport = function (sMessage) {
        helpers.echo(sMessage);
        console.log(sMessage);
    };
    if (!aWord) {
        fReport("> nothing to find");
        return true;
    }
    const sToken = aWord[1];
    // look in the cache first; only on a miss ask _storeMorphFromFSA to fetch the word
    const bKnown = _dAnalyses.has(sToken) || _storeMorphFromFSA(sToken);
    if (!bKnown) {
        fReport("> not in FSA");
        return true;
    }
    const nPos = aWord[0];
    if (dDA.has(nPos)) {
        fReport("DA: " + dDA.get(nPos));
    }
    fReport("FSA: " + _dAnalyses.get(sToken));
    return true;
}

function _storeMorphFromFSA (sWord) {
    // Asks _oSpellChecker for the morphologies of sWord and caches the answer in _dAnalyses.
    // Returns true when the cached value is truthy, false otherwise.
    // The answer is stored even when it is falsy.
    const lMorph = _oSpellChecker.getMorph(sWord);
    _dAnalyses.set(sWord, lMorph);
    return Boolean(_dAnalyses.get(sWord));
}

function morph (dDA, aWord, sPattern, bStrict=true, bNoWord=false) {
    // Tests the morphologies of a word against sPattern.
    //   dDA      — map of position -> morphologies chosen by the disambiguator
    //   aWord    — pair [position, word]; when falsy, bNoWord is returned
    //   sPattern — pattern handed to String.prototype.search on each morphology
    //   bStrict  — true: every morphology must match; false: one match is enough
    // Returns false when the word is unknown or has no morphology.
    if (!aWord) {
        return bNoWord;
    }
    const sToken = aWord[1];
    // look in the cache first; only on a miss ask _storeMorphFromFSA to fetch the word
    const bKnown = _dAnalyses.has(sToken) || _storeMorphFromFSA(sToken);
    if (!bKnown) {
        return false;
    }
    // morphologies selected by the disambiguator take precedence over the cached ones
    const nPos = aWord[0];
    let lTags;
    if (dDA.has(nPos)) {
        lTags = dDA.get(nPos);
    } else {
        lTags = _dAnalyses.get(sToken);
    }
    if (lTags.length === 0) {
        return false;
    }
    const fMatches = (sTag) => sTag.search(sPattern) >= 0;
    return bStrict ? lTags.every(fMatches) : lTags.some(fMatches);
}

function morphex (dDA, aWord, sPattern, sNegPattern, bNoWord=false) {
    // Tests the morphologies of a word against a positive and a negative pattern.
    //   dDA         — map of position -> morphologies chosen by the disambiguator
    //   aWord       — pair [position, word]; when falsy, bNoWord is returned
    //   sPattern    — at least one morphology must match it
    //   sNegPattern — no morphology may match it
    // Returns false when the word is unknown or has no morphology.
    if (!aWord) {
        return bNoWord;
    }
    const sToken = aWord[1];
    // look in the cache first; only on a miss ask _storeMorphFromFSA to fetch the word
    if (!(_dAnalyses.has(sToken) || _storeMorphFromFSA(sToken))) {
        return false;
    }
    // morphologies selected by the disambiguator take precedence over the cached ones
    const nPos = aWord[0];
    let lTags;
    if (dDA.has(nPos)) {
        lTags = dDA.get(nPos);
    } else {
        lTags = _dAnalyses.get(sToken);
    }
    if (lTags.length === 0) {
        return false;
    }
    // the negative pattern is checked first and vetoes the word
    const bExcluded = lTags.some((sTag) => sTag.search(sNegPattern) >= 0);
    if (bExcluded) {
        return false;
    }
    return lTags.some((sTag) => sTag.search(sPattern) >= 0);
}
528
529
530
531
532
533
534
535

536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551

552
553
554
555

556
557
558
559
560
561
562
521
522
523
524
525
526
527

528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543

544
545
546
547

548
549
550
551
552
553
554
555







-
+















-
+



-
+







    try {
        if (zNegPattern && zNegPattern.test(s)) {
            return false;
        }
        return zPattern.test(s);
    }
    catch (e) {
        helpers.logerror(e);
        console.error(e);
    }
    return false;
}

function look_chk1 (dDA, s, nOffset, zPattern, sPatternGroup1, sNegPatternGroup1=null) {
    // Returns true if s matches zPattern and the morphologies of the first captured
    // group satisfy sPatternGroup1.
    //   dDA               — disambiguation map, passed through to morph/morphex
    //   s                 — text to search
    //   nOffset           — added to the group's start index to build the word position
    //   zPattern          — regex object providing gl_exec2()
    //   sPatternGroup1    — pattern the morphologies of group 1 must match
    //   sNegPatternGroup1 — optional pattern the morphologies must NOT match
    // Returns false when there is no match or when the check throws.
    let m = zPattern.gl_exec2(s, null);
    if (!m) {
        return false;
    }
    try {
        let sWord = m[1];
        let nPos = m.start[1] + nOffset;
        if (sNegPatternGroup1) {
            // with a negative pattern, morphex applies both the positive and the negative test
            return morphex(dDA, [nPos, sWord], sPatternGroup1, sNegPatternGroup1);
        }
        // non-strict check: one matching morphology is enough
        return morph(dDA, [nPos, sWord], sPatternGroup1, false);
    }
    catch (e) {
        helpers.logerror(e);
        console.error(e);
        return false;
    }
}


//////// Disambiguator