Grammalecte  Diff

Differences From Artifact [2f4877bef2]:

To Artifact [46602d3f85]:


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16

17


18




19
20
21
22
23
24
25
1
2
3
4
5
6
7
8
9
10
11

12
13
14
15
16
17
18
19

20
21
22
23
24
25
26
27
28
29
30











-




+

+
+
-
+
+
+
+







// Grammar checker engine

"use strict";

${string}
${regex}
${map}


if (typeof(require) !== 'undefined') {
    var helpers = require("resource://grammalecte/helpers.js");
    var echo = require("resource://grammalecte/helpers.js").echo;
    var gc_options = require("resource://grammalecte/${lang}/gc_options.js");
    var gc_rules = require("resource://grammalecte/${lang}/gc_rules.js");
    var cregex = require("resource://grammalecte/${lang}/cregex.js");
    var text = require("resource://grammalecte/text.js");
    var echo = helpers.echo;
}
else if (typeof(console) !== "undefined") {
    var echo = function (o) { console.log(o); return true; };

}
else {
    var echo = function () { return true; }
}

function capitalizeArray (aArray) {
    // can’t map on user defined function??
    let aNew = [];
    for (let i = 0; i < aArray.length; i = i + 1) {
        aNew[i] = aArray[i].gl_toCapitalize();
    }
82
83
84
85
86
87
88
89

90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107

108
109
110
111
112
113
114
87
88
89
90
91
92
93

94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111

112
113
114
115
116
117
118
119







-
+

















-
+







            sText = sText.replace(/‑/g, "-"); // nobreakdash
        }

        // parse sentence
        for (let [iStart, iEnd] of this._getSentenceBoundaries(sText)) {
            if (4 < (iEnd - iStart) < 2000) {
                dDA.clear();
                //echo(sText.slice(iStart, iEnd));
                //helpers.echo(sText.slice(iStart, iEnd));
                try {
                    [, errs] = this._proofread(sText.slice(iStart, iEnd), sAlt.slice(iStart, iEnd), iStart, false, dDA, dPriority, sCountry, bDebug, bContext);
                    dErrors.gl_update(errs);
                }
                catch (e) {
                    helpers.logerror(e);
                }
            }
        }
        return Array.from(dErrors.values());
    },

    _zEndOfSentence: new RegExp ('([.?!:;…][ .?!… »”")]*|.$)', "g"),
    _zBeginOfParagraph: new RegExp ("^[-  –—.,;?!…]*", "ig"),
    _zEndOfParagraph: new RegExp ("[-  .,;?!…–—]*$", "ig"),

    _getSentenceBoundaries: function* (sText) {
        let mBeginOfSentence = this._zBeginOfParagraph.exec(sText)
        let mBeginOfSentence = this._zBeginOfParagraph.exec(sText);
        let iStart = this._zBeginOfParagraph.lastIndex;
        let m;
        while ((m = this._zEndOfSentence.exec(sText)) !== null) {
            yield [iStart, this._zEndOfSentence.lastIndex];
            iStart = this._zEndOfSentence.lastIndex;
        }
    },
124
125
126
127
128
129
130
131

132
133
134
135
136
137


138
139
140
141
142

143
144
145
146
147
148
149
150
151

152
153
154
155

156
157
158
159
160

161
162
163

164
165
166
167
168
169
170

171
172
173
174
175
176
177
178
179
180


181
182
183
184
185
186
187
129
130
131
132
133
134
135

136
137
138
139
140


141
142
143
144
145
146

147
148
149
150
151
152
153
154
155

156
157
158
159

160
161
162
163
164

165
166
167

168
169
170
171
172
173
174

175
176
177
178
179
180
181
182
183


184
185
186
187
188
189
190
191
192







-
+




-
-
+
+




-
+








-
+



-
+




-
+


-
+






-
+








-
-
+
+







        for (let [sOption, lRuleGroup] of this._getRules(bParagraph)) {
            if (!sOption || option(sOption)) {
                for (let [zRegex, bUppercase, sLineId, sRuleId, nPriority, lActions, lGroups, lNegLookBefore] of lRuleGroup) {
                    if (!_aIgnoredRules.has(sRuleId)) {
                        while ((m = zRegex.gl_exec2(s, lGroups, lNegLookBefore)) !== null) {
                            bCondMemo = null;
                            /*if (bDebug) {
                                echo(">>>> Rule # " + sLineId + " - Text: " + s + " opt: "+ sOption);
                                helpers.echo(">>>> Rule # " + sLineId + " - Text: " + s + " opt: "+ sOption);
                            }*/
                            for (let [sFuncCond, cActionType, sWhat, ...eAct] of lActions) {
                            // action in lActions: [ condition, action type, replacement/suggestion/action[, iGroup[, message, URL]] ]
                                try {
                                    //echo(oEvalFunc[sFuncCond]);
                                    bCondMemo = (!sFuncCond || oEvalFunc[sFuncCond](s, sx, m, dDA, sCountry, bCondMemo))
                                    //helpers.echo(oEvalFunc[sFuncCond]);
                                    bCondMemo = (!sFuncCond || oEvalFunc[sFuncCond](s, sx, m, dDA, sCountry, bCondMemo));
                                    if (bCondMemo) {
                                        switch (cActionType) {
                                            case "-":
                                                // grammar error
                                                //echo("-> error detected in " + sLineId + "\nzRegex: " + zRegex.source);
                                                //helpers.echo("-> error detected in " + sLineId + "\nzRegex: " + zRegex.source);
                                                nErrorStart = nOffset + m.start[eAct[0]];
                                                if (!dErrs.has(nErrorStart) || nPriority > dPriority.get(nErrorStart)) {
                                                    dErrs.set(nErrorStart, this._createError(s, sx, sWhat, nOffset, m, eAct[0], sLineId, sRuleId, bUppercase, eAct[1], eAct[2], bIdRule, sOption, bContext));
                                                    dPriority.set(nErrorStart, nPriority);
                                                }
                                                break;
                                            case "~":
                                                // text processor
                                                //echo("-> text processor by " + sLineId + "\nzRegex: " + zRegex.source);
                                                //helpers.echo("-> text processor by " + sLineId + "\nzRegex: " + zRegex.source);
                                                s = this._rewrite(s, sWhat, eAct[0], m, bUppercase);
                                                bChange = true;
                                                if (bDebug) {
                                                    echo("~ " + s + "  -- " + m[eAct[0]] + "  # " + sLineId);
                                                    helpers.echo("~ " + s + "  -- " + m[eAct[0]] + "  # " + sLineId);
                                                }
                                                break;
                                            case "=":
                                                // disambiguation
                                                //echo("-> disambiguation by " + sLineId + "\nzRegex: " + zRegex.source);
                                                //helpers.echo("-> disambiguation by " + sLineId + "\nzRegex: " + zRegex.source);
                                                oEvalFunc[sWhat](s, m, dDA);
                                                if (bDebug) {
                                                    echo("= " + m[0] + "  # " + sLineId + "\nDA: " + dDA.gl_toString());
                                                    helpers.echo("= " + m[0] + "  # " + sLineId + "\nDA: " + dDA.gl_toString());
                                                }
                                                break;
                                            case ">":
                                                // we do nothing, this test is just a condition to apply all following actions
                                                break;
                                            default:
                                                echo("# error: unknown action at " + sLineId);
                                                helpers.echo("# error: unknown action at " + sLineId);
                                        }
                                    } else {
                                        if (cActionType == ">") {
                                            break;
                                        }
                                    }
                                }
                                catch (e) {
                                    echo(s);
                                    echo("# line id: " + sLineId + "\n# rule id: " + sRuleId);
                                    helpers.echo(s);
                                    helpers.echo("# line id: " + sLineId + "\n# rule id: " + sRuleId);
                                    helpers.logerror(e);
                                }
                            }
                        }
                    }
                }
            }
219
220
221
222
223
224
225
226

227
228
229
230
231
232
233
224
225
226
227
228
229
230

231
232
233
234
235
236
237
238







-
+







            } else {
                oErr["aSuggestions"] = sRepl.gl_expand(m).split("|");
            }
        }
        // Message
        let sMessage = "";
        if (sMsg.slice(0,1) === "=") {
            sMessage = oEvalFunc[sMsg.slice(1)](s, m)
            sMessage = oEvalFunc[sMsg.slice(1)](s, m);
        } else {
            sMessage = sMsg.gl_expand(m);
        }
        if (bIdRule) {
            sMessage += " ##" + sLineId + " #" + sRuleId;
        }
        oErr["sMessage"] = sMessage;
258
259
260
261
262
263
264
265

266
267
268
269
270
271
272
263
264
265
266
267
268
269

270
271
272
273
274
275
276
277







-
+







            if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) {
                sNew = sNew.gl_toCapitalize();
            }
        } else {
            sNew = sRepl.gl_expand(m);
            sNew = sNew + " ".repeat(ln-sNew.length);
        }
        //echo("\n"+s+"\nstart: "+m.start[iGroup]+" end:"+m.end[iGroup])
        //helpers.echo("\n"+s+"\nstart: "+m.start[iGroup]+" end:"+m.end[iGroup])
        return s.slice(0, m.start[iGroup]) + sNew + s.slice(m.end[iGroup]);
    },

    // Actions on rules

    ignoreRule: function (sRuleId) {
        _aIgnoredRules.add(sRuleId);
351
352
353
354
355
356
357
358

359
360
361
362
363
364
365
366
367
368
369
370
371

372
373
374
375

376
377
378
379

380
381

382
383
384
385
386
387

388
389
390
391
392
393
394
395

396
397
398

399
400
401
402
403

404
405
406
407

408
409
410
411
412
413
414
415
416
417

418
419
420

421
422
423
424
425

426
427
428
429

430
431
432
433
434
435
436
356
357
358
359
360
361
362

363
364
365
366
367
368
369
370
371
372
373
374
375

376
377
378
379

380
381
382
383

384
385

386
387
388
389
390
391

392
393
394
395
396
397
398
399

400
401
402

403
404
405
406
407

408
409
410
411

412
413
414
415
416
417
418
419
420
421

422
423
424

425
426
427
428
429

430
431
432
433

434
435
436
437
438
439
440
441







-
+












-
+



-
+



-
+

-
+





-
+







-
+


-
+




-
+



-
+









-
+


-
+




-
+



-
+







    getDefaultOptions: function () {
        return gc_options.getOptions(_sAppContext).gl_shallowCopy();
    },

    resetOptions: function () {
        _dOptions = gc_options.getOptions(_sAppContext).gl_shallowCopy();
    }
}
};


//////// Common functions

function option (sOpt) {
    // return true if option sOpt is active
    return _dOptions.get(sOpt);
}

function displayInfo (dDA, aWord) {
    // for debugging: info of word
    if (!aWord) {
        echo("> nothing to find");
        helpers.echo("> nothing to find");
        return true;
    }
    if (!_dAnalyses.has(aWord[1]) && !_storeMorphFromFSA(aWord[1])) {
        echo("> not in FSA");
        helpers.echo("> not in FSA");
        return true;
    }
    if (dDA.has(aWord[0])) {
        echo("DA: " + dDA.get(aWord[0]));
        helpers.echo("DA: " + dDA.get(aWord[0]));
    }
    echo("FSA: " + _dAnalyses.get(aWord[1]));
    helpers.echo("FSA: " + _dAnalyses.get(aWord[1]));
    return true;
}

function _storeMorphFromFSA (sWord) {
    // retrieves morphologies list from _oDict -> _dAnalyses
    //echo("register: "+sWord + " " + _oDict.getMorph(sWord).toString())
    //helpers.echo("register: "+sWord + " " + _oDict.getMorph(sWord).toString())
    _dAnalyses.set(sWord, _oDict.getMorph(sWord));
    return !!_dAnalyses.get(sWord);
}

function morph (dDA, aWord, sPattern, bStrict=true, bNoWord=false) {
    // analyse a tuple (position, word), return true if sPattern in morphologies (disambiguation on)
    if (!aWord) {
        //echo("morph: noword, returns " + bNoWord);
        //helpers.echo("morph: noword, returns " + bNoWord);
        return bNoWord;
    }
    //echo("aWord: "+aWord.toString());
    //helpers.echo("aWord: "+aWord.toString());
    if (!_dAnalyses.has(aWord[1]) && !_storeMorphFromFSA(aWord[1])) {
        return false;
    }
    let lMorph = dDA.has(aWord[0]) ? dDA.get(aWord[0]) : _dAnalyses.get(aWord[1]);
    //echo("lMorph: "+lMorph.toString());
    //helpers.echo("lMorph: "+lMorph.toString());
    if (lMorph.length === 0) {
        return false;
    }
    //echo("***");
    //helpers.echo("***");
    if (bStrict) {
        return lMorph.every(s  =>  (s.search(sPattern) !== -1));
    }
    return lMorph.some(s  =>  (s.search(sPattern) !== -1));
}

function morphex (dDA, aWord, sPattern, sNegPattern, bNoWord=false) {
    // analyse a tuple (position, word), returns true if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on)
    if (!aWord) {
        //echo("morph: noword, returns " + bNoWord);
        //helpers.echo("morph: noword, returns " + bNoWord);
        return bNoWord;
    }
    //echo("aWord: "+aWord.toString());
    //helpers.echo("aWord: "+aWord.toString());
    if (!_dAnalyses.has(aWord[1]) && !_storeMorphFromFSA(aWord[1])) {
        return false;
    }
    let lMorph = dDA.has(aWord[0]) ? dDA.get(aWord[0]) : _dAnalyses.get(aWord[1]);
    //echo("lMorph: "+lMorph.toString());
    //helpers.echo("lMorph: "+lMorph.toString());
    if (lMorph.length === 0) {
        return false;
    }
    //echo("***");
    //helpers.echo("***");
    // check negative condition
    if (lMorph.some(s  =>  (s.search(sNegPattern) !== -1))) {
        return false;
    }
    // search sPattern
    return lMorph.some(s  =>  (s.search(sPattern) !== -1));
}
463
464
465
466
467
468
469
470

471
472
473
474
475
476
477
468
469
470
471
472
473
474

475
476
477
478
479
480
481
482







-
+







    // returns a list of sWord's stems
    if (!sWord) {
        return [];
    }
    if (!_dAnalyses.has(sWord) && !_storeMorphFromFSA(sWord)) {
        return [];
    }
    return [ for (s of _dAnalyses.get(sWord))  s.slice(1, s.indexOf(" ")) ];
    return _dAnalyses.get(sWord).map( s => s.slice(1, s.indexOf(" ")) );
}


//// functions to get text outside pattern scope

// warning: check compile_rules.py to understand how it works

505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
510
511
512
513
514
515
516


517

518
519
520

521
522
523
524
525
526
527







-
-

-



-







    return [iStart + _zNextWord.lastIndex - m[1].length, m[1]];
}

const _zPrevWord = new RegExp ("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_-]*) +$", "i");

function prevword1 (s, iEnd) {
    // get previous word (optimization)
    //echo("prev1, s:"+s);
    //echo("prev1, s.slice(0, iEnd):"+s.slice(0, iEnd));
    let m = _zPrevWord.exec(s.slice(0, iEnd));
    //echo("prev1, m:"+m);
    if (!m) {
        return null;
    }
    //echo("prev1: " + m.index + " " + m[1]);
    return [m.index, m[1]];
}

function look (s, zPattern, zNegPattern=null) {
    // seek zPattern in s (before/after/fulltext), if antipattern zNegPattern not in s
    try {
        if (zNegPattern && zNegPattern.test(s)) {
563
564
565
566
567
568
569
570
571
572
573
574

575
576
577
578
579
580
581
582
564
565
566
567
568
569
570

571
572
573

574

575
576
577
578
579
580
581







-



-
+
-







    }
    if (dDA.has(nPos)) {
        return true;
    }
    if (!_dAnalyses.has(sWord) && !_storeMorphFromFSA(sWord)) {
        return true;
    }
    //echo("morph: "+_dAnalyses.get(sWord).toString());
    if (_dAnalyses.get(sWord).length === 1) {
        return true;
    }
    let lSelect = [ for (sMorph of _dAnalyses.get(sWord))  if (sMorph.search(sPattern) !== -1)  sMorph ];
    let lSelect = _dAnalyses.get(sWord).filter( sMorph => sMorph.search(sPattern) !== -1 );
    //echo("lSelect: "+lSelect.toString());
    if (lSelect.length > 0) {
        if (lSelect.length != _dAnalyses.get(sWord).length) {
            dDA.set(nPos, lSelect);
        }
    } else if (lDefault) {
        dDA.set(nPos, lDefaul);
    }
592
593
594
595
596
597
598
599

600
601
602
603
604
605
606
607
591
592
593
594
595
596
597

598

599
600
601
602
603
604
605







-
+
-







    }
    if (!_dAnalyses.has(sWord) && !_storeMorphFromFSA(sWord)) {
        return true;
    }
    if (_dAnalyses.get(sWord).length === 1) {
        return true;
    }
    let lSelect = [ for (sMorph of _dAnalyses.get(sWord))  if (sMorph.search(sPattern) === -1)  sMorph ];
    let lSelect = _dAnalyses.get(sWord).filter( sMorph => sMorph.search(sPattern) === -1 );
    //echo("lSelect: "+lSelect.toString());
    if (lSelect.length > 0) {
        if (lSelect.length != _dAnalyses.get(sWord).length) {
            dDA.set(nPos, lSelect);
        }
    } else if (lDefault) {
        dDA.set(nPos, lDefault);
    }