Grammalecte  Diff

Differences From Artifact [26e5034903]:

To Artifact [4598bbd432]:


1
2
3





4
5
6
7
8





9
10
11
12
13
14
15
16
17
18
19
20



1
2
3
4
5
6
7
8


9
10
11
12
13
14
15
16
17

18
19
20
21
22
23
24
-
-
-
+
+
+
+
+



-
-
+
+
+
+
+




-







//// IBDAWG
/*jslint esversion: 6*/
/*global console,require,exports*/
// IBDAWG

/* jshint esversion:6, -W097 */
/* jslint esversion:6 */
/* global require, exports, console, __dirname */

"use strict";


if (typeof(require) !== 'undefined') {
if(typeof(process) !== 'undefined') {
    var str_transform = require("./str_transform.js");
    var helpers = require("./helpers.js");
    var char_player = require("./char_player.js");
} else if (typeof(require) !== 'undefined') {
    var str_transform = require("resource://grammalecte/graphspell/str_transform.js");
    var helpers = require("resource://grammalecte/graphspell/helpers.js");
    var char_player = require("resource://grammalecte/graphspell/char_player.js");
}


// Don’t remove <string>. Necessary in TB.
${string}
${map}
${set}


93
94
95
96
97
98
99




100


101
102
103
104
105
106
107
97
98
99
100
101
102
103
104
105
106
107

108
109
110
111
112
113
114
115
116







+
+
+
+
-
+
+







    // INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH

    constructor (param1, sPath="") {
        // param1 can be a filename or a object with all the necessary data.
        try {
            let oData = null;
            if (typeof(param1) == "string") {
                let sURL;
                if(typeof(process) !== 'undefined') {
                    sURL = (sPath !== "") ? sPath + "/" + param1 : __dirname + "/_dictionaries/"+param1;
                } else {
                let sURL = (sPath !== "") ? sPath + "/" + param1 : "resource://grammalecte/graphspell/_dictionaries/"+param1;
                    sURL = (sPath !== "") ? sPath + "/" + param1 : "resource://grammalecte/graphspell/_dictionaries/"+param1;
                }
                oData = JSON.parse(helpers.loadFile(sURL));
            } else {
                oData = param1;
            }
            Object.assign(this, oData);
        }
        catch (e) {
219
220
221
222
223
224
225
226

227
228
229
230
231
232
233
228
229
230
231
232
233
234

235
236
237
238
239
240
241
242







-
+







            "l2grams": this.l2grams
        };
        return oJSON;
    }

    isValidToken (sToken) {
        // checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)
        sToken = char_player.spellingNormalization(sToken)
        sToken = char_player.spellingNormalization(sToken);
        if (this.isValid(sToken)) {
            return true;
        }
        if (sToken.includes("-")) {
            if (sToken.gl_count("-") > 4) {
                return true;
            }
296
297
298
299
300
301
302
303

304
305
306
307
308
309
310
311
312
313
314
315
316
317


318
319
320
321
322
323
324
325
326
327
328
329
330

331
332
333

334
335
336
337
338
339
340
305
306
307
308
309
310
311

312
313
314
315
316
317
318
319
320
321
322
323
324


325
326
327
328
329
330
331
332
333
334
335
336
337
338

339
340


341
342
343
344
345
346
347
348







-
+












-
-
+
+












-
+

-
-
+







            }
        }
        return Boolean(this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask);
    }

    getMorph (sWord) {
        // retrieves morphologies list, different casing allowed
        sWord = char_player.spellingNormalization(sWord)
        sWord = char_player.spellingNormalization(sWord);
        let l = this.morph(sWord);
        if (sWord[0].gl_isUpperCase()) {
            l.push(...this.morph(sWord.toLowerCase()));
            if (sWord.gl_isUpperCase() && sWord.length > 1) {
                l.push(...this.morph(sWord.gl_toCapitalize()));
            }
        }
        return l;
    }

    suggest (sWord, nSuggLimit=10) {
        // returns a array of suggestions for <sWord>
        //const t0 = Date.now();
        sWord = char_player.spellingNormalization(sWord)
	//console.time("Suggestions for " + sWord + ");
        sWord = char_player.spellingNormalization(sWord);
        let sPfx = "";
        let sSfx = "";
        [sPfx, sWord, sSfx] = char_player.cut(sWord);
        let nMaxSwitch = Math.max(Math.floor(sWord.length / 3), 1);
        let nMaxDel = Math.floor(sWord.length / 5);
        let nMaxHardRepl = Math.max(Math.floor((sWord.length - 5) / 4), 1);
        let nMaxJump = Math.max(Math.floor(sWord.length / 4), 1);
        let oSuggResult = new SuggResult(sWord);
        this._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump);
        let aSugg = oSuggResult.getSuggestions(nSuggLimit);
        if (sSfx || sPfx) {
            // we add what we removed
            return aSugg.map( (sSugg) => { return sPfx + sSugg + sSfx } );
            return aSugg.map( (sSugg) => { return sPfx + sSugg + sSfx; } );
        }
        //const t1 = Date.now();
        //console.log("Suggestions for " + sWord + " in " + ((t1-t0)/1000).toString() + " s");
	//console.timeEnd("Suggestions for " + sWord + ");
        return aSugg;
    }

    _suggest (oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nMaxJump=0, nDist=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=false) {
        // returns a set of suggestions
        // recursive function
        if (sRemain == "") {
587
588
589
590
591
592
593
594

595
596
597
598
599
600
601
595
596
597
598
599
600
601

602
603
604
605
606
607
608
609







-
+







                }
                iAddr = iEndArcAddr + this.nBytesNodeAddress;
            }
        }
    }

    * _getArcs1 (iAddr) {
        "generator: return all arcs at <iAddr> as tuples of (nVal, iAddr)"
        // generator: return all arcs at <iAddr> as tuples of (nVal, iAddr)
        while (true) {
            let iEndArcAddr = iAddr+this.nBytesArc;
            let nRawArc = this._convBytesToInteger(this.byDic.slice(iAddr, iEndArcAddr));
            yield [nRawArc & this._arcMask, this._convBytesToInteger(this.byDic.slice(iEndArcAddr, iEndArcAddr+this.nBytesNodeAddress))];
            if (nRawArc & this._lastArcMask) {
                break;
            }