Grammalecte  Diff

Differences From Artifact [73e27f350e]:

To Artifact [d566558346]:


24
25
26
27
28
29
30

31
32
33
34




35
36
37
38
39
40
41
    constructor (sWord, nDistLimit=-1) {
        this.sWord = sWord;
        this.sSimplifiedWord = char_player.simplifyWord(sWord);
        this.nDistLimit = (nDistLimit >= 0) ? nDistLimit :  Math.floor(sWord.length / 3) + 1;
        this.nMinDist = 1000;
        this.aSugg = new Set();
        this.dSugg = new Map([ [0, []],  [1, []],  [2, []] ]);

    }

    addSugg (sSugg, nDeep=0) {
        // add a suggestion




        if (!this.aSugg.has(sSugg)) {
            let nDist = str_transform.distanceDamerauLevenshtein(this.sSimplifiedWord, char_player.simplifyWord(sSugg));
            if (nDist <= this.nDistLimit) {
                if (!this.dSugg.has(nDist)) {
                    this.dSugg.set(nDist, []);
                }
                this.dSugg.get(nDist).push(sSugg);







>




>
>
>
>







24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
    constructor (sWord, nDistLimit=-1) {
        this.sWord = sWord;
        this.sSimplifiedWord = char_player.simplifyWord(sWord);
        this.nDistLimit = (nDistLimit >= 0) ? nDistLimit :  Math.floor(sWord.length / 3) + 1;
        this.nMinDist = 1000;
        this.aSugg = new Set();
        this.dSugg = new Map([ [0, []],  [1, []],  [2, []] ]);
        this.aAllSugg = new Set();      // all found words even those refused
    }

    addSugg (sSugg, nDeep=0) {
        // add a suggestion
        if (this.aAllSugg.has(sSugg)) {
            return;
        }
        this.aAllSugg.add(sSugg);
        if (!this.aSugg.has(sSugg)) {
            let nDist = str_transform.distanceDamerauLevenshtein(this.sSimplifiedWord, char_player.simplifyWord(sSugg));
            if (nDist <= this.nDistLimit) {
                if (!this.dSugg.has(nDist)) {
                    this.dSugg.set(nDist, []);
                }
                this.dSugg.get(nDist).push(sSugg);
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
        for (let i = 0;  i < this.sByDic.length;  i+=2) {
            lTemp.push(parseInt(this.sByDic.slice(i, i+2), 16));
        }
        this.byDic = lTemp;
        //this.byDic = new Uint8Array(lTemp);  // not quicker, even slower
        /* end of bug workaround */

        if (!this.sHeader.startsWith("/pyfsa/")) {
            throw TypeError("# Error. Not a pyfsa binary dictionary. Header: " + this.sHeader);
        }
        if (!(this.nCompressionMethod == 1 || this.nCompressionMethod == 2 || this.nCompressionMethod == 3)) {
            throw RangeError("# Error. Unknown dictionary compression method: " + this.nCompressionMethod);
        }
        // <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value
        this.dChar = helpers.objectToMap(this.dChar);
        this.dCharVal = this.dChar.gl_reverse();







|
|







120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
        for (let i = 0;  i < this.sByDic.length;  i+=2) {
            lTemp.push(parseInt(this.sByDic.slice(i, i+2), 16));
        }
        this.byDic = lTemp;
        //this.byDic = new Uint8Array(lTemp);  // not quicker, even slower
        /* end of bug workaround */

        if (!(this.sHeader.startsWith("/grammalecte-fsa/") || this.sHeader.startsWith("/pyfsa/"))) {
            throw TypeError("# Error. Not a grammalecte-fsa binary dictionary. Header: " + this.sHeader);
        }
        if (!(this.nCompressionMethod == 1 || this.nCompressionMethod == 2 || this.nCompressionMethod == 3)) {
            throw RangeError("# Error. Unknown dictionary compression method: " + this.nCompressionMethod);
        }
        // <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value
        this.dChar = helpers.objectToMap(this.dChar);
        this.dCharVal = this.dChar.gl_reverse();
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
                `  Arcs values:  ${this.nArcVal} = ${this.nChar} characters,  ${this.nAff} affixes,  ${this.nTag} tags\n` +
                `  Dictionary: ${this.nEntry} entries,    ${this.nNode} nodes,   ${this.nArc} arcs\n` +
                `  Address size: ${this.nBytesNodeAddress} bytes,  Arc size: ${this.nBytesArc} bytes\n`;
    }

    getJSON () {
        let oJSON = {
            "sHeader": "/pyfsa/",
            "sLangCode": this.sLangCode,
            "sLangName": this.sLangName,
            "sDicName": this.sDicName,
            "sFileName": this.sFileName,
            "sDate": this.sDate,
            "nEntry": this.nEntry,
            "nChar": this.nChar,







|







184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
                `  Arcs values:  ${this.nArcVal} = ${this.nChar} characters,  ${this.nAff} affixes,  ${this.nTag} tags\n` +
                `  Dictionary: ${this.nEntry} entries,    ${this.nNode} nodes,   ${this.nArc} arcs\n` +
                `  Address size: ${this.nBytesNodeAddress} bytes,  Arc size: ${this.nBytesArc} bytes\n`;
    }

    getJSON () {
        let oJSON = {
            "sHeader": "/grammalecte-fsa/",
            "sLangCode": this.sLangCode,
            "sLangName": this.sLangName,
            "sDicName": this.sDicName,
            "sFileName": this.sFileName,
            "sDate": this.sDate,
            "nEntry": this.nEntry,
            "nChar": this.nChar,
229
230
231
232
233
234
235



236
237
238
239
240
241
242
            return null;
        }
        if (sWord.includes("’")) { // ugly hack
            sWord = sWord.replace("’", "'");
        }
        if (this.lookup(sWord)) {
            return true;



        }
        if (sWord.charAt(0).gl_isUpperCase()) {
            if (sWord.length > 1) {
                if (sWord.gl_isTitle()) {
                    return !!this.lookup(sWord.toLowerCase());
                }
                if (sWord.gl_isUpperCase()) {







>
>
>







234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
            return null;
        }
        if (sWord.includes("’")) { // ugly hack
            sWord = sWord.replace("’", "'");
        }
        if (this.lookup(sWord)) {
            return true;
        }
        if (sWord.gl_isDigit()) {
            return true;
        }
        if (sWord.charAt(0).gl_isUpperCase()) {
            if (sWord.length > 1) {
                if (sWord.gl_isTitle()) {
                    return !!this.lookup(sWord.toLowerCase());
                }
                if (sWord.gl_isUpperCase()) {
325
326
327
328
329
330
331

332
333
334
335
336
337
338
                oSuggResult.addSugg(sNewWord);
            }
            for (let sTail of this._getTails(iAddr)) {
                oSuggResult.addSugg(sNewWord+sTail);
            }
            return;
        }

        let cCurrent = sRemain.slice(0, 1);
        for (let [cChar, jAddr] of this._getCharArcs(iAddr)) {
            if (char_player.d1to1.gl_get(cCurrent, cCurrent).indexOf(cChar) != -1) {
                this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, jAddr, sNewWord+cChar);
            }
            else if (!bAvoidLoop && nMaxHardRepl) {
                this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl-1, nDeep+1, jAddr, sNewWord+cChar, true);







>







333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
                oSuggResult.addSugg(sNewWord);
            }
            for (let sTail of this._getTails(iAddr)) {
                oSuggResult.addSugg(sNewWord+sTail);
            }
            return;
        }

        let cCurrent = sRemain.slice(0, 1);
        for (let [cChar, jAddr] of this._getCharArcs(iAddr)) {
            if (char_player.d1to1.gl_get(cCurrent, cCurrent).indexOf(cChar) != -1) {
                this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, jAddr, sNewWord+cChar);
            }
            else if (!bAvoidLoop && nMaxHardRepl) {
                this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl-1, nDeep+1, jAddr, sNewWord+cChar, true);
413
414
415
416
417
418
419




420









421

422
423
424
425
426

427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444

445
446
447
448

449
450
451
452
453
454
455
        }
        return aTails;
    }

    // morph (sWord) {
    //     is defined in constructor
    // }




    









    * select (sPattern="") {

        // generator: returns all entries which morphology fits <sPattern>
        let zPattern = null;
        if (sPattern !== "") {
            try {
                zPattern = new RegExp(sPattern);

            }
            catch (e) {
                console.log("Error in regex pattern");
                console.log(e.message);
            }
        }
        yield* this._select1(zPattern, 0, "");
    }

    // VERSION 1

    * _select1 (zPattern, iAddr, sWord) {
        // recursive generator
        for (let [nVal, jAddr] of this._getArcs1(iAddr)) {
            if (nVal <= this.nChar) {
                // simple character
                yield* this._select1(zPattern, jAddr, sWord + this.lArcVal[nVal]);
            } else {

                let sEntry = sWord + "\t" + this.funcStemming(sWord, this.lArcVal[nVal]);
                for (let [nMorphVal, _] of this._getArcs1(jAddr)) {
                    if (!zPattern || zPattern.test(this.lArcVal[nMorphVal])) {
                        yield sEntry + "\t" + this.lArcVal[nMorphVal];

                    }
                }
            }
        }
    }            

    _morph1 (sWord) {







>
>
>
>
|
>
>
>
>
>
>
>
>
>
|
>
|
|
|
|
|
>
|
|
|
|
|
<
|




|




|

>
|
|
|
|
>







422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455

456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
        }
        return aTails;
    }

    // morph (sWord) {
    //     is defined in constructor
    // }
    getSimilarEntries (sWord, nSuggLimit=10) {
        // return a list of tuples (similar word, stem, morphology)
        if (sWord == "") {
            return [];
        }
        let lResult = [];
        for (let sSimilar of this.suggest(sWord, nSuggLimit)) {
            for (let sMorph of this.getMorph(sSimilar)) {
                let nCut = sMorph.indexOf(" ");
                lResult.push( [sSimilar, sMorph.slice(1, nCut), sMorph.slice(nCut+1)] );
            }
        }
        return lResult;
    }

    * select (sFlexPattern="", sTagsPattern="") {
        // generator: returns all entries which flexion fits <sFlexPattern> and morphology fits <sTagsPattern>
        let zFlexPattern = null;
        let zTagsPattern = null;
        try {
            zFlexPattern = (sFlexPattern !== "") ? new RegExp(sFlexPattern) : null;
            zTagsPattern = (sTagsPattern !== "") ? new RegExp(sTagsPattern) : null;
        }
        catch (e) {
            console.log("Error in regex pattern");
            console.log(e.message);
        }

        yield* this._select1(zFlexPattern, zTagsPattern, 0, "");
    }

    // VERSION 1

    * _select1 (zFlexPattern, zTagsPattern, iAddr, sWord) {
        // recursive generator
        for (let [nVal, jAddr] of this._getArcs1(iAddr)) {
            if (nVal <= this.nChar) {
                // simple character
                yield* this._select1(zFlexPattern, zTagsPattern, jAddr, sWord + this.lArcVal[nVal]);
            } else {
                if (!zFlexPattern || zFlexPattern.test(sWord)) {
                    let sStem = this.funcStemming(sWord, this.lArcVal[nVal]);
                    for (let [nMorphVal, _] of this._getArcs1(jAddr)) {
                        if (!zTagsPattern || zTagsPattern.test(this.lArcVal[nMorphVal])) {
                            yield [sWord, sStem, this.lArcVal[nMorphVal]];
                        }
                    }
                }
            }
        }
    }            

    _morph1 (sWord) {