Grammalecte  Diff

Differences From Artifact [39524e288d]:

To Artifact [a6b92daf69]:


23
24
25
26
27
28
29
30

31
32
33
34
35
36
37
38
39
40
41
42
43
44
45

46
47
48
49
50
51
52
23
24
25
26
27
28
29

30
31
32
33
34
35
36
37
38
39
40
41
42
43
44

45
46
47
48
49
50
51
52







-
+














-
+









class SuggResult {
    // Structure for storing, classifying and filtering suggestions

    constructor (sWord, nDistLimit=-1) {
        this.sWord = sWord;
        this.sSimplifiedWord = char_player.simplifyWord(sWord);
        this.sSimplifiedWord = str_transform.simplifyWord(sWord);
        this.nDistLimit = (nDistLimit >= 0) ? nDistLimit :  Math.floor(sWord.length / 3) + 1;
        this.nMinDist = 1000;
        this.aSugg = new Set();
        this.dSugg = new Map([ [0, []],  [1, []],  [2, []] ]);
        this.aAllSugg = new Set();      // all found words even those refused
    }

    addSugg (sSugg, nDeep=0) {
        // add a suggestion
        if (this.aAllSugg.has(sSugg)) {
            return;
        }
        this.aAllSugg.add(sSugg);
        if (!this.aSugg.has(sSugg)) {
            let nDist = str_transform.distanceDamerauLevenshtein(this.sSimplifiedWord, char_player.simplifyWord(sSugg));
            let nDist = str_transform.distanceDamerauLevenshtein(this.sSimplifiedWord, str_transform.simplifyWord(sSugg));
            if (nDist <= this.nDistLimit) {
                if (sSugg.includes(" ")) { // add 1 to distance for split suggestions
                    nDist += 1;
                }
                if (!this.dSugg.has(nDist)) {
                    this.dSugg.set(nDist, []);
                }
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
75
76
77
78
79
80
81

82
83
84
85
86
87
88







-







                break;
            }
            lRes.push(...lSugg);
            if (lRes.length > nSuggLimit) {
                break;
            }
        }
        lRes = char_player.filterSugg(lRes);
        if (this.sWord.gl_isUpperCase()) {
            lRes = lRes.map((sSugg) => { return sSugg.toUpperCase(); });
            lRes = [...new Set(lRes)];
        }
        else if (this.sWord.slice(0,1).gl_isUpperCase()) {
            lRes = lRes.map((sSugg) => { return sSugg.slice(0,1).toUpperCase() + sSugg.slice(1); });
            lRes = [...new Set(lRes)];
193
194
195
196
197
198
199








200
201
202
203
204
205
206
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213







+
+
+
+
+
+
+
+







                break;
            default:
                throw ValueError("# Error: unknown code: " + this.nCompressionMethod);
        }
        //console.log(this.getInfo());
        this.bAcronymValid = true;
        this.bNumAtLastValid = false;

        // lexicographer module ?
        this.lexicographer = null;
        // JS still sucks: we’ll try importation when importation will be available in Workers. Still waiting...
        if (self && self.hasOwnProperty("lexgraph_"+this.sLangCode)) { // self is the Worker
            this.lexicographer = self["lexgraph_"+this.sLangCode];
        }

    }

    getInfo () {
        return  `  Language: ${this.sLangName}   Lang code: ${this.sLangCode}   Dictionary name: ${this.sDicName}\n` +
                `  Compression method: ${this.nCompressionMethod}   Date: ${this.sDate}   Stemming: ${this.cStemming}FX\n` +
                `  Arcs values:  ${this.nArcVal} = ${this.nChar} characters,  ${this.nAff} affixes,  ${this.nTag} tags\n` +
                `  Dictionary: ${this.nEntry} entries,    ${this.nNode} nodes,   ${this.nArc} arcs\n` +
324
325
326
327
328
329
330

331


332
333
334
335
336
337
338
339
340
341
342



343
344
345
346
347
348
349
350
351
352
353
354

355
356
357
358
359
360
361
331
332
333
334
335
336
337
338

339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365

366
367
368
369
370
371
372
373







+
-
+
+











+
+
+











-
+








    suggest (sWord, nSuggLimit=10, bSplitTrailingNumbers=false) {
        // returns a array of suggestions for <sWord>
        //console.time("Suggestions for " + sWord);
        sWord = str_transform.spellingNormalization(sWord);
        let sPfx = "";
        let sSfx = "";
        if (this.lexicographer) {
        [sPfx, sWord, sSfx] = char_player.cut(sWord);
            [sPfx, sWord, sSfx] = this.lexicographer.split(sWord);
        }
        let nMaxSwitch = Math.max(Math.floor(sWord.length / 3), 1);
        let nMaxDel = Math.floor(sWord.length / 5);
        let nMaxHardRepl = Math.max(Math.floor((sWord.length - 5) / 4), 1);
        let nMaxJump = Math.max(Math.floor(sWord.length / 4), 1);
        let oSuggResult = new SuggResult(sWord);
        if (bSplitTrailingNumbers) {
            this._splitTrailingNumbers(oSuggResult, sWord);
        }
        this._splitSuggest(oSuggResult, sWord);
        this._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump);
        let aSugg = oSuggResult.getSuggestions(nSuggLimit);
        if (this.lexicographer) {
            aSugg = this.lexicographer.filterSugg(aSugg);
        }
        if (sSfx || sPfx) {
            // we add what we removed
            return aSugg.map( (sSugg) => { return sPfx + sSugg + sSfx; } );
        }
        //console.timeEnd("Suggestions for " + sWord);
        return aSugg;
    }

    _splitTrailingNumbers (oSuggResult, sWord) {
        let m = /^([a-zA-Zà-öÀ-Ö_ø-ÿØ-ßĀ-ʯfi-st][a-zA-Zà-öÀ-Ö_ø-ÿØ-ßĀ-ʯfi-st-]+)([0-9]+)$/.exec(sWord);
        if (m  &&  !m[1].endsWith("-")  &&  !m[1].endsWith("_")) {
            oSuggResult.addSugg(m[1] + " " + char_player.numbersToExponent(m[2]));
            oSuggResult.addSugg(m[1] + " " + str_transform.numbersToExponent(m[2]));
        }
    }

    _splitSuggest (oSuggResult, sWord) {
        // split at apostrophes
        for (let cSplitter of "'’") {
            if (sWord.includes(cSplitter)) {