Grammalecte  Check-in [2f30b8f748]

Overview
Comment:[core][js] ibdawg: use SuggResult
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | spellsugg
Files: files | file ages | folders
SHA3-256: 2f30b8f748df87150fde43b073799aa488b9f038725eac24008632eae862d8e7
User & Date: olr on 2017-11-08 15:14:27
Other Links: branch diff | manifest | tags
Context
2017-11-08
21:16
[core] ibdawg: update char_player check-in: 51e3a2e76e user: olr tags: core, spellsugg
19:06
[core] merge spellsugg: faster suggestion engine check-in: ec1136c73d user: olr tags: trunk, core
15:14
[core][js] ibdawg: use SuggResult check-in: 2f30b8f748 user: olr tags: core, spellsugg
14:59
[core][js] ibdawg: object SuggResult check-in: 922f22848d user: olr tags: core, spellsugg
Changes

Modified gc_core/js/ibdawg.js from [f06dd595e9] to [ca747a7a44].

255
256
257
258
259
260
261

262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
        // returns a array of suggestions for <sWord>
        let sPfx = "";
        let sSfx = "";
        [sPfx, sWord, sSfx] = char_player.cut(sWord);
        let nMaxSwitch = Math.max(Math.floor(sWord.length / 3), 1);
        let nMaxDel = Math.floor(sWord.length / 5);
        let nMaxHardRepl = Math.max(Math.floor((sWord.length - 5) / 4), 1);

        let aSugg = this._suggest(sWord, nMaxSwitch, nMaxDel, nMaxHardRepl);
        if (sWord.gl_isTitle()) {
            aSugg.gl_update(this._suggest(sWord.toLowerCase(), nMaxSwitch, nMaxDel, nMaxHardRepl));
        }
        else if (sWord.gl_isLowerCase()) {
            aSugg.gl_update(this._suggest(sWord.gl_toCapitalize(), nMaxSwitch, nMaxDel, nMaxHardRepl));
        }
        // Set to Array
        aSugg = Array.from(aSugg);
        aSugg = char_player.filterSugg(aSugg);
        if (sWord.gl_isTitle()) {
            aSugg = aSugg.map((sSugg) => { return sSugg.gl_toCapitalize(); });
        }
        let dDistTemp = new Map();
        let sCleanWord = char_player.cleanWord(sWord);
        aSugg.forEach((sSugg) => { dDistTemp.set(sSugg, str_transform.distanceDamerauLevenshtein(sCleanWord, char_player.cleanWord(sSugg))); });
        aSugg = aSugg.sort((sA, sB) => { return dDistTemp.get(sA) - dDistTemp.get(sB); }).slice(0, nMaxSugg);
        dDistTemp.clear();
        if (sSfx || sPfx) {
            // we add what we removed
            return aSugg.map( (sSugg) => { return sPfx + sSugg + sSfx } );
        }
        return aSugg;
    }

    _suggest (sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=false) {
        // returns a set of suggestions
        // recursive function
        let aSugg = new Set();
        if (sRemain == "") {
            if (this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask) {
                aSugg.add(sNewWord);
            }
            for (let sTail of this._getTails(iAddr)) {
                aSugg.add(sNewWord+sTail);
            }
            return aSugg;
        }
        let cCurrent = sRemain.slice(0, 1);
        for (let [cChar, jAddr] of this._getSimilarCharArcs(cCurrent, iAddr)) {
            aSugg.gl_update(this._suggest(sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, jAddr, sNewWord+cChar));
        }
        if (!bAvoidLoop) { // avoid infinite loop
            if (sRemain.length > 1) {
                if (cCurrent == sRemain.slice(1, 2)) {
                    // same char, we remove 1 char without adding 1 to <sNewWord>
                    aSugg.gl_update(this._suggest(sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord));
                }
                else {
                    // switching chars
                    if (nMaxSwitch > 0) {
                        aSugg.gl_update(this._suggest(sRemain.slice(1, 2)+sRemain.slice(0, 1)+sRemain.slice(2), nMaxSwitch-1, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true));
                    }
                    // delete char
                    if (nMaxDel > 0) {
                        aSugg.gl_update(this._suggest(sRemain.slice(1), nMaxSwitch, nMaxDel-1, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true));
                    }
                }
                // Phonetic replacements
                for (let sRepl of char_player.d1toX.gl_get(cCurrent, [])) {
                    aSugg.gl_update(this._suggest(sRepl + sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true));
                }
                for (let sRepl of char_player.d2toX.gl_get(sRemain.slice(0, 2), [])) {
                    aSugg.gl_update(this._suggest(sRepl + sRemain.slice(2), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true));
                }
                // Hard replacements
                if (nDeep > 3 && nMaxHardRepl && sRemain.length >= 2) {
                    for (let [cChar, kAddr] of this._getCharArcs(iAddr)) {
                        if (!char_player.d1to1.gl_get(cCurrent, "").includes(cChar)) {
                            aSugg.gl_update(this._suggest(sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl-1, nDeep+1, kAddr, sNewWord+cChar, true));
                        }
                    }
                }
            }
            // end of word
            if (sRemain.length == 2) {
                for (let sRepl of char_player.dFinal2.gl_get(sRemain, [])) {
                    aSugg.gl_update(this._suggest(sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true));
                }
            }
            else if (sRemain.length == 1) {
                aSugg.gl_update(this._suggest("", nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true)); // remove last char and go on
                for (let sRepl of char_player.dFinal1.gl_get(sRemain, [])) {
                    aSugg.gl_update(this._suggest(sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true));
                }
            }
        }
        return aSugg;
    }

    * _getCharArcs (iAddr) {
        // generator: yield all chars and addresses from node at address <iAddr>
        for (let [nVal, jAddr] of this._getArcs(iAddr)) {
            if (nVal < this.nChar) {
                yield [this.dCharVal.get(nVal), jAddr];







>
|

|


|

<
|
<
<
<
<
<
<
<
<
<







|


<


|


|

|



|





|




|



|




|


|





|







|



|

|



<







255
256
257
258
259
260
261
262
263
264
265
266
267
268
269

270









271
272
273
274
275
276
277
278
279
280

281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338

339
340
341
342
343
344
345
        // returns a array of suggestions for <sWord>
        let sPfx = "";
        let sSfx = "";
        [sPfx, sWord, sSfx] = char_player.cut(sWord);
        let nMaxSwitch = Math.max(Math.floor(sWord.length / 3), 1);
        let nMaxDel = Math.floor(sWord.length / 5);
        let nMaxHardRepl = Math.max(Math.floor((sWord.length - 5) / 4), 1);
        let oSuggResult = new SuggResult(sWord);
        this._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl);
        if (sWord.gl_isTitle()) {
            this._suggest(oSuggResult, sWord.toLowerCase(), nMaxSwitch, nMaxDel, nMaxHardRepl);
        }
        else if (sWord.gl_isLowerCase()) {
            this._suggest(oSuggResult, sWord.gl_toCapitalize(), nMaxSwitch, nMaxDel, nMaxHardRepl);
        }

        let aSugg = oSuggResult.getSuggestions();









        if (sSfx || sPfx) {
            // we add what we removed
            return aSugg.map( (sSugg) => { return sPfx + sSugg + sSfx } );
        }
        return aSugg;
    }

    _suggest (oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=false) {
        // returns a set of suggestions
        // recursive function

        if (sRemain == "") {
            if (this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask) {
                oSuggResult.addSugg(sNewWord);
            }
            for (let sTail of this._getTails(iAddr)) {
                oSuggResult.addSugg(sNewWord+sTail);
            }
            return;
        }
        let cCurrent = sRemain.slice(0, 1);
        for (let [cChar, jAddr] of this._getSimilarCharArcs(cCurrent, iAddr)) {
            this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, jAddr, sNewWord+cChar);
        }
        if (!bAvoidLoop) { // avoid infinite loop
            if (sRemain.length > 1) {
                if (cCurrent == sRemain.slice(1, 2)) {
                    // same char, we remove 1 char without adding 1 to <sNewWord>
                    this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord);
                }
                else {
                    // switching chars
                    if (nMaxSwitch > 0) {
                        this._suggest(oSuggResult, sRemain.slice(1, 2)+sRemain.slice(0, 1)+sRemain.slice(2), nMaxSwitch-1, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true);
                    }
                    // delete char
                    if (nMaxDel > 0) {
                        this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel-1, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true);
                    }
                }
                // Phonetic replacements
                for (let sRepl of char_player.d1toX.gl_get(cCurrent, [])) {
                    this._suggest(oSuggResult, sRepl + sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true);
                }
                for (let sRepl of char_player.d2toX.gl_get(sRemain.slice(0, 2), [])) {
                    this._suggest(oSuggResult, sRepl + sRemain.slice(2), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true);
                }
                // Hard replacements
                if (nDeep > 3 && nMaxHardRepl && sRemain.length >= 2) {
                    for (let [cChar, kAddr] of this._getCharArcs(iAddr)) {
                        if (!char_player.d1to1.gl_get(cCurrent, "").includes(cChar)) {
                            this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl-1, nDeep+1, kAddr, sNewWord+cChar, true);
                        }
                    }
                }
            }
            // end of word
            if (sRemain.length == 2) {
                for (let sRepl of char_player.dFinal2.gl_get(sRemain, [])) {
                    this._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true);
                }
            }
            else if (sRemain.length == 1) {
                this._suggest(oSuggResult, "", nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true); // remove last char and go on
                for (let sRepl of char_player.dFinal1.gl_get(sRemain, [])) {
                    this._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true);
                }
            }
        }

    }

    * _getCharArcs (iAddr) {
        // generator: yield all chars and addresses from node at address <iAddr>
        for (let [nVal, jAddr] of this._getArcs(iAddr)) {
            if (nVal < this.nChar) {
                yield [this.dCharVal.get(nVal), jAddr];

Modified gc_core/py/ibdawg.py from [0927c91f92] to [0203105f4e].

301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
                        if cChar not in cp.d1to1.get(cCurrent, ""):
                            self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl-1, nDeep+1, kAddr, sNewWord+cChar, "[["+cChar+"]]", True)
            # end of word
            if len(sRemain) == 2:
                for sRepl in cp.dFinal2.get(sRemain, ()):
                    self._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, sRemain + " >> " + sRepl, True)
            elif len(sRemain) == 1:
                self._suggest(oSuggResult, "", nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, sRemain + " [last char removed] ", True) # remove last char and go o
                for sRepl in cp.dFinal1.get(sRemain, ()):
                    self._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, sRemain + " >> " + sRepl, True)
        return

    @timethis
    def suggest2 (self, sWord, nMaxSugg=10):
        "returns a set of suggestions for <sWord>"







|







301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
                        if cChar not in cp.d1to1.get(cCurrent, ""):
                            self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl-1, nDeep+1, kAddr, sNewWord+cChar, "[["+cChar+"]]", True)
            # end of word
            if len(sRemain) == 2:
                for sRepl in cp.dFinal2.get(sRemain, ()):
                    self._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, sRemain + " >> " + sRepl, True)
            elif len(sRemain) == 1:
                self._suggest(oSuggResult, "", nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, sRemain + " [last char removed] ", True) # remove last char and go on
                for sRepl in cp.dFinal1.get(sRemain, ()):
                    self._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, sRemain + " >> " + sRepl, True)
        return

    @timethis
    def suggest2 (self, sWord, nMaxSugg=10):
        "returns a set of suggestions for <sWord>"