Grammalecte  Check-in [c80ec8575d]

Overview
Comment:[core][js] ibdawg: spellchecking suggestion mechanism > hard replacements
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | core
Files: files | file ages | folders
SHA3-256: c80ec8575d757ee49f544a63d02f941c3281210e3b60a52814a650863571b8e5
User & Date: olr on 2017-10-22 09:07:21
Other Links: manifest | tags
Context
2017-10-22
09:15
[core][py] small code cleaning check-in: d78ef84a02 user: olr tags: trunk, core
09:07
[core][js] ibdawg: spellchecking suggestion mechanism > hard replacements check-in: c80ec8575d user: olr tags: trunk, core
08:26
[core][py] ibdawg: spellchecking suggestion mechanism > hard replacements check-in: 9fde57a772 user: olr tags: trunk, core
Changes

Modified gc_core/js/ibdawg.js from [f4fdb2bfbb] to [6ddb36891f].

186
187
188
189
190
191
192


193

194
195

196
197
198

199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216

217
218
219
220
221
222
223
224
225
226
227
228
229
230
231

232
233
234
235
236

237
238
239
240

241
242
243

244
245
246

247
248

249
250
251












252
253
254
255
256

257
258
259
260

261
262

263
264
265
266
267
268
269
186
187
188
189
190
191
192
193
194

195
196

197
198
199

200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217

218
219
220
221
222
223
224
225
226
227
228
229
230
231
232

233
234
235
236
237

238
239
240
241

242
243
244

245
246
247

248
249

250
251
252

253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268

269
270
271
272

273
274

275
276
277
278
279
280
281
282







+
+
-
+

-
+


-
+

















-
+














-
+




-
+



-
+


-
+


-
+

-
+


-
+
+
+
+
+
+
+
+
+
+
+
+




-
+



-
+

-
+







            }
        }
        return l;
    }

    suggest (sWord, nMaxSugg=10) {
        // returns a array of suggestions for <sWord>
        let nMaxDel = Math.floor(sWord.length / 5);
        let nMaxHardRepl = Math.max(Math.floor((sWord.length - 5) / 2), 1);
        let aSugg = this._suggest(sWord, Math.floor(sWord.length / 5));
        let aSugg = this._suggest(sWord, nMaxDel, nMaxHardRepl);
        if (sWord.gl_isTitle()) {
            aSugg.gl_update(this._suggest(sWord.toLowerCase(), Math.floor(sWord.length / 5)));
            aSugg.gl_update(this._suggest(sWord.toLowerCase(), nMaxDel, nMaxHardRepl));
        }
        else if (sWord.gl_isLowerCase()) {
            aSugg.gl_update(this._suggest(sWord.gl_toCapitalize(), Math.floor(sWord.length / 5)));
            aSugg.gl_update(this._suggest(sWord.gl_toCapitalize(), nMaxDel, nMaxHardRepl));
        }
        if (aSugg.size == 0) {
            aSugg.gl_update(this._suggestWithCrushedUselessChars(char_player.clearWord(sWord)));
        }
        // Set to Array
        aSugg = Array.from(aSugg);
        aSugg = aSugg.filter((sSugg) => { return !sSugg.endsWith("è") && !sSugg.endsWith("È"); }); // fr language 
        if (sWord.gl_isTitle()) {
            aSugg = aSugg.map((sSugg) => { return sSugg.gl_toCapitalize(); });
        }
        let dDistTemp = new Map();
        aSugg.forEach((sSugg) => { dDistTemp.set(sSugg, char_player.distanceDamerauLevenshtein(sWord, sSugg)); });
        aSugg = aSugg.sort((sA, sB) => { return dDistTemp.get(sA) - dDistTemp.get(sB); }).slice(0, nMaxSugg);
        dDistTemp.clear();
        return aSugg;
    }

    _suggest (sRemain, nMaxDel=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=false) {
    _suggest (sRemain, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=false) {
        // returns a set of suggestions
        // recursive function
        let aSugg = new Set();
        if (sRemain == "") {
            if (this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask) {
                aSugg.add(sNewWord);
            }
            for (let sTail of this._getTails(iAddr)) {
                aSugg.add(sNewWord+sTail);
            }
            return aSugg;
        }
        let cCurrent = sRemain.slice(0, 1);
        for (let [cChar, jAddr] of this._getSimilarArcs(cCurrent, iAddr)) {
            aSugg.gl_update(this._suggest(sRemain.slice(1), nMaxDel, nDeep+1, jAddr, sNewWord+cChar));
            aSugg.gl_update(this._suggest(sRemain.slice(1), nMaxDel, nMaxHardRepl, nDeep+1, jAddr, sNewWord+cChar));
        }
        if (!bAvoidLoop) { // avoid infinite loop
            if (cCurrent == sRemain.slice(1, 2)) {
                // same char, we remove 1 char without adding 1 to <sNewWord>
                aSugg.gl_update(this._suggest(sRemain.slice(1), nMaxDel, nDeep+1, iAddr, sNewWord));
                aSugg.gl_update(this._suggest(sRemain.slice(1), nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord));
            }
            else {
                // switching chars
                aSugg.gl_update(this._suggest(sRemain.slice(1, 2)+sRemain.slice(0, 1)+sRemain.slice(2), nMaxDel, nDeep+1, iAddr, sNewWord, true));
                aSugg.gl_update(this._suggest(sRemain.slice(1, 2)+sRemain.slice(0, 1)+sRemain.slice(2), nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true));
                // delete char
                if (nMaxDel > 0) {
                    aSugg.gl_update(this._suggest(sRemain.slice(1), nMaxDel-1, nDeep+1, iAddr, sNewWord, true));
                    aSugg.gl_update(this._suggest(sRemain.slice(1), nMaxDel-1, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true));
                }
            }
            // Replacements
            // Phonetic replacements
            for (let sRepl of char_player.d1toX.gl_get(cCurrent, [])) {
                aSugg.gl_update(this._suggest(sRepl + sRemain.slice(1), nMaxDel, nDeep+1, iAddr, sNewWord, true));
                aSugg.gl_update(this._suggest(sRepl + sRemain.slice(1), nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true));
            }
            for (let sRepl of char_player.d2toX.gl_get(sRemain.slice(0, 2), [])) {
                aSugg.gl_update(this._suggest(sRepl + sRemain.slice(2), nMaxDel, nDeep+1, iAddr, sNewWord, true));
                aSugg.gl_update(this._suggest(sRepl + sRemain.slice(2), nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true));
            }
            // Hard replacements
            if (nDeep > 3 && nMaxHardRepl && sRemain.length >= 2) {
                for (let [nVal, kAddr] of this._getArcs1(iAddr)) {
                    if (this.dCharVal.has(nVal)) {
                        let cChar = this.dCharVal.get(nVal);
                        if (!char_player.d1to1.gl_get(cCurrent, "").includes(cChar)) {
                            aSugg.gl_update(this._suggest(sRemain.slice(1), nMaxDel, nMaxHardRepl-1, nDeep+1, kAddr, sNewWord+cChar, true));
                        }
                    }
                }
            }
            // end of word
            if (sRemain.length == 2) {
                for (let sRepl of char_player.dFinal2.gl_get(sRemain, [])) {
                    aSugg.gl_update(this._suggest(sRepl, nMaxDel, nDeep+1, iAddr, sNewWord, true));
                    aSugg.gl_update(this._suggest(sRepl, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true));
                }
            }
            else if (sRemain.length == 1) {
                aSugg.gl_update(this._suggest("", nMaxDel, nDeep+1, iAddr, sNewWord, true)); // remove last char and go on
                aSugg.gl_update(this._suggest("", nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true)); // remove last char and go on
                for (let sRepl of char_player.dFinal1.gl_get(sRemain, [])) {
                    aSugg.gl_update(this._suggest(sRepl, nMaxDel, nDeep+1, iAddr, sNewWord, true));
                    aSugg.gl_update(this._suggest(sRepl, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true));
                }
            }
        }
        return aSugg;
    }

    * _getSimilarArcs (cChar, iAddr) {