Grammalecte  Check-in [e9a792a911]

Overview
Comment:[core] ibdawg: remove secondary suggestion method (pointless)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | core
Files: files | file ages | folders
SHA3-256: e9a792a911fa3a438370a43d392d73588ee2851452abb774cea09742b776eb72
User & Date: olr on 2017-10-25 16:28:42
Other Links: manifest | tags
Context
2017-10-25
17:03
[fr] règle non testée check-in: 54a4aceb2f user: olr tags: trunk, fr
16:28
[core] ibdawg: remove secondary suggestion method (pointless) check-in: e9a792a911 user: olr tags: trunk, core
14:41
[core][fr] ibdawg: clean words suggestion > replace <eau> and <au> by <o> check-in: 7d4742b272 user: olr tags: trunk, fr, core
Changes

Modified gc_core/js/ibdawg.js from [82209aec2c] to [d3a144f471].

198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
        let aSugg = this._suggest(sWord, nMaxDel, nMaxHardRepl);
        if (sWord.gl_isTitle()) {
            aSugg.gl_update(this._suggest(sWord.toLowerCase(), nMaxDel, nMaxHardRepl));
        }
        else if (sWord.gl_isLowerCase()) {
            aSugg.gl_update(this._suggest(sWord.gl_toCapitalize(), nMaxDel, nMaxHardRepl));
        }
        if (aSugg.size == 0) {
            aSugg.gl_update(this._suggestWithCrushedUselessChars(char_player.shrinkWord(sWord)));
        }
        // Set to Array
        aSugg = Array.from(aSugg);
        aSugg = aSugg.filter((sSugg) => { return !sSugg.endsWith("è") && !sSugg.endsWith("È"); }); // fr language 
        if (sWord.gl_isTitle()) {
            aSugg = aSugg.map((sSugg) => { return sSugg.gl_toCapitalize(); });
        }
        let dDistTemp = new Map();







<
<
<







198
199
200
201
202
203
204



205
206
207
208
209
210
211
        let aSugg = this._suggest(sWord, nMaxDel, nMaxHardRepl);
        if (sWord.gl_isTitle()) {
            aSugg.gl_update(this._suggest(sWord.toLowerCase(), nMaxDel, nMaxHardRepl));
        }
        else if (sWord.gl_isLowerCase()) {
            aSugg.gl_update(this._suggest(sWord.gl_toCapitalize(), nMaxDel, nMaxHardRepl));
        }



        // Set to Array
        aSugg = Array.from(aSugg);
        aSugg = aSugg.filter((sSugg) => { return !sSugg.endsWith("è") && !sSugg.endsWith("È"); }); // fr language 
        if (sWord.gl_isTitle()) {
            aSugg = aSugg.map((sSugg) => { return sSugg.gl_toCapitalize(); });
        }
        let dDistTemp = new Map();
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
                    aTails.gl_update(this._getTails(jAddr, sTail+this.dCharVal.get(nVal), n-1));
                }
            }
        }
        return aTails;
    }

    _suggestWithCrushedUselessChars (sWord, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=false) {
        let aSugg = new Set();
        if (sWord.length == 0) {
            if (this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask) {
                aSugg.add(sNewWord);
            }
            return aSugg;
        }
        let cCurrent = sWord.slice(0, 1);
        for (let [cChar, jAddr] of this._getSimilarArcsAndCrushedChars(cCurrent, iAddr)) {
            aSugg.gl_update(this._suggestWithCrushedUselessChars(sWord.slice(1), nDeep+1, jAddr, sNewWord+cChar));
        }
        return aSugg;
    }

    * _getSimilarArcsAndCrushedChars (cChar, iAddr) {
        // generator: yield similar char of <cChar> and address of the following node
        for (let [nVal, jAddr] of this._getArcs(iAddr)) {
            if (this.dCharVal.get(nVal, null) in char_player.aVovels) {
                yield [this.dCharVal[nVal], jAddr];
            }
        }
        yield* this._getSimilarArcs(cChar, iAddr);
    }

    // morph (sWord) {
    //     is defined in constructor
    // }
    
    // VERSION 1
    _morph1 (sWord) {
        // returns morphologies of sWord







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







308
309
310
311
312
313
314

























315
316
317
318
319
320
321
                    aTails.gl_update(this._getTails(jAddr, sTail+this.dCharVal.get(nVal), n-1));
                }
            }
        }
        return aTails;
    }


























    // morph (sWord) {
    //     is defined in constructor
    // }
    
    // VERSION 1
    _morph1 (sWord) {
        // returns morphologies of sWord

Modified gc_core/py/ibdawg.py from [f563ae7bdb] to [816500e455].

194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
        nMaxHardRepl = max((len(sWord) - 5) // 4, 1)
        aSugg = self._suggest(sWord, nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl)
        if sWord.istitle():
            aSugg.update(self._suggest(sWord.lower(), nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl))
            aSugg = set(map(lambda sSugg: sSugg.title(), aSugg))
        elif sWord.islower():
            aSugg.update(self._suggest(sWord.title(), nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl))
        if not aSugg:
            #print("crush useless chars")
            aSugg.update(self._suggestWithCrushedUselessChars(cp.shrinkWord(sWord)))
        aSugg = cp.filterSugg(aSugg)
        sCleanWord = cp.cleanWord(sWord)
        aSugg = sorted(aSugg, key=lambda sSugg: cp.distanceDamerauLevenshtein(sCleanWord, cp.cleanWord(sSugg)))[:nMaxSugg]
        if sSfx or sPfx:
            # we add what we removed
            return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
        return aSugg







<
<
<







194
195
196
197
198
199
200



201
202
203
204
205
206
207
        nMaxHardRepl = max((len(sWord) - 5) // 4, 1)
        aSugg = self._suggest(sWord, nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl)
        if sWord.istitle():
            aSugg.update(self._suggest(sWord.lower(), nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl))
            aSugg = set(map(lambda sSugg: sSugg.title(), aSugg))
        elif sWord.islower():
            aSugg.update(self._suggest(sWord.title(), nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl))



        aSugg = cp.filterSugg(aSugg)
        sCleanWord = cp.cleanWord(sWord)
        aSugg = sorted(aSugg, key=lambda sSugg: cp.distanceDamerauLevenshtein(sCleanWord, cp.cleanWord(sSugg)))[:nMaxSugg]
        if sSfx or sPfx:
            # we add what we removed
            return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
        return aSugg
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
            if nVal < self.nChar:
                if int.from_bytes(self.byDic[jAddr:jAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
                    aTails.add(sTail + self.dCharVal[nVal])
                if n and not aTails:
                    aTails.update(self._getTails(jAddr, sTail+self.dCharVal[nVal], n-1))
        return aTails

    def _suggestWithCrushedUselessChars (self, sWord, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False):
        aSugg = set()
        if not sWord:
            if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
                #show(nDeep, "!!! " + sNewWord + " !!!")
                aSugg.add(sNewWord)
            return aSugg
        cCurrent = sWord[0:1]
        for cChar, jAddr in self._getSimilarArcsAndCrushedChars(cCurrent, iAddr):
            #show(nDeep, cChar)
            aSugg.update(self._suggestWithCrushedUselessChars(sWord[1:], nDeep+1, jAddr, sNewWord+cChar))
        return aSugg

    def _getSimilarArcsAndCrushedChars (self, cChar, iAddr):
        "generator: yield the arcs from <iAddr> whose char is in <cp.aVovels>, then the similar chars of <cChar>, each with the address of the following node"
        for nArcVal, jNext in self._getArcs(iAddr):
            # look the arc value up once; None when the value has no entry in <dCharVal>
            cArc = self.dCharVal.get(nArcVal, None)
            if cArc in cp.aVovels:
                yield (cArc, jNext)
        # then hand over to the regular similar-arcs generator
        yield from self._getSimilarArcs(cChar, iAddr)

    def drawPath (self, sWord, iAddr=0):
        "show the path taken by <sWord> in the graph"
        cChar = sWord[0:1]  if sWord  else " "
        iPos = -1
        n = 0
        print(cChar + ": ", end="")
        for nVal, jAddr in self._getArcs(iAddr):







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







268
269
270
271
272
273
274




















275
276
277
278
279
280
281
            if nVal < self.nChar:
                if int.from_bytes(self.byDic[jAddr:jAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
                    aTails.add(sTail + self.dCharVal[nVal])
                if n and not aTails:
                    aTails.update(self._getTails(jAddr, sTail+self.dCharVal[nVal], n-1))
        return aTails





















    def drawPath (self, sWord, iAddr=0):
        "show the path taken by <sWord> in the graph"
        cChar = sWord[0:1]  if sWord  else " "
        iPos = -1
        n = 0
        print(cChar + ": ", end="")
        for nVal, jAddr in self._getArcs(iAddr):