Grammalecte  Check-in [e07a5e6edb]

Overview
Comment:[core] ibdawg: suggest() seeks for tails
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | core
Files: files | file ages | folders
SHA3-256: e07a5e6edbde29528643f638359997a87dc006fb4867f04d5e8b82218233ea06
User & Date: olr on 2017-07-03 09:30:22
Other Links: manifest | tags
Context
2017-07-04
15:50
[fr] oops, option conjugaison réactivée pour Writer +détails mineurs check-in: 7e75e7a4d4 user: olr tags: trunk, fr, v0.5.17.2
2017-07-03
09:30
[core] ibdawg: suggest() seeks for tails check-in: e07a5e6edb user: olr tags: trunk, core
02:35
[core] ibdawg: code cleaning check-in: f4c85fbe0b user: olr tags: trunk, core
Changes

Modified gc_core/py/ibdawg.py from [e66ef9b298] to [540dfd31d3].

200
201
202
203
204
205
206


207
208
209
210
211
212
213
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215







+
+







    def _suggest (self, sWord, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False):
        # recursive function
        aSugg = set()
        if not sWord:
            if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
                #show(nDeep, "___" + sNewWord + "___")
                aSugg.add(sNewWord)
            for sTail in self._getTails(iAddr):
                aSugg.add(sNewWord+sTail)
            return aSugg
        #show(nDeep, "<" + sWord + ">  ===>  " + sNewWord)
        cCurrent = sWord[0:1]
        for cChar, jAddr in self._getSimilarArcs(cCurrent, iAddr):
            #show(nDeep, cChar)
            aSugg.update(self._suggest(sWord[1:], nDeep+1, jAddr, sNewWord+cChar))
        if not bAvoidLoop: # avoid infinite loop
236
237
238
239
240
241
242











243
244
245
246
247
248
249
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262







+
+
+
+
+
+
+
+
+
+
+







    def _getSimilarArcs (self, cChar, iAddr):
        """Generator: yield (char, address) for each char similar to <cChar> with an arc from <iAddr>.

        Candidate characters are read from cp.d1to1[cChar]; when <cChar> has no
        entry there, <cChar> itself is the only candidate.  A candidate is
        yielded only if it is a key of self.dChar and self._lookupArcNode()
        returns a truthy address for it.
        """
        lCandidates = cp.d1to1.get(cChar, [cChar])
        for cCandidate in lCandidates:
            if cCandidate not in self.dChar:
                # character unknown to this dictionary: nothing to follow
                continue
            jNext = self._lookupArcNode(self.dChar[cCandidate], iAddr)
            if not jNext:
                # falsy result: no usable arc for this character from <iAddr>
                continue
            yield (cCandidate, jNext)

    def _getTails (self, iAddr, sTail="", n=2):
        """Return the set of suffixes found by following arcs from <iAddr>.

        <sTail> is the suffix accumulated so far (prepended to every result).
        <n> is the number of additional levels explored below the arcs of
        <iAddr>: with n=0 only the direct arcs are examined, so the suffixes
        appended to <sTail> are between 1 and n+1 characters long.
        A suffix is kept when the node it leads to has the final-node flag set.
        """
        aFound = set()
        for nVal, jAddr in self._getArcs(iAddr):
            if nVal >= self.nChar:
                # NOTE(review): presumably values >= nChar are not character arcs -- confirm
                continue
            nNodeHeader = int.from_bytes(self.byDic[jAddr:jAddr+self.nBytesArc], byteorder='big')
            bFinal = nNodeHeader & self._finalNodeMask
            if not bFinal and not n:
                # neither a word end nor allowed to go deeper: nothing to record
                continue
            sExtended = sTail + self.dCharVal[nVal]
            if bFinal:
                aFound.add(sExtended)
            if n:
                aFound.update(self._getTails(jAddr, sExtended, n-1))
        return aFound

    def _suggestWithCrushedUselessChars (self, sWord, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False):
        aSugg = set()
        if not sWord:
            if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
                show(nDeep, "!!! " + sNewWord + " !!!")
                aSugg.add(sNewWord)