Grammalecte  Check-in [e07a5e6edb]

Overview
Comment:[core] ibdawg: suggest() seeks for tails
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | core
Files: files | file ages | folders
SHA3-256: e07a5e6edbde29528643f638359997a87dc006fb4867f04d5e8b82218233ea06
User & Date: olr on 2017-07-03 09:30:22
Other Links: manifest | tags
Context
2017-07-04
15:50
[fr] oops, option conjugaison réactivée pour Writer +détails mineurs check-in: 7e75e7a4d4 user: olr tags: trunk, fr, v0.5.17.2
2017-07-03
09:30
[core] ibdawg: suggest() seeks for tails check-in: e07a5e6edb user: olr tags: trunk, core
02:35
[core] ibdawg: code cleaning check-in: f4c85fbe0b user: olr tags: trunk, core
Changes

Modified gc_core/py/ibdawg.py from [e66ef9b298] to [540dfd31d3].

200
201
202
203
204
205
206


207
208
209
210
211
212
213
    def _suggest (self, sWord, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False):
        # recursive function
        aSugg = set()
        if not sWord:
            if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
                #show(nDeep, "___" + sNewWord + "___")
                aSugg.add(sNewWord)


            return aSugg
        #show(nDeep, "<" + sWord + ">  ===>  " + sNewWord)
        cCurrent = sWord[0:1]
        for cChar, jAddr in self._getSimilarArcs(cCurrent, iAddr):
            #show(nDeep, cChar)
            aSugg.update(self._suggest(sWord[1:], nDeep+1, jAddr, sNewWord+cChar))
        if not bAvoidLoop: # avoid infinite loop







>
>







200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
    def _suggest (self, sWord, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False):
        # recursive function
        aSugg = set()
        if not sWord:
            if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
                #show(nDeep, "___" + sNewWord + "___")
                aSugg.add(sNewWord)
            for sTail in self._getTails(iAddr):
                aSugg.add(sNewWord+sTail)
            return aSugg
        #show(nDeep, "<" + sWord + ">  ===>  " + sNewWord)
        cCurrent = sWord[0:1]
        for cChar, jAddr in self._getSimilarArcs(cCurrent, iAddr):
            #show(nDeep, cChar)
            aSugg.update(self._suggest(sWord[1:], nDeep+1, jAddr, sNewWord+cChar))
        if not bAvoidLoop: # avoid infinite loop
236
237
238
239
240
241
242











243
244
245
246
247
248
249
    def _getSimilarArcs (self, cChar, iAddr):
        "generator: yield similar char of <cChar> and address of the following node"
        for c in cp.d1to1.get(cChar, [cChar]):
            if c in self.dChar:
                jAddr = self._lookupArcNode(self.dChar[c], iAddr)
                if jAddr:
                    yield (c, jAddr)












    def _suggestWithCrushedUselessChars (self, sWord, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False):
        aSugg = set()
        if not sWord:
            if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
                show(nDeep, "!!! " + sNewWord + " !!!")
                aSugg.add(sNewWord)







>
>
>
>
>
>
>
>
>
>
>







238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
    def _getSimilarArcs (self, cChar, iAddr):
        "generator: yield similar char of <cChar> and address of the following node"
        for c in cp.d1to1.get(cChar, [cChar]):
            if c in self.dChar:
                jAddr = self._lookupArcNode(self.dChar[c], iAddr)
                if jAddr:
                    yield (c, jAddr)

    def _getTails (self, iAddr, sTail="", n=2):
        "return a list of suffixes ending at a distance of <n> from <iAddr>"
        aTails = set()
        for nVal, jAddr in self._getArcs(iAddr):
            if nVal < self.nChar:
                if int.from_bytes(self.byDic[jAddr:jAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
                    aTails.add(sTail + self.dCharVal[nVal])
                if n:
                    aTails.update(self._getTails(jAddr, sTail+self.dCharVal[nVal], n-1))
        return aTails

    def _suggestWithCrushedUselessChars (self, sWord, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False):
        aSugg = set()
        if not sWord:
            if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
                show(nDeep, "!!! " + sNewWord + " !!!")
                aSugg.add(sNewWord)