327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
|
if nDeep >= oSuggResult.nDistLimit:
sCleanNewWord = cp.cleanWord(sNewWord)
if st.distanceSift4(oSuggResult.sCleanWord[:len(sCleanNewWord)], sCleanNewWord) > oSuggResult.nDistLimit:
return
if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
#logging.info((nDeep * " ") + "__" + sNewWord + "__")
oSuggResult.addSugg(sNewWord, nDeep)
for cChar, jAddr in self._getCharArcs(iAddr):
self._suggest2(oSuggResult, nDeep+1, jAddr, sNewWord+cChar)
return
def _getCharArcs (self, iAddr):
"generator: yield all chars and addresses from node at address <iAddr>"
for nVal, jAddr in self._getArcs(iAddr):
if nVal < self.nChar:
yield (self.dCharVal[nVal], jAddr)
def _getSimilarCharArcs (self, cChar, iAddr):
"generator: yield similar char of <cChar> and address of the following node"
for c in cp.d1to1.get(cChar, [cChar]):
if c in self.dChar:
jAddr = self._lookupArcNode(self.dChar[c], iAddr)
if jAddr:
yield (c, jAddr)
def _getTails (self, iAddr, sTail="", n=2):
"return a list of suffixes ending at a distance of <n> from <iAddr>"
aTails = set()
for nVal, jAddr in self._getArcs(iAddr):
if nVal < self.nChar:
if int.from_bytes(self.byDic[jAddr:jAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
|
|
>
>
>
>
>
>
>
|
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
|
if nDeep >= oSuggResult.nDistLimit:
sCleanNewWord = cp.cleanWord(sNewWord)
if st.distanceSift4(oSuggResult.sCleanWord[:len(sCleanNewWord)], sCleanNewWord) > oSuggResult.nDistLimit:
return
if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
#logging.info((nDeep * " ") + "__" + sNewWord + "__")
oSuggResult.addSugg(sNewWord, nDeep)
for cChar, jAddr in self._getCharArcsWithPriority(iAddr, oSuggResult.sWord[nDeep:nDeep+1]):
self._suggest2(oSuggResult, nDeep+1, jAddr, sNewWord+cChar)
return
def _getCharArcs (self, iAddr):
"generator: yield all chars and addresses from node at address <iAddr>"
for nVal, jAddr in self._getArcs(iAddr):
if nVal < self.nChar:
yield (self.dCharVal[nVal], jAddr)
def _getSimilarCharArcs (self, cChar, iAddr):
"generator: yield similar char of <cChar> and address of the following node"
for c in cp.d1to1.get(cChar, [cChar]):
if c in self.dChar:
jAddr = self._lookupArcNode(self.dChar[c], iAddr)
if jAddr:
yield (c, jAddr)
def _getCharArcsWithPriority (self, iAddr, cChar):
if not cChar:
yield from self._getCharArcs(iAddr)
lTuple = list(self._getCharArcs(iAddr))
lTuple.sort(key=lambda t: 0 if t[0] in cp.d1to1.get(cChar, "") else 1)
yield from lTuple
def _getTails (self, iAddr, sTail="", n=2):
"return a list of suffixes ending at a distance of <n> from <iAddr>"
aTails = set()
for nVal, jAddr in self._getArcs(iAddr):
if nVal < self.nChar:
if int.from_bytes(self.byDic[jAddr:jAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
|