Overview
| Comment: | [core] sort first range of suggestions + code clarification |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | core | spellsugg |
| Files: | files | file ages | folders |
| SHA3-256: |
d22466bd67f2df09b8e8d933ea100cc4 |
| User & Date: | olr on 2017-11-07 19:28:50 |
| Other Links: | branch diff | manifest | tags |
Context
|
2017-11-07
| ||
| 19:56 | [core] ibdawg: suggest2 > char priority check-in: 8ea89d19b5 user: olr tags: core, spellsugg | |
| 19:28 | [core] sort first range of suggestions + code clarification check-in: d22466bd67 user: olr tags: core, spellsugg | |
| 18:25 | [core][bug] ibdawg: avoid storing several times the same suggestion check-in: 64ccfa7e38 user: olr tags: core, spellsugg | |
Changes
Modified gc_core/py/ibdawg.py from [2152c0fca3] to [69a64ae917].
| ︙ | ︙ | |||
37 38 39 40 41 42 43 |
self.nDistLimit = nDistLimit if nDistLimit >= 0 else (len(sWord) // 3) + 1
self.nMinDist = 1000
self.aSugg = set()
self.dSugg = { 0: [], 1: [] }
def addSugg (self, sSugg, nDeep=0):
"add a suggestion"
| | < | | | > > | 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
self.nDistLimit = nDistLimit if nDistLimit >= 0 else (len(sWord) // 3) + 1
self.nMinDist = 1000
self.aSugg = set()
self.dSugg = { 0: [], 1: [] }
def addSugg (self, sSugg, nDeep=0):
    """Store <sSugg> unless it is already known or too far from the word.

    The distance is the Damerau-Levenshtein distance between <self.sCleanWord>
    and the cleaned form of <sSugg>.  An accepted suggestion is appended to
    the bucket <self.dSugg[distance]> and recorded in <self.aSugg>, then
    <self.nMinDist> and <self.nDistLimit> are tightened.
    <nDeep> is only referenced by the disabled trace below.
    """
    # NOTE(review): the nesting was reconstructed from a listing without
    # indentation -- confirm the limits are only updated for accepted words.
    #logging.info((nDeep * " ") + "__" + sSugg + "__")
    if sSugg in self.aSugg:
        # already stored once
        return
    nDist = st.distanceDamerauLevenshtein(self.sCleanWord, cp.cleanWord(sSugg))
    if nDist > self.nDistLimit:
        return
    self.dSugg.setdefault(nDist, []).append(sSugg)
    self.aSugg.add(sSugg)
    self.nMinDist = min(self.nMinDist, nDist)
    # never look further than 2 steps beyond the best distance found so far
    self.nDistLimit = min(self.nDistLimit, self.nMinDist+2)
def getSuggestions (self, nSuggLimit=10, nDistLimit=-1):
    """Return at most <nSuggLimit> suggestions, closest distance bucket first.

    <nDistLimit> is kept for interface compatibility; it is not used here.
    The collected words go through cp.filterSugg, then are title-cased or
    upper-cased when <self.sWord> itself is title-cased or upper-cased.
    """
    lRes = []
    # .get(): the bucket 0 no longer exists once reset() has emptied the dict
    lBest = self.dSugg.get(0)
    if lBest:
        # we sort the better results with the original word
        lBest.sort(key=lambda sSugg: st.distanceDamerauLevenshtein(self.sWord, sSugg))
    # addSugg() creates buckets on demand, so the dict order is the order of
    # discovery, not the order of distance: walk the keys sorted so that the
    # nearest suggestions always come first
    for nDist in sorted(self.dSugg):
        lRes.extend(self.dSugg[nDist])
        if len(lRes) > nSuggLimit:
            break
    lRes = list(cp.filterSugg(lRes))
    if self.sWord.istitle():
        lRes = [ sSugg.title()  for sSugg in lRes ]
    elif self.sWord.isupper():
        lRes = [ sSugg.upper()  for sSugg in lRes ]
    return lRes[:nSuggLimit]
def reset (self):
    """Forget every stored suggestion and restore the empty buckets 0 and 1.

    Both containers are emptied in place, so references held elsewhere stay
    valid.
    """
    self.aSugg.clear()
    self.dSugg.clear()
    # an empty dict would make any later lookup of bucket 0 raise KeyError:
    # put back the two empty buckets this object starts with
    self.dSugg.update({ 0: [], 1: [] })
    # NOTE(review): <nMinDist> and <nDistLimit>, which addSugg() tightens, are
    # left untouched here -- confirm whether they should be restored as well.
|
| ︙ | ︙ | |||
265 266 267 268 269 270 271 |
return aSugg
def _suggest (self, oSuggResult, sRemain, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", sAction="", bAvoidLoop=False):
# recursive function
#logging.info((nDeep * " ") + sNewWord + ":" + sRemain + " · " + sAction)
if not sRemain:
if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
| < < | | 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 |
return aSugg
def _suggest (self, oSuggResult, sRemain, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", sAction="", bAvoidLoop=False):
# recursive function
#logging.info((nDeep * " ") + sNewWord + ":" + sRemain + " · " + sAction)
if not sRemain:
if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
oSuggResult.addSugg(sNewWord)
for sTail in self._getTails(iAddr):
oSuggResult.addSugg(sNewWord+sTail)
return
cCurrent = sRemain[0:1]
for cChar, jAddr in self._getSimilarCharArcs(cCurrent, iAddr):
self._suggest(oSuggResult, sRemain[1:], nMaxDel, nMaxHardRepl, nDeep+1, jAddr, sNewWord+cChar, "*")
if not bAvoidLoop: # avoid infinite loop
if cCurrent == sRemain[1:2]:
# same char, we remove 1 char without adding 1 to <sNewWord>
self._suggest(oSuggResult, sRemain[1:], nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, cCurrent+"/2")
else:
# switching chars
|
| ︙ | ︙ | |||
319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 |
aSugg = oSuggResult.getSuggestions()
if sSfx or sPfx:
# we add what we removed
return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
return aSugg
def _suggest2 (self, oSuggResult, nDeep=0, iAddr=0, sNewWord=""):
    """Recursive walk: follow every char arc from <iAddr> and offer each word.

    <sNewWord> is the string built along the path, <nDeep> the number of arcs
    followed so far.  From depth <oSuggResult.nDistLimit> on, a branch is
    dropped as soon as the Sift4 distance between the cleaned <sNewWord> and
    the same-length prefix of <oSuggResult.sCleanWord> exceeds that limit.
    """
    #logging.info((nDeep * " ") + sNewWord)
    if nDeep >= oSuggResult.nDistLimit:
        sCleanNewWord = cp.cleanWord(sNewWord)
        sPrefix = oSuggResult.sCleanWord[:len(sCleanNewWord)]
        if st.distanceSift4(sPrefix, sCleanNewWord) > oSuggResult.nDistLimit:
            # this branch has drifted too far from the word: prune it
            return
    nNode = int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big')
    if nNode & self._finalNodeMask:
        #logging.info((nDeep * " ") + "__" + sNewWord + "__")
        oSuggResult.addSugg(sNewWord, nDeep)
    for cChar, jAddr in self._getCharArcs(iAddr):
        self._suggest2(oSuggResult, nDeep+1, jAddr, sNewWord+cChar)
def _getCharArcs (self, iAddr):
    """generator: yield (char, address) for the arcs of the node at <iAddr>.

    Arcs whose value is not below <self.nChar> are skipped; the others are
    translated to a char through <self.dCharVal>.
    """
    for nArcVal, jNext in self._getArcs(iAddr):
        if nArcVal >= self.nChar:
            continue
        yield (self.dCharVal[nArcVal], jNext)
| > | | | | < | | | | | | 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 |
aSugg = oSuggResult.getSuggestions()
if sSfx or sPfx:
# we add what we removed
return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
return aSugg
def _suggest2 (self, oSuggResult, nDeep=0, iAddr=0, sNewWord=""):
    """Recursive walk: follow every char arc from <iAddr> and offer each word.

    <sNewWord> is the string built along the path, <nDeep> the number of arcs
    followed so far.  From depth <oSuggResult.nDistLimit> on, a branch is
    dropped as soon as the Sift4 distance between the cleaned <sNewWord> and
    the same-length prefix of <oSuggResult.sCleanWord> exceeds that limit.
    """
    #logging.info((nDeep * " ") + sNewWord)
    if nDeep >= oSuggResult.nDistLimit:
        sCleanNewWord = cp.cleanWord(sNewWord)
        sPrefix = oSuggResult.sCleanWord[:len(sCleanNewWord)]
        if st.distanceSift4(sPrefix, sCleanNewWord) > oSuggResult.nDistLimit:
            # this branch has drifted too far from the word: prune it
            return
    nNode = int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big')
    if nNode & self._finalNodeMask:
        #logging.info((nDeep * " ") + "__" + sNewWord + "__")
        oSuggResult.addSugg(sNewWord, nDeep)
    for cChar, jAddr in self._getCharArcs(iAddr):
        self._suggest2(oSuggResult, nDeep+1, jAddr, sNewWord+cChar)
def _getCharArcs (self, iAddr):
    """generator: yield (char, address) for the arcs of the node at <iAddr>.

    Arcs whose value is not below <self.nChar> are skipped; the others are
    translated to a char through <self.dCharVal>.
    """
    for nArcVal, jNext in self._getArcs(iAddr):
        if nArcVal >= self.nChar:
            continue
        yield (self.dCharVal[nArcVal], jNext)
def _getSimilarCharArcs (self, cChar, iAddr):
    """generator: yield (char, address) for <cChar> and its look-alikes.

    The candidates come from <cp.d1to1[cChar]>, or are <cChar> alone when the
    table has no entry for it.  A candidate is yielded only if it is a key of
    <self.dChar> and <self._lookupArcNode> returns a truthy address for it
    from the node at <iAddr>.
    """
    for cCandidate in cp.d1to1.get(cChar, [cChar]):
        if cCandidate not in self.dChar:
            continue
        jAddr = self._lookupArcNode(self.dChar[cCandidate], iAddr)
        if jAddr:
            yield (cCandidate, jAddr)
def _getTails (self, iAddr, sTail="", n=2):
    """Return the set of suffixes found from <iAddr>, each prefixed by <sTail>.

    A char arc leading to a node flagged by <self._finalNodeMask> adds its
    suffix.  While nothing has been found yet and <n> is not zero, the search
    goes one arc deeper with <n-1>.
    """
    # NOTE(review): the nesting was reconstructed from a listing without
    # indentation -- confirm the recursion belongs inside the char-arc test.
    aFound = set()
    for nArcVal, jNext in self._getArcs(iAddr):
        if nArcVal >= self.nChar:
            continue
        nNode = int.from_bytes(self.byDic[jNext:jNext+self.nBytesArc], byteorder='big')
        if nNode & self._finalNodeMask:
            aFound.add(sTail + self.dCharVal[nArcVal])
        if n and not aFound:
            # nothing collected so far: look further down this arc
            aFound.update(self._getTails(jNext, sTail+self.dCharVal[nArcVal], n-1))
    return aFound
def drawPath (self, sWord, iAddr=0):
    """Print the path taken by <sWord> in the graph, one node per call.

    Each call prints the first char of <sWord> (a space when <sWord> is
    empty), then every char available from the node at <iAddr>.  When the
    first char is among them, a "|" marker is printed under its position and
    the walk goes on with the rest of the word from the matching node.
    """
    cFirst = sWord[0:1]
    print((cFirst  if sWord  else " ") + ": ", end="")
    iPos = -1
    for iArc, (cArc, jAddr) in enumerate(self._getCharArcs(iAddr)):
        print(cArc, end="")
        if cArc == cFirst:
            iNextNodeAddr = jAddr
            iPos = iArc
    if not sWord or iPos < 0:
        # end of the word, or no arc for its first char: nothing more to draw
        return
    print("\n " + " " * iPos + "|")
    self.drawPath(sWord[1:], iNextNodeAddr)
def select (self, sPattern=""):
|
| ︙ | ︙ |