Overview
| Comment: | [core][bug] ibdawg: avoid storing several times the same suggestion |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | core | spellsugg |
| Files: | files | file ages | folders |
| SHA3-256: | 64ccfa7e38801aa95ad1ce8437396c96 |
| User & Date: | olr on 2017-11-07 18:25:09 |
| Other Links: | branch diff | manifest | tags |
Context
|
2017-11-07
| ||
| 19:28 | [core] sort first range of suggestions + code clarification check-in: d22466bd67 user: olr tags: core, spellsugg | |
| 18:25 | [core][bug] ibdawg: avoid storing several times the same suggestion check-in: 64ccfa7e38 user: olr tags: core, spellsugg | |
| 17:59 | [core] ibdawg: use SuggResult for the first suggestion method also check-in: 515e7f3768 user: olr tags: core, spellsugg | |
Changes
Modified gc_core/py/ibdawg.py from [8db71f38ab] to [2152c0fca3].
| ︙ | ︙ | |||
25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
fEnd = time.time()
print(func.__name__, fEnd - fStart)
return result
return wrapper
class SuggResult:
def __init__ (self, sWord, nDistLimit=-1):
self.sWord = sWord
self.sCleanWord = cp.cleanWord(sWord)
self.nDistLimit = nDistLimit if nDistLimit >= 0 else (len(sWord) // 3) + 1
self.nMinDist = 1000
self.aSugg = set()
| > | > > | > > > > > > > | 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
fEnd = time.time()
print(func.__name__, fEnd - fStart)
return result
return wrapper
class SuggResult:
    """Structure for storing, classifying and filtering suggestions.

    Accepted suggestions are grouped in dSugg by their Damerau-Levenshtein
    distance to the cleaned word (key = distance, value = list of suggestions
    in the order they were added). aSugg holds every accepted suggestion so
    that the same suggestion is never stored twice.
    """

    def __init__ (self, sWord, nDistLimit=-1):
        """Prepare an empty result set for <sWord>.

        A negative <nDistLimit> means the limit is derived from the length
        of <sWord>.
        """
        self.sWord = sWord
        self.sCleanWord = cp.cleanWord(sWord)
        self.nDistLimit = nDistLimit if nDistLimit >= 0 else (len(sWord) // 3) + 1
        # smallest distance among accepted suggestions; 1000 = nothing accepted yet
        self.nMinDist = 1000
        self.aSugg = set()
        self.dSugg = { 0: [], 1: [] }

    def addSugg (self, sSugg, nDeep=0):
        """Store <sSugg> unless it is already known or too far from the word.

        <nDeep> is not used by the storing logic (it only served debug output).
        """
        if sSugg in self.aSugg:
            # already stored: no need to compute the distance again
            return
        nDist = st.distanceDamerauLevenshtein(self.sCleanWord, cp.cleanWord(sSugg))
        if nDist > self.nDistLimit:
            return
        self.dSugg.setdefault(nDist, []).append(sSugg)
        self.aSugg.add(sSugg)
        if nDist < self.nMinDist:
            self.nMinDist = nDist
        # once a close suggestion is found, stop accepting much farther ones
        self.nDistLimit = min(self.nDistLimit, self.nMinDist+2)

    def getSuggestions (self, nSuggLimit=10, nDistLimit=-1):
        """Return a list of at most <nSuggLimit> suggestions, closest first.

        <nDistLimit> is accepted for interface compatibility but is not used.
        """
        lRes = []
        # Buckets are created in the order distances are first met, so the
        # dictionary order is not the distance order: walk the keys sorted.
        for nDist in sorted(self.dSugg):
            lRes.extend(self.dSugg[nDist])
            if len(lRes) > nSuggLimit:
                break
        lRes = list(cp.filterSugg(lRes))
        if self.sWord.istitle():
            # the word is title-cased: give the suggestions the same casing
            lRes = [ sSugg.title()  for sSugg in lRes ]
        return lRes[:nSuggLimit]

    def reset (self):
        """Forget all stored suggestions (distance limits are left untouched)."""
        self.aSugg.clear()
        self.dSugg.clear()
class IBDAWG:
"""INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""
def __init__ (self, sDicName):
self.by = pkgutil.get_data(__package__, "_dictionaries/" + sDicName)
if not self.by:
|
| ︙ | ︙ | |||
251 252 253 254 255 256 257 |
aSugg = oSuggResult.getSuggestions()
if sSfx or sPfx:
# we add what we removed
return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
return aSugg
def _suggest (self, oSuggResult, sRemain, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", sAction="", bAvoidLoop=False):
| < | 261 262 263 264 265 266 267 268 269 270 271 272 273 274 |
aSugg = oSuggResult.getSuggestions()
if sSfx or sPfx:
# we add what we removed
return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
return aSugg
def _suggest (self, oSuggResult, sRemain, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", sAction="", bAvoidLoop=False):
# recursive function
#logging.info((nDeep * " ") + sNewWord + ":" + sRemain + " · " + sAction)
if not sRemain:
if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
#logging.info((nDeep * " ") + "__" + sNewWord + "__")
oSuggResult.addSugg(sNewWord)
for sTail in self._getTails(iAddr):
|
| ︙ | ︙ |