Overview
Comment: | [core][bug] ibdawg: avoid storing several times the same suggestion |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | core | spellsugg |
Files: | files | file ages | folders |
SHA3-256: | 64ccfa7e38801aa95ad1ce8437396c96 |
User & Date: | olr on 2017-11-07 18:25:09 |
Other Links: | branch diff | manifest | tags |
Context
2017-11-07
| ||
19:28 | [core] sort first range of suggestions + code clarification check-in: d22466bd67 user: olr tags: core, spellsugg | |
18:25 | [core][bug] ibdawg: avoid storing several times the same suggestion check-in: 64ccfa7e38 user: olr tags: core, spellsugg | |
17:59 | [core] ibdawg: use SuggResult for the first suggestion method also check-in: 515e7f3768 user: olr tags: core, spellsugg | |
Changes
Modified gc_core/py/ibdawg.py from [8db71f38ab] to [2152c0fca3].
︙ | ︙ | |||
25 26 27 28 29 30 31 32 33 34 35 36 37 38 | fEnd = time.time() print(func.__name__, fEnd - fStart) return result return wrapper class SuggResult: def __init__ (self, sWord, nDistLimit=-1): self.sWord = sWord self.sCleanWord = cp.cleanWord(sWord) self.nDistLimit = nDistLimit if nDistLimit >= 0 else (len(sWord) // 3) + 1 self.nMinDist = 1000 self.aSugg = set() | > | > > | > > > > > > > | 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 | fEnd = time.time() print(func.__name__, fEnd - fStart) return result return wrapper class SuggResult: """Structure for storing, classifying and filtering suggestions""" def __init__ (self, sWord, nDistLimit=-1): self.sWord = sWord self.sCleanWord = cp.cleanWord(sWord) self.nDistLimit = nDistLimit if nDistLimit >= 0 else (len(sWord) // 3) + 1 self.nMinDist = 1000 self.aSugg = set() self.dSugg = { 0: [], 1: [] } def addSugg (self, sSugg, nDeep=0): "add a suggestion" #print(sSugg) if sSugg not in self.aSugg: nDist = st.distanceDamerauLevenshtein(self.sCleanWord, cp.cleanWord(sSugg)) if nDist <= self.nDistLimit: if nDist not in self.dSugg: self.dSugg[nDist] = [] self.dSugg[nDist].append(sSugg) self.aSugg.add(sSugg) #logging.info((nDeep * " ") + "__" + sSugg + "__") if nDist < self.nMinDist: self.nMinDist = nDist self.nDistLimit = min(self.nDistLimit, self.nMinDist+2) def getSuggestions (self, nSuggLimit=10, nDistLimit=-1): "return a list of suggestions" lRes = [] #if self.dSugg[0]: # # we sort the better results with the original word # self.dSugg[0].sort(key=lambda sSugg: cp.distanceDamerauLevenshtein(self.sWord, sSugg)) for lSugg in self.dSugg.values(): lRes.extend(lSugg) if len(lRes) > nSuggLimit: break lRes = list(cp.filterSugg(lRes)) if self.sWord.istitle(): lRes = list(map(lambda sSugg: sSugg.title(), lRes)) return lRes[:nSuggLimit] def reset (self): self.aSugg.clear() self.dSugg.clear() class IBDAWG: 
"""INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH""" def __init__ (self, sDicName): self.by = pkgutil.get_data(__package__, "_dictionaries/" + sDicName) if not self.by: |
︙ | ︙ | |||
251 252 253 254 255 256 257 | aSugg = oSuggResult.getSuggestions() if sSfx or sPfx: # we add what we removed return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg)) return aSugg def _suggest (self, oSuggResult, sRemain, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", sAction="", bAvoidLoop=False): | < | 261 262 263 264 265 266 267 268 269 270 271 272 273 274 | aSugg = oSuggResult.getSuggestions() if sSfx or sPfx: # we add what we removed return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg)) return aSugg def _suggest (self, oSuggResult, sRemain, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", sAction="", bAvoidLoop=False): # recursive function #logging.info((nDeep * " ") + sNewWord + ":" + sRemain + " · " + sAction) if not sRemain: if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask: #logging.info((nDeep * " ") + "__" + sNewWord + "__") oSuggResult.addSugg(sNewWord) for sTail in self._getTails(iAddr): |
︙ | ︙ |