28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
|
class SuggResult:
    """Store suggestions for a word, grouped by their distance to that word.

    Attributes set here:
        sWord       the word for which suggestions are collected
        sCleanWord  result of cp.cleanWord(sWord), used for distance computation
        nDistLimit  maximum distance accepted by addSugg()
        nMinDist    smallest distance among accepted suggestions (1000 until one is accepted)
        aSugg       set of every accepted suggestion
        dSugg       dictionary mapping a distance to the list of suggestions at that distance
    """

    def __init__ (self, sWord, nDistLimit=-1):
        """Prepare an empty result for <sWord>.

        A negative <nDistLimit> (the default) selects a limit derived from
        the length of <sWord>: (len(sWord) // 3) + 1.
        """
        self.sWord = sWord
        self.sCleanWord = cp.cleanWord(sWord)
        if nDistLimit >= 0:
            self.nDistLimit = nDistLimit
        else:
            self.nDistLimit = (len(sWord) // 3) + 1
        self.nMinDist = 1000
        self.aSugg = set()
        self.dSugg = { 0: [],  1: [] }

    def addSugg (self, sSugg, nDeep=0):
        """Register <sSugg> unless it is already known or too distant.

        <nDeep> is only used by the commented-out logging line below.
        """
        #logging.info((nDeep * " ") + "__" + sSugg + "__")
        if sSugg in self.aSugg:
            # already accepted: nothing to do
            return
        nDist = st.distanceDamerauLevenshtein(self.sCleanWord, cp.cleanWord(sSugg))
        if nDist <= self.nDistLimit:
            # create the bucket for this distance on first use
            self.dSugg.setdefault(nDist, []).append(sSugg)
            self.aSugg.add(sSugg)
            self.nMinDist = min(self.nMinDist, nDist)
|
|
|
|
|
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
|
class SuggResult:
    """Store suggestions for a word, grouped by their distance to that word.

    Attributes set here:
        sWord            the word for which suggestions are collected
        sSimplifiedWord  result of cp.simplifyWord(sWord), used for distance computation
        nDistLimit       maximum distance accepted by addSugg()
        nMinDist         smallest distance among accepted suggestions (1000 until one is accepted)
        aSugg            set of every accepted suggestion
        dSugg            dictionary mapping a distance to the list of suggestions at that distance
    """

    def __init__ (self, sWord, nDistLimit=-1):
        """Prepare an empty result for <sWord>.

        A negative <nDistLimit> (the default) selects a limit derived from
        the length of <sWord>: (len(sWord) // 3) + 1.
        """
        self.sWord = sWord
        self.sSimplifiedWord = cp.simplifyWord(sWord)
        if nDistLimit >= 0:
            self.nDistLimit = nDistLimit
        else:
            self.nDistLimit = (len(sWord) // 3) + 1
        self.nMinDist = 1000
        self.aSugg = set()
        self.dSugg = { 0: [],  1: [],  2: [] }

    def addSugg (self, sSugg, nDeep=0):
        """Register <sSugg> unless it is already known or too distant.

        <nDeep> is only used by the commented-out logging line below.
        """
        #logging.info((nDeep * " ") + "__" + sSugg + "__")
        if sSugg in self.aSugg:
            # already accepted: nothing to do
            return
        nDist = st.distanceDamerauLevenshtein(self.sSimplifiedWord, cp.simplifyWord(sSugg))
        if nDist <= self.nDistLimit:
            # create the bucket for this distance on first use
            self.dSugg.setdefault(nDist, []).append(sSugg)
            self.aSugg.add(sSugg)
            self.nMinDist = min(self.nMinDist, nDist)
|
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
|
if sWord[0:1].isupper():
l.extend(self.morph(sWord.lower()))
if sWord.isupper() and len(sWord) > 1:
l.extend(self.morph(sWord.capitalize()))
return l
#@timethis
def suggest (self, sWord, nMaxSugg=10):
    """Return suggestions for <sWord>.

    The word is split by cp.cut() into a prefix, a core and a suffix.
    Suggestions are searched for the core; when a prefix or a suffix was
    removed, it is put back around every suggestion and a list is returned.
    Otherwise the value of SuggResult.getSuggestions() is returned as is.

    NOTE(review): <nMaxSugg> is not used anywhere in this body.
    """
    sPfx, sWord, sSfx = cp.cut(sWord)
    # search limits grow with the length of the word
    dLimits = {
        "nMaxSwitch": max(len(sWord) // 3, 1),
        "nMaxDel": len(sWord) // 5,
        "nMaxHardRepl": max((len(sWord) - 5) // 4, 1)
    }
    oSuggResult = SuggResult(sWord)
    # the word itself first, then its other casing (lowercase for a titled word, titled for a lowercase word)
    lVariants = [sWord]
    if sWord.istitle():
        lVariants.append(sWord.lower())
    elif sWord.islower():
        lVariants.append(sWord.title())
    for sVariant in lVariants:
        self._suggest(oSuggResult, sVariant, **dLimits)
    aSugg = oSuggResult.getSuggestions()
    if not (sSfx or sPfx):
        return aSugg
    # we add what we removed
    return [ sPfx + sSug + sSfx  for sSug in aSugg ]
def _suggest (self, oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False):
# recursive function
|
|
|
|
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
|
if sWord[0:1].isupper():
l.extend(self.morph(sWord.lower()))
if sWord.isupper() and len(sWord) > 1:
l.extend(self.morph(sWord.capitalize()))
return l
#@timethis
def suggest (self, sWord, nSuggLimit=10):
    """Return suggestions for <sWord>.

    The word is split by cp.cut() into a prefix, a core and a suffix.
    Suggestions are searched for the core; <nSuggLimit> is forwarded to
    SuggResult.getSuggestions(). When a prefix or a suffix was removed, it
    is put back around every suggestion and a list is returned. Otherwise
    the value of getSuggestions() is returned as is.
    """
    sPfx, sWord, sSfx = cp.cut(sWord)
    # search limits grow with the length of the word
    dLimits = {
        "nMaxSwitch": max(len(sWord) // 3, 1),
        "nMaxDel": len(sWord) // 5,
        "nMaxHardRepl": max((len(sWord) - 5) // 4, 1)
    }
    oSuggResult = SuggResult(sWord)
    # the word itself first, then its other casing (lowercase for a titled word, titled for a lowercase word)
    lVariants = [sWord]
    if sWord.istitle():
        lVariants.append(sWord.lower())
    elif sWord.islower():
        lVariants.append(sWord.title())
    for sVariant in lVariants:
        self._suggest(oSuggResult, sVariant, **dLimits)
    aSugg = oSuggResult.getSuggestions(nSuggLimit)
    if not (sSfx or sPfx):
        return aSugg
    # we add what we removed
    return [ sPfx + sSug + sSfx  for sSug in aSugg ]
def _suggest (self, oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False):
# recursive function
|
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
|
return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
return aSugg
def _suggest2 (self, oSuggResult, nDeep=0, iAddr=0, sNewWord=""):
    """Recursively collect suggestions by following the arcs read from <self.byDic>.

    oSuggResult  SuggResult receiving the candidates (its sWord, sCleanWord and nDistLimit are read)
    nDeep        recursion depth, also the index of the current character in oSuggResult.sWord
    iAddr        offset in <self.byDic> where the current arc value is read
    sNewWord     characters accumulated along the path followed so far
    """
    #logging.info((nDeep * " ") + sNewWord)
    nLimit = oSuggResult.nDistLimit
    if nDeep >= nLimit:
        # give up on this path when the cleaned candidate is already too far
        # from the beginning (same length) of the cleaned searched word
        sCandidate = cp.cleanWord(sNewWord)
        sReference = oSuggResult.sCleanWord[:len(sCandidate)]
        if st.distanceSift4(sReference, sCandidate) > nLimit:
            return
    nRawArc = int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big')
    if nRawArc & self._finalNodeMask:
        # the final-node bit is set: the path followed so far is offered as a suggestion
        oSuggResult.addSugg(sNewWord, nDeep)
    cCurrent = oSuggResult.sWord[nDeep:nDeep+1]
    for cChar, jAddr in self._getCharArcsWithPriority(iAddr, cCurrent):
        self._suggest2(oSuggResult, nDeep+1, jAddr, sNewWord+cChar)
|
|
|
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
|
return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
return aSugg
def _suggest2 (self, oSuggResult, nDeep=0, iAddr=0, sNewWord=""):
    """Recursively collect suggestions by following the arcs read from <self.byDic>.

    oSuggResult  SuggResult receiving the candidates (its sWord, sSimplifiedWord and nDistLimit are read)
    nDeep        recursion depth, also the index of the current character in oSuggResult.sWord
    iAddr        offset in <self.byDic> where the current arc value is read
    sNewWord     characters accumulated along the path followed so far
    """
    #logging.info((nDeep * " ") + sNewWord)
    if nDeep >= oSuggResult.nDistLimit:
        # give up on this path when the simplified candidate is already too far
        # from the beginning (same length) of the simplified searched word
        sSimplifiedNewWord = cp.simplifyWord(sNewWord)
        # SuggResult keeps the simplified searched word in <sSimplifiedWord>
        # (its __init__ defines no <sCleanWord> attribute)
        if st.distanceSift4(oSuggResult.sSimplifiedWord[:len(sSimplifiedNewWord)], sSimplifiedNewWord) > oSuggResult.nDistLimit:
            return
    if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
        # the final-node bit is set: the path followed so far is offered as a suggestion
        oSuggResult.addSugg(sNewWord, nDeep)
    for cChar, jAddr in self._getCharArcsWithPriority(iAddr, oSuggResult.sWord[nDeep:nDeep+1]):
        self._suggest2(oSuggResult, nDeep+1, jAddr, sNewWord+cChar)
|