25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
|
fEnd = time.time()
print(func.__name__, fEnd - fStart)
return result
return wrapper
class SuggResult:
    """Collect suggestions for a word, grouped by their distance to that word."""

    def __init__ (self, sWord, nDistLimit=-1):
        """Prepare an empty result set for <sWord>.

        sWord       word for which suggestions are collected
        nDistLimit  greatest accepted distance; any negative value selects
                    the default (len(sWord) // 3) + 1
        """
        self.sWord = sWord
        self.sCleanWord = cp.cleanWord(sWord)
        self.nDistLimit = nDistLimit if nDistLimit >= 0 else (len(sWord) // 3) + 1
        # smallest distance accepted so far (1000 means nothing accepted yet)
        self.nMinDist = 1000
        # suggestions already accepted, used to reject duplicates
        self.aSugg = set()
        # distance -> list of suggestions, in order of arrival
        self.dSugg = { 0: [], 1: [], 2: [] }

    def addSugg (self, sSugg, nDeep=0):
        """Store <sSugg> unless it is a duplicate or lies beyond the distance limit.

        nDeep only sets the indentation of the logged line.
        """
        if sSugg in self.aSugg:
            return
        nDist = st.distanceDamerauLevenshtein(self.sCleanWord, cp.cleanWord(sSugg))
        if nDist > self.nDistLimit:
            return
        self.dSugg.setdefault(nDist, []).append(sSugg)
        # remember the suggestion, otherwise the duplicate test above never triggers
        self.aSugg.add(sSugg)
        logging.info((nDeep * " ") + "__" + sSugg + "__")
        if nDist < self.nMinDist:
            self.nMinDist = nDist
        # tighten the limit: nothing farther than the best distance + 2 is kept from now on
        self.nDistLimit = min(self.nDistLimit, self.nMinDist+2)

    def getSuggestions (self, nSuggLimit=10, nDistLimit=-1):
        """Return at most <nSuggLimit> suggestions, closest distances first.

        nDistLimit is accepted for interface compatibility and is not used.
        """
        lRes = []
        # buckets may have been created in any order: walk them by ascending distance
        for nDist in sorted(self.dSugg):
            lRes.extend(self.dSugg[nDist])
            if len(lRes) > nSuggLimit:
                break
        lRes = list(cp.filterSugg(lRes))
        if self.sWord.istitle():
            lRes = [ sSugg.title()  for sSugg in lRes ]
        return lRes[:nSuggLimit]
class IBDAWG:
"""INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""
def __init__ (self, sDicName):
self.by = pkgutil.get_data(__package__, "_dictionaries/" + sDicName)
if not self.by:
|
>
|
>
>
|
>
>
>
>
>
>
>
|
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
|
fEnd = time.time()
print(func.__name__, fEnd - fStart)
return result
return wrapper
class SuggResult:
    """Structure for storing, classifying and filtering suggestions"""

    def __init__ (self, sWord, nDistLimit=-1):
        """Prepare an empty result set for <sWord>.

        sWord       word for which suggestions are collected
        nDistLimit  greatest accepted distance; any negative value selects
                    the default (len(sWord) // 3) + 1
        """
        self.sWord = sWord
        self.sCleanWord = cp.cleanWord(sWord)
        self.nDistLimit = nDistLimit if nDistLimit >= 0 else (len(sWord) // 3) + 1
        # smallest distance accepted so far (1000 means nothing accepted yet)
        self.nMinDist = 1000
        # suggestions already accepted, used to reject duplicates
        self.aSugg = set()
        # distance -> list of suggestions, in order of arrival
        self.dSugg = { 0: [], 1: [] }

    def addSugg (self, sSugg, nDeep=0):
        """Store <sSugg> unless it is a duplicate or lies beyond the distance limit.

        nDeep is accepted for interface compatibility and is not used.
        """
        if sSugg in self.aSugg:
            return
        nDist = st.distanceDamerauLevenshtein(self.sCleanWord, cp.cleanWord(sSugg))
        if nDist > self.nDistLimit:
            return
        self.dSugg.setdefault(nDist, []).append(sSugg)
        self.aSugg.add(sSugg)
        if nDist < self.nMinDist:
            self.nMinDist = nDist
        # tighten the limit: nothing farther than the best distance + 2 is kept from now on
        self.nDistLimit = min(self.nDistLimit, self.nMinDist+2)

    def getSuggestions (self, nSuggLimit=10, nDistLimit=-1):
        """Return at most <nSuggLimit> suggestions, closest distances first.

        nDistLimit is accepted for interface compatibility and is not used.
        """
        lRes = []
        # NOTE: sorting the distance-0 bucket against the original word was
        # disabled in the source and stays disabled here.
        # Buckets are created on demand (and all removed by reset), so their
        # insertion order is arbitrary: walk them by ascending distance.
        for nDist in sorted(self.dSugg):
            lRes.extend(self.dSugg[nDist])
            if len(lRes) > nSuggLimit:
                break
        lRes = list(cp.filterSugg(lRes))
        if self.sWord.istitle():
            lRes = [ sSugg.title()  for sSugg in lRes ]
        return lRes[:nSuggLimit]

    def reset (self):
        """Forget every stored suggestion.

        Only <aSugg> and <dSugg> are emptied; <nMinDist> and <nDistLimit>
        keep their current values.
        """
        self.aSugg.clear()
        self.dSugg.clear()
class IBDAWG:
"""INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""
def __init__ (self, sDicName):
self.by = pkgutil.get_data(__package__, "_dictionaries/" + sDicName)
if not self.by:
|
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
|
aSugg = oSuggResult.getSuggestions()
if sSfx or sPfx:
# we add what we removed
return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
return aSugg
def _suggest (self, oSuggResult, sRemain, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", sAction="", bAvoidLoop=False):
"returns a set of suggestions"
# recursive function
#logging.info((nDeep * " ") + sNewWord + ":" + sRemain + " · " + sAction)
if not sRemain:
if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
#logging.info((nDeep * " ") + "__" + sNewWord + "__")
oSuggResult.addSugg(sNewWord)
for sTail in self._getTails(iAddr):
|
<
|
261
262
263
264
265
266
267
268
269
270
271
272
273
274
|
aSugg = oSuggResult.getSuggestions()
if sSfx or sPfx:
# we add what we removed
return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
return aSugg
def _suggest (self, oSuggResult, sRemain, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", sAction="", bAvoidLoop=False):
# recursive function
#logging.info((nDeep * " ") + sNewWord + ":" + sRemain + " · " + sAction)
if not sRemain:
if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
#logging.info((nDeep * " ") + "__" + sNewWord + "__")
oSuggResult.addSugg(sNewWord)
for sTail in self._getTails(iAddr):
|