11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
import re
from functools import wraps
import time
import json
import binascii
import importlib
from collections import OrderedDict
#import logging
#logging.basicConfig(filename="suggestions.log", level=logging.DEBUG)
from . import str_transform as st
from . import char_player as cp
from .echo import echo
|
>
|
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
import re
from functools import wraps
import time
import json
import binascii
import importlib
from collections import OrderedDict
from math import floor
#import logging
#logging.basicConfig(filename="suggestions.log", level=logging.DEBUG)
from . import str_transform as st
from . import char_player as cp
from .echo import echo
|
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
|
return result
return wrapper
class SuggResult:
    """Structure for storing, classifying and filtering suggestions.

    Accepted suggestions are grouped in buckets (lists) keyed by an integer
    distance computed between the simplified forms of the misspelled word
    and of the suggestion.
    """

    def __init__(self, sWord, nDistLimit=-1):
        """Prepare an empty result set for <sWord>.

        sWord:      the word for which suggestions are collected.
        nDistLimit: greatest accepted distance; any negative value selects
                    the default, (len(sWord) // 3) + 1.
        """
        self.sWord = sWord
        self.sSimplifiedWord = st.simplifyWord(sWord)
        self.nDistLimit = nDistLimit if nDistLimit >= 0 else (len(sWord) // 3) + 1
        self.nMinDist = 1000
        self.aSugg = set()                   # accepted suggestions
        self.dSugg = {0: [], 1: [], 2: []}   # distance -> accepted suggestions
        self.aAllSugg = set()                # all found words even those refused

    def addSugg(self, sSugg, nDeep=0):
        """Add a suggestion, unless it was already seen or is too distant.

        sSugg: the candidate word.
        nDeep: not used by the active code (it was only read by disabled
               debug logging); kept so existing callers keep working.
        """
        if sSugg in self.aAllSugg:
            return
        self.aAllSugg.add(sSugg)
        # Every accepted word is first recorded in <aAllSugg>, so reaching this
        # point already implies the word is not in <aSugg>.
        nDist = int(st.distanceDamerauLevenshtein(self.sSimplifiedWord, st.simplifyWord(sSugg)))
        if nDist > self.nDistLimit:
            return
        if " " in sSugg:
            # suggestions containing a space are ranked one bucket further
            nDist += 1
        self.dSugg.setdefault(nDist, []).append(sSugg)
        self.aSugg.add(sSugg)
        if nDist < self.nMinDist:
            self.nMinDist = nDist
        # tighten the limit: never accept more than one step beyond the best distance
        self.nDistLimit = min(self.nDistLimit, self.nMinDist + 1)

    def getSuggestions(self, nSuggLimit=10):
        """Return a list of at most <nSuggLimit> suggestions, nearest buckets first.

        The first bucket holding more than one word is re-sorted (in place)
        by its distance to the original, non-simplified word. The casing of
        the original word (all uppercase, or uppercase initial) is applied to
        the result, and duplicates created by that recasing are dropped.
        """
        lRes = []
        bFirstListSorted = False
        # Walk the buckets by ascending distance. Dict iteration follows
        # insertion order, so without sorting a far bucket created before a
        # nearer one would trigger the <break> below and hide valid suggestions.
        for nDist in sorted(self.dSugg):
            if nDist > self.nDistLimit:
                break
            lSugg = self.dSugg[nDist]
            if not bFirstListSorted and len(lSugg) > 1:
                lSugg.sort(key=lambda sSugg: st.distanceDamerauLevenshtein(self.sWord, sSugg))
                bFirstListSorted = True
            lRes.extend(lSugg)
            if len(lRes) > nSuggLimit:
                break
        if self.sWord.isupper():
            lRes = list(OrderedDict.fromkeys(map(lambda sSugg: sSugg.upper(), lRes)))  # use dict, when Python 3.6+
        elif self.sWord[0:1].isupper():
            # don't use <.istitle>
            lRes = list(OrderedDict.fromkeys(map(lambda sSugg: sSugg[0:1].upper() + sSugg[1:], lRes)))  # use dict, when Python 3.6+
        return lRes[:nSuggLimit]

    def reset(self):
        """Clear the accepted suggestions.

        <aAllSugg>, <nMinDist> and <nDistLimit> are left untouched.
        """
        self.aSugg.clear()
        self.dSugg.clear()
|
|
>
|
|
|
>
>
>
>
>
<
|
|
>
|
>
|
>
|
>
|
>
|
|
|
|
|
|
|
|
<
<
<
<
|
|
|
<
|
<
|
|
>
>
>
>
|
|
|
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
|
return result
return wrapper
class SuggResult:
    """Structure for storing, classifying and filtering suggestions.

    Each candidate is scored with 1 - st.distanceJaroWinkler(...) computed on
    the simplified forms of the misspelled word and of the candidate, then
    filed as a "best" suggestion (score < .11), a "good" suggestion
    (score < .33), or only remembered as already seen.
    """

    def __init__(self, sWord, nSuggLimit=10, nDistLimit=-1):
        """Prepare an empty result set for <sWord>.

        sWord:      the word for which suggestions are collected.
        nSuggLimit: maximum number of suggestions returned by getSuggestions().
        nDistLimit: initial distance limit; any negative value selects the
                    default, (len(sWord) // 3) + 1.
        """
        self.sWord = sWord
        self.sSimplifiedWord = st.simplifyWord(sWord)
        self.nDistLimit = nDistLimit if nDistLimit >= 0 else (len(sWord) // 3) + 1
        self.nMinDist = 1000
        # Temporary sets
        self.aAllSugg = set()   # All suggestions, even the ones rejected
        self.dGoodSugg = {}     # Acceptable suggestions: word -> score
        self.dBestSugg = {}     # Best suggestions: word -> score
        # Parameters
        self.nSuggLimit = nSuggLimit
        self.nSuggLimitExt = nSuggLimit + 2             # a few extra entries in case suggestions merge after casing modifications
        self.nBestSuggLimit = floor(nSuggLimit * 1.5)   # n times the requested limit
        self.nGoodSuggLimit = nSuggLimit * 15           # n times the requested limit

    def addSugg(self, sSugg, nDeep=0):
        """Add a suggestion, unless it was already seen.

        sSugg: the candidate word.
        nDeep: not used by the active code (it was only read by disabled
               debug logging); kept so existing callers keep working.
        """
        if sSugg in self.aAllSugg:
            return
        self.aAllSugg.add(sSugg)
        nDistJaro = 1 - st.distanceJaroWinkler(self.sSimplifiedWord, st.simplifyWord(sSugg))
        nDist = floor(nDistJaro * 10)
        if nDistJaro < .11:
            # Best suggestions
            self.dBestSugg[sSugg] = round(nDistJaro * 1000)
            if len(self.dBestSugg) > self.nBestSuggLimit:
                self.nDistLimit = -1    # makes suggest() end its search
        elif nDistJaro < .33:
            # Good suggestions
            self.dGoodSugg[sSugg] = round(nDistJaro * 1000)
            if len(self.dGoodSugg) > self.nGoodSuggLimit:
                self.nDistLimit = -1    # makes suggest() end its search
        else:
            if nDist < self.nMinDist:
                self.nMinDist = nDist
            self.nDistLimit = min(self.nDistLimit, self.nMinDist)
        # NOTE(review): the nesting of this last test was reconstructed from
        # flattened text; it is assumed to apply to every candidate -- confirm.
        if nDist <= self.nDistLimit:
            if nDist < self.nMinDist:
                self.nMinDist = nDist
            self.nDistLimit = min(self.nDistLimit, self.nMinDist + 1)

    def getSuggestions(self):
        """Return a list of at most <self.nSuggLimit> suggestions.

        Best suggestions come first, ordered by their stored score. Good
        suggestions are appended only when the best ones do not fill the
        extended limit; they are ordered by their score against the original
        (non-simplified) word, then by their stored score. The casing of the
        original word (all uppercase, or uppercase initial) is applied, and
        duplicates created by that recasing are dropped.
        """
        lRes = []
        if self.dBestSugg:
            # sort only with simplified words
            lResTmp = sorted(self.dBestSugg.items(), key=lambda x: x[1])
            lRes.extend(sSugg for sSugg, _ in lResTmp[:self.nSuggLimitExt])
        if len(lRes) < self.nSuggLimitExt:
            # sort with simplified words and original word
            lResTmp = sorted(self.dGoodSugg.items(), key=lambda x: ((1 - st.distanceJaroWinkler(self.sWord, x[0])) * 10, x[1]))
            lRes.extend(sSugg for sSugg, _ in lResTmp[:self.nSuggLimitExt])
        # casing
        if self.sWord.isupper():
            lRes = list(OrderedDict.fromkeys(map(lambda sSugg: sSugg.upper(), lRes)))  # use dict, when Python 3.6+
        elif self.sWord[0:1].isupper():
            # don't use <.istitle>
            lRes = list(OrderedDict.fromkeys(map(lambda sSugg: sSugg[0:1].upper() + sSugg[1:], lRes)))  # use dict, when Python 3.6+
        return lRes[:self.nSuggLimit]

    def reset(self):
        """Clear the collected best and good suggestions.

        The containers cleared here are the ones __init__ creates; the names
        previously cleared no longer exist on this class and raised
        AttributeError. <aAllSugg>, <nMinDist> and <nDistLimit> are left
        untouched.
        """
        self.dBestSugg.clear()
        self.dGoodSugg.clear()
|
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
|
sSfx = ""
if self.lexicographer:
sPfx, sWord, sSfx = self.lexicographer.split(sWord)
nMaxSwitch = max(len(sWord) // 3, 1)
nMaxDel = len(sWord) // 5
nMaxHardRepl = max((len(sWord) - 5) // 4, 1)
nMaxJump = max(len(sWord) // 4, 1)
oSuggResult = SuggResult(sWord)
if bSplitTrailingNumbers:
self._splitTrailingNumbers(oSuggResult, sWord)
self._splitSuggest(oSuggResult, sWord)
self._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump)
aSugg = oSuggResult.getSuggestions(nSuggLimit)
if self.lexicographer:
aSugg = self.lexicographer.filterSugg(aSugg)
if sSfx or sPfx:
# we add what we removed
return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
return aSugg
|
|
>
|
|
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
|
sSfx = ""
if self.lexicographer:
sPfx, sWord, sSfx = self.lexicographer.split(sWord)
nMaxSwitch = max(len(sWord) // 3, 1)
nMaxDel = len(sWord) // 5
nMaxHardRepl = max((len(sWord) - 5) // 4, 1)
nMaxJump = max(len(sWord) // 4, 1)
oSuggResult = SuggResult(sWord, nSuggLimit)
sWord = st.cleanWord(sWord)
if bSplitTrailingNumbers:
self._splitTrailingNumbers(oSuggResult, sWord)
self._splitSuggest(oSuggResult, sWord)
self._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump)
aSugg = oSuggResult.getSuggestions()
if self.lexicographer:
aSugg = self.lexicographer.filterSugg(aSugg)
if sSfx or sPfx:
# we add what we removed
return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
return aSugg
|