9
10
11
12
13
14
15
16
17
18
19
20
21
22
|
import traceback
import pkgutil
import re
from functools import wraps
import time
import json
import binascii
from collections import OrderedDict
#import logging
#logging.basicConfig(filename="suggestions.log", level=logging.DEBUG)
from . import str_transform as st
from . import char_player as cp
|
>
|
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
|
import traceback
import pkgutil
import re
from functools import wraps
import time
import json
import binascii
import importlib
from collections import OrderedDict
#import logging
#logging.basicConfig(filename="suggestions.log", level=logging.DEBUG)
from . import str_transform as st
from . import char_player as cp
|
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
|
class SuggResult:
"""Structure for storing, classifying and filtering suggestions"""
def __init__ (self, sWord, nDistLimit=-1):
self.sWord = sWord
self.sSimplifiedWord = cp.simplifyWord(sWord)
self.nDistLimit = nDistLimit if nDistLimit >= 0 else (len(sWord) // 3) + 1
self.nMinDist = 1000
self.aSugg = set()
self.dSugg = { 0: [], 1: [], 2: [] }
self.aAllSugg = set() # all found words even those refused
def addSugg (self, sSugg, nDeep=0):
"add a suggestion"
#logging.info((nDeep * " ") + "__" + sSugg + "__")
if sSugg in self.aAllSugg:
return
self.aAllSugg.add(sSugg)
if sSugg not in self.aSugg:
#nDist = min(st.distanceDamerauLevenshtein(self.sWord, sSugg), st.distanceDamerauLevenshtein(self.sSimplifiedWord, cp.simplifyWord(sSugg)))
nDist = st.distanceDamerauLevenshtein(self.sSimplifiedWord, cp.simplifyWord(sSugg))
#logging.info((nDeep * " ") + "__" + sSugg + "__ :" + self.sSimplifiedWord +"|"+ cp.simplifyWord(sSugg) +" -> "+ str(nDist))
if nDist <= self.nDistLimit:
if " " in sSugg:
nDist += 1
if nDist not in self.dSugg:
self.dSugg[nDist] = []
self.dSugg[nDist].append(sSugg)
self.aSugg.add(sSugg)
|
|
|
|
|
|
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
|
class SuggResult:
"""Structure for storing, classifying and filtering suggestions"""
def __init__ (self, sWord, nDistLimit=-1):
self.sWord = sWord
self.sSimplifiedWord = st.simplifyWord(sWord)
self.nDistLimit = nDistLimit if nDistLimit >= 0 else (len(sWord) // 3) + 1
self.nMinDist = 1000
self.aSugg = set()
self.dSugg = { 0: [], 1: [], 2: [] }
self.aAllSugg = set() # all found words even those refused
def addSugg (self, sSugg, nDeep=0):
"add a suggestion"
#logging.info((nDeep * " ") + "__" + sSugg + "__")
if sSugg in self.aAllSugg:
return
self.aAllSugg.add(sSugg)
if sSugg not in self.aSugg:
#nDist = min(st.distanceDamerauLevenshtein(self.sWord, sSugg), st.distanceDamerauLevenshtein(self.sSimplifiedWord, st.simplifyWord(sSugg)))
nDist = st.distanceDamerauLevenshtein(self.sSimplifiedWord, st.simplifyWord(sSugg))
#logging.info((nDeep * " ") + "__" + sSugg + "__ :" + self.sSimplifiedWord +"|"+ st.simplifyWord(sSugg) +" -> "+ str(nDist))
if nDist <= self.nDistLimit:
if " " in sSugg:
nDist += 1
if nDist not in self.dSugg:
self.dSugg[nDist] = []
self.dSugg[nDist].append(sSugg)
self.aSugg.add(sSugg)
|
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
|
self.dSugg[1].sort(key=lambda sSugg: st.distanceDamerauLevenshtein(self.sWord, sSugg))
lRes = self.dSugg.pop(0)
for nDist, lSugg in self.dSugg.items():
if nDist <= self.nDistLimit:
lRes.extend(lSugg)
if len(lRes) > nSuggLimit:
break
lRes = list(cp.filterSugg(lRes))
if self.sWord.isupper():
lRes = list(OrderedDict.fromkeys(map(lambda sSugg: sSugg.upper(), lRes))) # use dict, when Python 3.6+
elif self.sWord[0:1].isupper():
# dont’ use <.istitle>
lRes = list(OrderedDict.fromkeys(map(lambda sSugg: sSugg[0:1].upper()+sSugg[1:], lRes))) # use dict, when Python 3.6+
return lRes[:nSuggLimit]
|
<
|
79
80
81
82
83
84
85
86
87
88
89
90
91
92
|
self.dSugg[1].sort(key=lambda sSugg: st.distanceDamerauLevenshtein(self.sWord, sSugg))
lRes = self.dSugg.pop(0)
for nDist, lSugg in self.dSugg.items():
if nDist <= self.nDistLimit:
lRes.extend(lSugg)
if len(lRes) > nSuggLimit:
break
if self.sWord.isupper():
lRes = list(OrderedDict.fromkeys(map(lambda sSugg: sSugg.upper(), lRes))) # use dict, when Python 3.6+
elif self.sWord[0:1].isupper():
# dont’ use <.istitle>
lRes = list(OrderedDict.fromkeys(map(lambda sSugg: sSugg[0:1].upper()+sSugg[1:], lRes))) # use dict, when Python 3.6+
return lRes[:nSuggLimit]
|
149
150
151
152
153
154
155
156
157
158
159
160
161
162
|
self._getArcs = self._getArcs3
self._writeNodes = self._writeNodes3
else:
raise ValueError(" # Error: unknown code: {}".format(self.nCompressionMethod))
self.bAcronymValid = False
self.bNumAtLastValid = False
def _initBinary (self):
"initialize with binary structure file"
if self.by[0:17] != b"/grammalecte-fsa/":
raise TypeError("# Error. Not a grammalecte-fsa binary dictionary. Header: {}".format(self.by[0:9]))
if not(self.by[17:18] == b"1" or self.by[17:18] == b"2" or self.by[17:18] == b"3"):
raise ValueError("# Error. Unknown dictionary version: {}".format(self.by[17:18]))
|
>
>
>
>
>
>
>
>
|
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
|
self._getArcs = self._getArcs3
self._writeNodes = self._writeNodes3
else:
raise ValueError(" # Error: unknown code: {}".format(self.nCompressionMethod))
self.bAcronymValid = False
self.bNumAtLastValid = False
# lexicographer module ?
self.lexicographer = None
try:
self.lexicographer = importlib.import_module("graphspell.lexgraph_"+self.sLangCode)
except ImportError:
print("# No module <graphspell.lexgraph_"+self.sLangCode+".py>")
def _initBinary (self):
"initialize with binary structure file"
if self.by[0:17] != b"/grammalecte-fsa/":
raise TypeError("# Error. Not a grammalecte-fsa binary dictionary. Header: {}".format(self.by[0:9]))
if not(self.by[17:18] == b"1" or self.by[17:18] == b"2" or self.by[17:18] == b"3"):
raise ValueError("# Error. Unknown dictionary version: {}".format(self.by[17:18]))
|
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
|
return l
#@timethis
def suggest (self, sWord, nSuggLimit=10, bSplitTrailingNumbers=False):
"returns a set of suggestions for <sWord>"
sWord = sWord.rstrip(".") # useful for LibreOffice
sWord = st.spellingNormalization(sWord)
sPfx, sWord, sSfx = cp.cut(sWord)
nMaxSwitch = max(len(sWord) // 3, 1)
nMaxDel = len(sWord) // 5
nMaxHardRepl = max((len(sWord) - 5) // 4, 1)
nMaxJump = max(len(sWord) // 4, 1)
oSuggResult = SuggResult(sWord)
if bSplitTrailingNumbers:
self._splitTrailingNumbers(oSuggResult, sWord)
self._splitSuggest(oSuggResult, sWord)
self._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump)
aSugg = oSuggResult.getSuggestions(nSuggLimit)
if sSfx or sPfx:
# we add what we removed
return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
return aSugg
def _splitTrailingNumbers (self, oSuggResult, sWord):
m = re.match(r"(\D+)([0-9]+)$", sWord)
if m and m.group(1)[-1:].isalpha():
oSuggResult.addSugg(m.group(1) + " " + cp.numbersToExponent(m.group(2)))
def _splitSuggest (self, oSuggResult, sWord):
# split at apostrophes
for cSplitter in "'’":
if cSplitter in sWord:
sWord1, sWord2 = sWord.split(cSplitter, 1)
if self.isValid(sWord1) and self.isValid(sWord2):
|
>
>
>
|
>
>
|
|
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
|
return l
#@timethis
def suggest (self, sWord, nSuggLimit=10, bSplitTrailingNumbers=False):
"returns a set of suggestions for <sWord>"
sWord = sWord.rstrip(".") # useful for LibreOffice
sWord = st.spellingNormalization(sWord)
sPfx = ""
sSfx = ""
if self.lexicographer:
sPfx, sWord, sSfx = self.lexicographer.split(sWord)
nMaxSwitch = max(len(sWord) // 3, 1)
nMaxDel = len(sWord) // 5
nMaxHardRepl = max((len(sWord) - 5) // 4, 1)
nMaxJump = max(len(sWord) // 4, 1)
oSuggResult = SuggResult(sWord)
if bSplitTrailingNumbers:
self._splitTrailingNumbers(oSuggResult, sWord)
self._splitSuggest(oSuggResult, sWord)
self._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump)
aSugg = oSuggResult.getSuggestions(nSuggLimit)
if self.lexicographer:
aSugg = self.lexicographer.filterSugg(aSugg)
if sSfx or sPfx:
# we add what we removed
return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
return aSugg
def _splitTrailingNumbers (self, oSuggResult, sWord):
m = re.match(r"(\D+)([0-9]+)$", sWord)
if m and m.group(1)[-1:].isalpha():
oSuggResult.addSugg(m.group(1) + " " + st.numbersToExponent(m.group(2)))
def _splitSuggest (self, oSuggResult, sWord):
# split at apostrophes
for cSplitter in "'’":
if cSplitter in sWord:
sWord1, sWord2 = sWord.split(cSplitter, 1)
if self.isValid(sWord1) and self.isValid(sWord2):
|