244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
|
"l2grams": list(self.a2grams)
}, ensure_ascii=False))
if bInJSModule:
hDst.write(";\n\nexports.dictionary = dictionary;\n")
def isValidToken (self, sToken):
"checks if <sToken> is valid (if there is hyphens in <sToken>, <sToken> is split, each part is checked)"
sToken = cp.spellingNormalization(sToken)
if self.isValid(sToken):
return True
if "-" in sToken:
if sToken.count("-") > 4:
return True
return all(self.isValid(sWord) for sWord in sToken.split("-"))
if "." in sToken or "·" in sToken:
|
|
|
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
|
"l2grams": list(self.a2grams)
}, ensure_ascii=False))
if bInJSModule:
hDst.write(";\n\nexports.dictionary = dictionary;\n")
def isValidToken (self, sToken):
"checks if <sToken> is valid (if there is hyphens in <sToken>, <sToken> is split, each part is checked)"
sToken = st.spellingNormalization(sToken)
if self.isValid(sToken):
return True
if "-" in sToken:
if sToken.count("-") > 4:
return True
return all(self.isValid(sWord) for sWord in sToken.split("-"))
if "." in sToken or "·" in sToken:
|
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
|
iAddr = self._lookupArcNode(self.dChar[c], iAddr)
if iAddr is None:
return False
return bool(int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask)
def getMorph (self, sWord):
"retrieves morphologies list, different casing allowed"
sWord = cp.spellingNormalization(sWord)
l = self.morph(sWord)
if sWord[0:1].isupper():
l.extend(self.morph(sWord.lower()))
if sWord.isupper() and len(sWord) > 1:
l.extend(self.morph(sWord.capitalize()))
return l
#@timethis
def suggest (self, sWord, nSuggLimit=10, bSplitTrailingNumbers=False):
"returns a set of suggestions for <sWord>"
sWord = sWord.rstrip(".") # useful for LibreOffice
sWord = cp.spellingNormalization(sWord)
sPfx, sWord, sSfx = cp.cut(sWord)
nMaxSwitch = max(len(sWord) // 3, 1)
nMaxDel = len(sWord) // 5
nMaxHardRepl = max((len(sWord) - 5) // 4, 1)
nMaxJump = max(len(sWord) // 4, 1)
oSuggResult = SuggResult(sWord)
if bSplitTrailingNumbers:
|
|
|
|
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
|
iAddr = self._lookupArcNode(self.dChar[c], iAddr)
if iAddr is None:
return False
return bool(int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask)
def getMorph (self, sWord):
"retrieves morphologies list, different casing allowed"
sWord = st.spellingNormalization(sWord)
l = self.morph(sWord)
if sWord[0:1].isupper():
l.extend(self.morph(sWord.lower()))
if sWord.isupper() and len(sWord) > 1:
l.extend(self.morph(sWord.capitalize()))
return l
#@timethis
def suggest (self, sWord, nSuggLimit=10, bSplitTrailingNumbers=False):
"returns a set of suggestions for <sWord>"
sWord = sWord.rstrip(".") # useful for LibreOffice
sWord = st.spellingNormalization(sWord)
sPfx, sWord, sSfx = cp.cut(sWord)
nMaxSwitch = max(len(sWord) // 3, 1)
nMaxDel = len(sWord) // 5
nMaxHardRepl = max((len(sWord) - 5) // 4, 1)
nMaxJump = max(len(sWord) // 4, 1)
oSuggResult = SuggResult(sWord)
if bSplitTrailingNumbers:
|
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
|
aTails.add(sTail + self.dCharVal[nVal])
if n and not aTails:
aTails.update(self._getTails(jAddr, sTail+self.dCharVal[nVal], n-1))
return aTails
def drawPath (self, sWord, iAddr=0):
"show the path taken by <sWord> in the graph"
sWord = cp.spellingNormalization(sWord)
c1 = sWord[0:1] if sWord else " "
iPos = -1
n = 0
echo(c1 + ": ", end="")
for c2, jAddr in self._getCharArcs(iAddr):
echo(c2, end="")
if c2 == sWord[0:1]:
|
|
|
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
|
aTails.add(sTail + self.dCharVal[nVal])
if n and not aTails:
aTails.update(self._getTails(jAddr, sTail+self.dCharVal[nVal], n-1))
return aTails
def drawPath (self, sWord, iAddr=0):
"show the path taken by <sWord> in the graph"
sWord = st.spellingNormalization(sWord)
c1 = sWord[0:1] if sWord else " "
iPos = -1
n = 0
echo(c1 + ": ", end="")
for c2, jAddr in self._getCharArcs(iAddr):
echo(c2, end="")
if c2 == sWord[0:1]:
|