249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
|
if cSplitter in sWord:
sWord1, sWord2 = sWord.split(cSplitter, 1)
if self.isValid(sWord1) and self.isValid(sWord2):
oSuggResult.addSugg(sWord1+" "+sWord2)
def _suggest (self, oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nMaxJump=0, nDist=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False):
# recursive function
#logging.info((nDeep * " ") + sNewWord + ":" + sRemain)
if self.lByDic[iAddr] & self._finalNodeMask:
if not sRemain:
oSuggResult.addSugg(sNewWord, nDeep)
for sTail in self._getTails(iAddr):
oSuggResult.addSugg(sNewWord+sTail, nDeep)
return
if (len(sNewWord) + len(sRemain) == len(oSuggResult.sWord)) and oSuggResult.sWord.lower().startswith(sNewWord.lower()) and self.isValid(sRemain):
if self.sLangCode == "fr" and sNewWord.lower() in ("l", "d", "n", "m", "t", "s", "c", "j", "qu", "lorsqu", "puisqu", "quoiqu", "jusqu", "quelqu") and sRemain[0:1] in cp.aVowel:
oSuggResult.addSugg(sNewWord+"’"+sRemain, nDeep)
if (len(sNewWord) > 1 and len(sRemain) > 1) or sNewWord in "aày" or sRemain in "aày":
oSuggResult.addSugg(sNewWord+" "+sRemain, nDeep)
if nDist > oSuggResult.nDistLimit:
return
cCurrent = sRemain[0:1]
for cChar, jAddr in self._getCharArcs(iAddr):
if cChar in cp.d1to1.get(cCurrent, cCurrent):
self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDist, nDeep+1, jAddr, sNewWord+cChar)
elif not bAvoidLoop:
if nMaxHardRepl and self.isNgramsOK(cChar+sRemain[1:2]):
self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl-1, nMaxJump, nDist+1, nDeep+1, jAddr, sNewWord+cChar, True)
if nMaxJump:
self._suggest(oSuggResult, sRemain, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump-1, nDist+1, nDeep+1, jAddr, sNewWord+cChar, True) # True for avoiding loop?
|
|
|
|
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
|
if cSplitter in sWord:
sWord1, sWord2 = sWord.split(cSplitter, 1)
if self.isValid(sWord1) and self.isValid(sWord2):
oSuggResult.addSugg(sWord1+" "+sWord2)
def _suggest (self, oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nMaxJump=0, nDist=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False):
# recursive function
#logging.info((nDeep * " ") + f"{sNewWord}:{sRemain} nMaxSwitch:{nMaxSwitch} nMaxDel:{nMaxDel} nMaxHardRepl:{nMaxHardRepl} nMaxJump:{nMaxJump} | nDist:{nDist} / {oSuggResult.nDistLimit}")
if self.lByDic[iAddr] & self._finalNodeMask:
if not sRemain:
oSuggResult.addSugg(sNewWord, nDeep)
for sTail in self._getTails(iAddr):
oSuggResult.addSugg(sNewWord+sTail, nDeep)
return
if (len(sNewWord) + len(sRemain) == len(oSuggResult.sWord)) and oSuggResult.sWord.lower().startswith(sNewWord.lower()) and self.isValid(sRemain):
if self.sLangCode == "fr" and sNewWord.lower() in ("l", "d", "n", "m", "t", "s", "c", "j", "qu", "lorsqu", "puisqu", "quoiqu", "jusqu", "quelqu") and sRemain[0:1] in cp.aVowel:
oSuggResult.addSugg(sNewWord+"’"+sRemain, nDeep)
if (len(sNewWord) > 1 and len(sRemain) > 1) or sNewWord in "aày" or sRemain in "aày":
oSuggResult.addSugg(sNewWord+" "+sRemain, nDeep)
if nDist > oSuggResult.nDistLimit:
return
cCurrent = sRemain[0:1]
for cChar, jAddr in self._getCharArcs(iAddr, cCurrent):
if cChar in cp.d1to1.get(cCurrent, cCurrent):
self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDist, nDeep+1, jAddr, sNewWord+cChar)
elif not bAvoidLoop:
if nMaxHardRepl and self.isNgramsOK(cChar+sRemain[1:2]):
self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl-1, nMaxJump, nDist+1, nDeep+1, jAddr, sNewWord+cChar, True)
if nMaxJump:
self._suggest(oSuggResult, sRemain, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump-1, nDist+1, nDeep+1, jAddr, sNewWord+cChar, True) # True for avoiding loop?
|
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
|
"returns True if sChars in known 2grams"
if len(sChars) != 2:
return True
if not self.a2grams:
return True
return sChars in self.a2grams
def _getCharArcs (self, iAddr):
    """Lazily yield a (character, target address) pair for the arcs leaving <iAddr>.

    Arcs are taken from self._getArcs(iAddr) in the order it produces them.
    An arc whose value is greater than self.nChar is skipped; the value of
    every remaining arc is translated to its character through self.dCharVal.
    """
    for nArcVal, jNext in self._getArcs(iAddr):
        if nArcVal > self.nChar:
            # skipped: only arc values up to self.nChar are looked up in dCharVal
            continue
        yield (self.dCharVal[nArcVal], jNext)
def _getTails (self, iAddr, sTail="", n=2):
"return a list of suffixes ending at a distance of <n> from <iAddr>"
aTails = set()
for nVal, jAddr in self._getArcs(iAddr):
if nVal <= self.nChar:
if self.lByDic[jAddr] & self._finalNodeMask:
|
|
>
>
|
>
>
>
>
|
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
|
"returns True if sChars in known 2grams"
if len(sChars) != 2:
return True
if not self.a2grams:
return True
return sChars in self.a2grams
def _getCharArcs (self, iAddr, cChar=""):
    """Generator: yield (character, address) for each character arc of the node at <iAddr>.

    Arcs are read from self._getArcs(iAddr); an arc whose value is greater
    than self.nChar is skipped, the others are translated to characters
    through self.dCharVal.

    Ordering: arcs whose character is contained in cp.d1to1.get(cChar, cChar)
    (that is, in <cChar> itself when cp.d1to1 has no entry for it) are yielded
    first, as soon as they are met. Every other character arc is held back and
    yielded afterwards, in the order it was met. With the default cChar=""
    and no "" entry in cp.d1to1, the lookup gives an empty string, so
    ordinarily no arc is prioritized and the original arc order is kept.
    """
    # Loop-invariant: the set of characters that get priority.
    sPriority = cp.d1to1.get(cChar, cChar)
    # Non-priority arcs, kept in arrival order (a FIFO queue, not a stack).
    lDeferred = []
    for nVal, jAddr in self._getArcs(iAddr):
        if nVal > self.nChar:
            continue
        cArc = self.dCharVal[nVal]
        if cArc in sPriority:
            yield (cArc, jAddr)
        else:
            lDeferred.append((cArc, jAddr))
    # Plain in-order iteration replaces repeated list.pop(0), which shifts
    # the whole list on every call.
    yield from lDeferred
def _getTails (self, iAddr, sTail="", n=2):
"return a list of suffixes ending at a distance of <n> from <iAddr>"
aTails = set()
for nVal, jAddr in self._getArcs(iAddr):
if nVal <= self.nChar:
if self.lByDic[jAddr] & self._finalNodeMask:
|