1
2
3
4
5
6
7
8
9
10
11
12
13
|
#!python3
"""
INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH
Implementation of a spellchecker as a transducer (storing transformation code to get lemma and morphologies)
and a spell suggestion mechanim
"""
import traceback
import pkgutil
import re
from functools import wraps
import time
|
|
|
1
2
3
4
5
6
7
8
9
10
11
12
13
|
#!python3
"""
INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH
Implementation of a spellchecker as a transducer (storing transformation code to get lemma and morphologies)
and a spell suggestion mechanism
"""
import traceback
import pkgutil
import re
from functools import wraps
import time
|
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
|
if self.lookup(sWord):
return True
if sWord[0:1].isupper():
if len(sWord) > 1:
if sWord.istitle():
return self.lookup(sWord.lower())
if sWord.isupper():
if self.bAcronymValid:
return True
return self.lookup(sWord.lower()) or self.lookup(sWord.capitalize())
return self.lookup(sWord[:1].lower() + sWord[1:])
return self.lookup(sWord.lower())
if sWord[0:1].isdigit():
return True
return False
def lookup (self, sWord):
|
<
<
|
|
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
|
if self.lookup(sWord):
return True
if sWord[0:1].isupper():
if len(sWord) > 1:
if sWord.istitle():
return self.lookup(sWord.lower())
if sWord.isupper():
return self.bAcronymValid or self.lookup(sWord.lower()) or self.lookup(sWord.capitalize())
return self.lookup(sWord[:1].lower() + sWord[1:])
return self.lookup(sWord.lower())
if sWord[0:1].isdigit():
return True
return False
def lookup (self, sWord):
|
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
|
oSuggResult.addSugg(sNewWord, nDeep)
for sTail in self._getTails(iAddr):
oSuggResult.addSugg(sNewWord+sTail, nDeep)
return
if (len(sNewWord) + len(sRemain) == len(oSuggResult.sWord)) and oSuggResult.sWord.lower().startswith(sNewWord.lower()) and self.isValid(sRemain):
if self.sLangCode == "fr" and sNewWord.lower() in ("l", "d", "n", "m", "t", "s", "c", "j", "qu", "lorsqu", "puisqu", "quoiqu", "jusqu", "quelqu") and sRemain[0:1] in cp.aVowel:
oSuggResult.addSugg(sNewWord+"’"+sRemain)
oSuggResult.addSugg(sNewWord+" "+sRemain)
if nDist > oSuggResult.nDistLimit:
return
cCurrent = sRemain[0:1]
for cChar, jAddr in self._getCharArcs(iAddr):
if cChar in cp.d1to1.get(cCurrent, cCurrent):
self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDist, nDeep+1, jAddr, sNewWord+cChar)
elif not bAvoidLoop:
|
>
|
|
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
|
oSuggResult.addSugg(sNewWord, nDeep)
for sTail in self._getTails(iAddr):
oSuggResult.addSugg(sNewWord+sTail, nDeep)
return
if (len(sNewWord) + len(sRemain) == len(oSuggResult.sWord)) and oSuggResult.sWord.lower().startswith(sNewWord.lower()) and self.isValid(sRemain):
if self.sLangCode == "fr" and sNewWord.lower() in ("l", "d", "n", "m", "t", "s", "c", "j", "qu", "lorsqu", "puisqu", "quoiqu", "jusqu", "quelqu") and sRemain[0:1] in cp.aVowel:
oSuggResult.addSugg(sNewWord+"’"+sRemain)
if (len(sNewWord) > 1 and len(sRemain) > 1) or sNewWord in ("a", "à", "y") or sRemain in ("a", "à", "y"):
oSuggResult.addSugg(sNewWord+" "+sRemain)
if nDist > oSuggResult.nDistLimit:
return
cCurrent = sRemain[0:1]
for cChar, jAddr in self._getCharArcs(iAddr):
if cChar in cp.d1to1.get(cCurrent, cCurrent):
self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDist, nDeep+1, jAddr, sNewWord+cChar)
elif not bAvoidLoop:
|