164
165
166
167
168
169
170
171
172
173
174
175
176
177
|
self.byDic = byDic
self.a2grams = set(by2grams.decode("utf-8").split("\t"))
l = byInfo.decode("utf-8").split("//")
self.sLangCode = l.pop(0)
self.sLangName = l.pop(0)
self.sDicName = l.pop(0)
self.sDate = l.pop(0)
self.nChar = int(l.pop(0))
self.nBytesArc = int(l.pop(0))
self.nBytesNodeAddress = int(l.pop(0))
self.nEntry = int(l.pop(0))
self.nNode = int(l.pop(0))
self.nArc = int(l.pop(0))
|
>
|
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
|
self.byDic = byDic
self.a2grams = set(by2grams.decode("utf-8").split("\t"))
l = byInfo.decode("utf-8").split("//")
self.sLangCode = l.pop(0)
self.sLangName = l.pop(0)
self.sDicName = l.pop(0)
self.sDescription = l.pop(0)
self.sDate = l.pop(0)
self.nChar = int(l.pop(0))
self.nBytesArc = int(l.pop(0))
self.nBytesNodeAddress = int(l.pop(0))
self.nEntry = int(l.pop(0))
self.nNode = int(l.pop(0))
self.nArc = int(l.pop(0))
|
206
207
208
209
210
211
212
213
214
215
216
217
218
219
|
if bInJSModule:
hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ')
hDst.write(json.dumps({
"sHeader": "/grammalecte-fsa/",
"sLangCode": self.sLangCode,
"sLangName": self.sLangName,
"sDicName": self.sDicName,
"sFileName": self.sFileName,
"sDate": self.sDate,
"nEntry": self.nEntry,
"nChar": self.nChar,
"nAff": self.nAff,
"nTag": self.nTag,
"cStemming": self.cStemming,
|
>
|
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
|
if bInJSModule:
hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ')
hDst.write(json.dumps({
"sHeader": "/grammalecte-fsa/",
"sLangCode": self.sLangCode,
"sLangName": self.sLangName,
"sDicName": self.sDicName,
"sDescription": self.sDescription,
"sFileName": self.sFileName,
"sDate": self.sDate,
"nEntry": self.nEntry,
"nChar": self.nChar,
"nAff": self.nAff,
"nTag": self.nTag,
"cStemming": self.cStemming,
|
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
|
sWord1, sWord2 = sWord.split(cSplitter, 1)
if self.isValid(sWord1) and self.isValid(sWord2):
oSuggResult.addSugg(sWord1+" "+sWord2)
def _suggest (self, oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nMaxJump=0, nDist=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False):
# recursive function
#logging.info((nDeep * " ") + sNewWord + ":" + sRemain)
if not sRemain:
if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
oSuggResult.addSugg(sNewWord, nDeep)
for sTail in self._getTails(iAddr):
oSuggResult.addSugg(sNewWord+sTail, nDeep)
return
if nDist > oSuggResult.nDistLimit:
return
cCurrent = sRemain[0:1]
for cChar, jAddr in self._getCharArcs(iAddr):
if cChar in cp.d1to1.get(cCurrent, cCurrent):
self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDist, nDeep+1, jAddr, sNewWord+cChar)
elif not bAvoidLoop:
|
<
|
>
|
|
|
>
>
|
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
|
sWord1, sWord2 = sWord.split(cSplitter, 1)
if self.isValid(sWord1) and self.isValid(sWord2):
oSuggResult.addSugg(sWord1+" "+sWord2)
def _suggest (self, oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nMaxJump=0, nDist=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False):
# recursive function
#logging.info((nDeep * " ") + sNewWord + ":" + sRemain)
if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
if not sRemain:
oSuggResult.addSugg(sNewWord, nDeep)
for sTail in self._getTails(iAddr):
oSuggResult.addSugg(sNewWord+sTail, nDeep)
return
elif self.isValid(sRemain):
oSuggResult.addSugg(sNewWord+" "+sRemain)
if nDist > oSuggResult.nDistLimit:
return
cCurrent = sRemain[0:1]
for cChar, jAddr in self._getCharArcs(iAddr):
if cChar in cp.d1to1.get(cCurrent, cCurrent):
self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDist, nDeep+1, jAddr, sNewWord+cChar)
elif not bAvoidLoop:
|