52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
|
if self.cStemming == "S":
self.funcStemming = st.changeWordWithSuffixCode
elif self.cStemming == "A":
self.funcStemming = st.changeWordWithAffixCode
else:
self.funcStemming = st.noStemming
self.nTag = self.nArcVal - self.nChar - self.nAff
# <dChar> to get the value of an arc, <dVal> to get the char of an arc with its value
self.dChar = {}
for i in range(1, self.nChar):
self.dChar[self.lArcVal[i]] = i
self.dVal = { v: k for k, v in self.dChar.items() }
self._arcMask = (2 ** ((self.nBytesArc * 8) - 3)) - 1
self._finalNodeMask = 1 << ((self.nBytesArc * 8) - 1)
self._lastArcMask = 1 << ((self.nBytesArc * 8) - 2)
self._addrBitMask = 1 << ((self.nBytesArc * 8) - 3) # version 2
self.nBytesOffset = 1 # version 3
|
|
|
|
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
|
if self.cStemming == "S":
self.funcStemming = st.changeWordWithSuffixCode
elif self.cStemming == "A":
self.funcStemming = st.changeWordWithAffixCode
else:
self.funcStemming = st.noStemming
self.nTag = self.nArcVal - self.nChar - self.nAff
# <dChar> to get the value of an arc, <dArcVal> to get the char of an arc with its value
self.dChar = {}
for i in range(1, self.nChar):
self.dChar[self.lArcVal[i]] = i
self.dArcVal = { v: k for k, v in self.dChar.items() }
self._arcMask = (2 ** ((self.nBytesArc * 8) - 3)) - 1
self._finalNodeMask = 1 << ((self.nBytesArc * 8) - 1)
self._lastArcMask = 1 << ((self.nBytesArc * 8) - 2)
self._addrBitMask = 1 << ((self.nBytesArc * 8) - 3) # version 2
self.nBytesOffset = 1 # version 3
|
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
|
show(nDeep, cChar)
lSugg.extend(self._suggestWithCrushedUselessChars(sWord[1:], nDeep+1, jAddr, sNewWord+cChar))
return lSugg
def _getSimilarArcsAndCrushedChars (self, cChar, iAddr):
"generator: yield similar char of <cChar> and address of the following node"
for nVal, jAddr in self._getArcs(iAddr):
if self.dVal.get(nVal, "") in cp.aUselessChar:
yield (self.dVal[nVal], jAddr)
for c in cp.d1to1.get(cChar, [cChar]):
if c in self.dChar:
jAddr = self._lookupArcNode(self.dChar[c], iAddr)
if jAddr:
yield (c, jAddr)
def getMorph (self, sWord):
|
|
|
|
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
|
show(nDeep, cChar)
lSugg.extend(self._suggestWithCrushedUselessChars(sWord[1:], nDeep+1, jAddr, sNewWord+cChar))
return lSugg
def _getSimilarArcsAndCrushedChars (self, cChar, iAddr):
"generator: yield similar char of <cChar> and address of the following node"
for nVal, jAddr in self._getArcs(iAddr):
if self.dArcVal.get(nVal, "") in cp.aUselessChar:
yield (self.dArcVal[nVal], jAddr)
for c in cp.d1to1.get(cChar, [cChar]):
if c in self.dChar:
jAddr = self._lookupArcNode(self.dChar[c], iAddr)
if jAddr:
yield (c, jAddr)
def getMorph (self, sWord):
|