139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
|
nAcc = nAcc + 1
self.byDic = lTemp;
# masks
self._arcMask = (2 ** ((self.nBytesArc * 8) - 3)) - 1
self._finalNodeMask = 1 << ((self.nBytesArc * 8) - 1)
self._lastArcMask = 1 << ((self.nBytesArc * 8) - 2)
self._addrBitMask = 1 << ((self.nBytesArc * 8) - 3) # version 2
# function to decode the affix/suffix code
if self.cStemming == "S":
self.funcStemming = st.changeWordWithSuffixCode
elif self.cStemming == "A":
self.funcStemming = st.changeWordWithAffixCode
else:
|
<
|
139
140
141
142
143
144
145
146
147
148
149
150
151
152
|
nAcc = nAcc + 1
self.byDic = lTemp;
# masks
self._arcMask = (2 ** ((self.nBytesArc * 8) - 3)) - 1
self._finalNodeMask = 1 << ((self.nBytesArc * 8) - 1)
self._lastArcMask = 1 << ((self.nBytesArc * 8) - 2)
# function to decode the affix/suffix code
if self.cStemming == "S":
self.funcStemming = st.changeWordWithSuffixCode
elif self.cStemming == "A":
self.funcStemming = st.changeWordWithAffixCode
else:
|
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
|
self.cStemming = l.pop(0)
self.nTag = self.nArcVal - self.nChar - self.nAff
# <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value
self.dChar = {}
for i in range(1, self.nChar+1):
self.dChar[self.lArcVal[i]] = i
self.dCharVal = { v: k for k, v in self.dChar.items() }
self.nBytesOffset = 1 # version 3
def _initJSON (self, oJSON):
"initialize with a JSON text file"
self.sByDic = "" # init to prevent pylint whining
self.__dict__.update(oJSON)
self.byDic = binascii.unhexlify(self.sByDic)
self.dCharVal = { v: k for k, v in self.dChar.items() }
|
<
|
197
198
199
200
201
202
203
204
205
206
207
208
209
210
|
self.cStemming = l.pop(0)
self.nTag = self.nArcVal - self.nChar - self.nAff
# <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value
self.dChar = {}
for i in range(1, self.nChar+1):
self.dChar[self.lArcVal[i]] = i
self.dCharVal = { v: k for k, v in self.dChar.items() }
def _initJSON (self, oJSON):
"initialize with a JSON text file"
self.sByDic = "" # init to prevent pylint whining
self.__dict__.update(oJSON)
self.byDic = binascii.unhexlify(self.sByDic)
self.dCharVal = { v: k for k, v in self.dChar.items() }
|
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
|
"nNode": self.nNode,
"nArc": self.nArc,
"nArcVal": self.nArcVal,
"lArcVal": self.lArcVal,
"nCompressionMethod": self.nCompressionMethod,
"nBytesArc": self.nBytesArc,
"nBytesNodeAddress": self.nBytesNodeAddress,
"nBytesOffset": self.nBytesOffset,
# JavaScript is a pile of shit, so Mozilla’s JS parser don’t like file bigger than 4 Mb!
# So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension.
# https://github.com/mozilla/addons-linter/issues/1361
"sByDic": self.byDic.hex() if bBinaryDictAsHexString else [ e for e in self.byDic ],
"l2grams": list(self.a2grams)
}, ensure_ascii=False))
if bInJSModule:
|
<
|
240
241
242
243
244
245
246
247
248
249
250
251
252
253
|
"nNode": self.nNode,
"nArc": self.nArc,
"nArcVal": self.nArcVal,
"lArcVal": self.lArcVal,
"nCompressionMethod": self.nCompressionMethod,
"nBytesArc": self.nBytesArc,
"nBytesNodeAddress": self.nBytesNodeAddress,
# JavaScript is a pile of shit, so Mozilla’s JS parser don’t like file bigger than 4 Mb!
# So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension.
# https://github.com/mozilla/addons-linter/issues/1361
"sByDic": self.byDic.hex() if bBinaryDictAsHexString else [ e for e in self.byDic ],
"l2grams": list(self.a2grams)
}, ensure_ascii=False))
if bInJSModule:
|