150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
|
sName, nCompressionMethod, sHeader, lArcVal, nArcVal, sByDic, sLang, nChar, nBytesArc, nBytesNodeAddress,
nEntry, nNode, nArc, nAff, cStemming, nTag, dChar, nBytesOffset,
*/
if (!(this.sHeader.startsWith("/grammalecte-fsa/") || this.sHeader.startsWith("/pyfsa/"))) {
throw TypeError("# Error. Not a grammalecte-fsa binary dictionary. Header: " + this.sHeader);
}
if (!(this.nCompressionMethod == 1 || this.nCompressionMethod == 2 || this.nCompressionMethod == 3)) {
throw RangeError("# Error. Unknown dictionary compression method: " + this.nCompressionMethod);
}
// <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value
this.dChar = helpers.objectToMap(this.dChar);
this.dCharVal = this.dChar.gl_reverse();
this.a2grams = (this.l2grams) ? new Set(this.l2grams) : null;
if (this.cStemming == "S") {
this.funcStemming = str_transform.changeWordWithSuffixCode;
} else if (this.cStemming == "A") {
this.funcStemming = str_transform.changeWordWithAffixCode;
} else {
this.funcStemming = str_transform.noStemming;
}
/*
Bug workaround.
Mozilla’s JS parser sucks. Can’t read file bigger than 4 Mb!
So we convert huge hexadecimal string to list of numbers…
https://github.com/mozilla/addons-linter/issues/1361
*/
/*
Performance trick:
Instead of converting bytes to integers each times we parse the binary dictionary,
we do it once, then parse the array
*/
this.lByDic = [];
let nAcc = 0;
let lBytesBuffer = [];
let nDivisor = (this.nBytesArc + this.nBytesNodeAddress) / 2;
for (let i = 0; i < this.sByDic.length; i+=2) {
lBytesBuffer.push(parseInt(this.sByDic.slice(i, i+2), 16));
if (nAcc == (this.nBytesArc - 1)) {
this.lByDic.push(this._convBytesToInteger(lBytesBuffer));
lBytesBuffer = [];
}
else if (nAcc == (this.nBytesArc + this.nBytesNodeAddress - 1)) {
this.lByDic.push(Math.round(this._convBytesToInteger(lBytesBuffer) / nDivisor)); // Math.round should be useless, BUT with JS who knowns what can happen…
lBytesBuffer = [];
nAcc = -1;
}
nAcc = nAcc + 1;
}
/* end of bug workaround */
this._arcMask = (2 ** ((this.nBytesArc * 8) - 3)) - 1;
this._finalNodeMask = 1 << ((this.nBytesArc * 8) - 1);
this._lastArcMask = 1 << ((this.nBytesArc * 8) - 2);
//console.log(this.getInfo());
this.bAcronymValid = true;
this.bNumAtLastValid = false;
// lexicographer module ?
this.lexicographer = null;
// JS still sucks: we’ll try importation when importation will be available in Workers. Still waiting...
|
<
<
<
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
|
sName, nCompressionMethod, sHeader, lArcVal, nArcVal, sByDic, sLang, nChar, nBytesArc, nBytesNodeAddress,
nEntry, nNode, nArc, nAff, cStemming, nTag, dChar, nBytesOffset,
*/
if (!(this.sHeader.startsWith("/grammalecte-fsa/") || this.sHeader.startsWith("/pyfsa/"))) {
throw TypeError("# Error. Not a grammalecte-fsa binary dictionary. Header: " + this.sHeader);
}
// <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value
this.dChar = helpers.objectToMap(this.dChar);
this.dCharVal = this.dChar.gl_reverse();
this.a2grams = (this.l2grams) ? new Set(this.l2grams) : null;
if (!this.hasOwnProperty("lByDic")) {
// old dictionary version
if (!this.hasOwnProperty("sByDic")) {
throw TypeError("# Error. No usable data in the dictionary.");
}
this.lByDic = [];
let nAcc = 0;
let lBytesBuffer = [];
let nDivisor = (this.nBytesArc + this.nBytesNodeAddress) / 2;
for (let i = 0; i < this.sByDic.length; i+=2) {
lBytesBuffer.push(parseInt(this.sByDic.slice(i, i+2), 16));
if (nAcc == (this.nBytesArc - 1)) {
this.lByDic.push(this._convBytesToInteger(lBytesBuffer));
lBytesBuffer = [];
}
else if (nAcc == (this.nBytesArc + this.nBytesNodeAddress - 1)) {
this.lByDic.push(Math.round(this._convBytesToInteger(lBytesBuffer) / nDivisor)); // Math.round should be useless, BUT with JS who knowns what can happen…
lBytesBuffer = [];
nAcc = -1;
}
nAcc = nAcc + 1;
}
}
// masks
this._arcMask = (2 ** ((this.nBytesArc * 8) - 3)) - 1;
this._finalNodeMask = 1 << ((this.nBytesArc * 8) - 1);
this._lastArcMask = 1 << ((this.nBytesArc * 8) - 2);
// function to decode the affix/suffix code
if (this.cStemming == "S") {
this.funcStemming = str_transform.changeWordWithSuffixCode;
} else if (this.cStemming == "A") {
this.funcStemming = str_transform.changeWordWithAffixCode;
} else {
this.funcStemming = str_transform.noStemming;
}
//console.log(this.getInfo());
this.bAcronymValid = true;
this.bNumAtLastValid = false;
// lexicographer module ?
this.lexicographer = null;
// JS still sucks: we’ll try importation when importation will be available in Workers. Still waiting...
|