︙ | | |
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
|
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
|
-
+
|
}
}
}
* _getCharArcs (iAddr) {
// generator: yield all chars and addresses from node at address <iAddr>
for (let [nVal, jAddr] of this._getArcs(iAddr)) {
if (nVal < this.nChar) {
if (nVal <= this.nChar) {
yield [this.dCharVal.get(nVal), jAddr];
}
}
}
* _getSimilarCharArcs (cChar, iAddr) {
// generator: yield similar char of <cChar> and address of the following node
|
︙ | | |
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
|
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
|
-
+
|
}
}
_getTails (iAddr, sTail="", n=2) {
// return a list of suffixes ending at a distance of <n> from <iAddr>
let aTails = new Set();
for (let [nVal, jAddr] of this._getArcs(iAddr)) {
if (nVal < this.nChar) {
if (nVal <= this.nChar) {
if (this._convBytesToInteger(this.byDic.slice(jAddr, jAddr+this.nBytesArc)) & this._finalNodeMask) {
aTails.add(sTail + this.dCharVal.get(nVal));
}
if (n && aTails.size == 0) {
aTails.gl_update(this._getTails(jAddr, sTail+this.dCharVal.get(nVal), n-1));
}
}
|
︙ | | |
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
|
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
|
-
+
|
if (this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask) {
let l = [];
let nRawArc = 0;
while (!(nRawArc & this._lastArcMask)) {
let iEndArcAddr = iAddr + this.nBytesArc;
nRawArc = this._convBytesToInteger(this.byDic.slice(iAddr, iEndArcAddr));
let nArc = nRawArc & this._arcMask;
if (nArc >= this.nChar) {
if (nArc > this.nChar) {
// This value is not a char, this is a stemming code
let sStem = ">" + this.funcStemming(sWord, this.lArcVal[nArc]);
// Now we go to the next node and retrieve the values of all following arcs; all of them are tags
let iAddr2 = this._convBytesToInteger(this.byDic.slice(iEndArcAddr, iEndArcAddr+this.nBytesNodeAddress));
let nRawArc2 = 0;
while (!(nRawArc2 & this._lastArcMask)) {
let iEndArcAddr2 = iAddr2 + this.nBytesArc;
|
︙ | | |
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
|
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
|
-
+
|
if (this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask) {
let l = [];
let nRawArc = 0;
while (!(nRawArc & this._lastArcMask)) {
let iEndArcAddr = iAddr + this.nBytesArc;
nRawArc = this._convBytesToInteger(this.byDic.slice(iAddr, iEndArcAddr));
let nArc = nRawArc & this._arcMask;
if (nArc >= this.nChar) {
if (nArc > this.nChar) {
// This value is not a char, this is a stemming code
l.push(this.funcStemming(sWord, this.lArcVal[nArc]));
}
iAddr = iEndArcAddr + this.nBytesNodeAddress;
}
return l;
}
|
︙ | | |