115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
|
for (let i = 0; i < this.sByDic.length; i+=2) {
lTemp.push(parseInt(this.sByDic.slice(i, i+2), 16));
}
this.byDic = lTemp;
//this.byDic = new Uint8Array(lTemp); // not quicker, even slower
/* end of bug workaround */
if (!this.sHeader.startsWith("/pyfsa/")) {
throw TypeError("# Error. Not a pyfsa binary dictionary. Header: " + this.sHeader);
}
if (!(this.nCompressionMethod == 1 || this.nCompressionMethod == 2 || this.nCompressionMethod == 3)) {
throw RangeError("# Error. Unknown dictionary compression method: " + this.nCompressionMethod);
}
// <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value
this.dChar = helpers.objectToMap(this.dChar);
this.dCharVal = this.dChar.gl_reverse();
|
|
|
|
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
|
for (let i = 0; i < this.sByDic.length; i+=2) {
lTemp.push(parseInt(this.sByDic.slice(i, i+2), 16));
}
this.byDic = lTemp;
//this.byDic = new Uint8Array(lTemp); // not quicker, even slower
/* end of bug workaround */
if (!this.sHeader.startsWith("/grammalecte-fsa/")) {
throw TypeError("# Error. Not a grammalecte-fsa binary dictionary. Header: " + this.sHeader);
}
if (!(this.nCompressionMethod == 1 || this.nCompressionMethod == 2 || this.nCompressionMethod == 3)) {
throw RangeError("# Error. Unknown dictionary compression method: " + this.nCompressionMethod);
}
// <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value
this.dChar = helpers.objectToMap(this.dChar);
this.dCharVal = this.dChar.gl_reverse();
|
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
|
` Arcs values: ${this.nArcVal} = ${this.nChar} characters, ${this.nAff} affixes, ${this.nTag} tags\n` +
` Dictionary: ${this.nEntry} entries, ${this.nNode} nodes, ${this.nArc} arcs\n` +
` Address size: ${this.nBytesNodeAddress} bytes, Arc size: ${this.nBytesArc} bytes\n`;
}
getJSON () {
let oJSON = {
"sHeader": "/pyfsa/",
"sLangCode": this.sLangCode,
"sLangName": this.sLangName,
"sDicName": this.sDicName,
"sFileName": this.sFileName,
"sDate": this.sDate,
"nEntry": this.nEntry,
"nChar": this.nChar,
|
|
|
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
|
` Arcs values: ${this.nArcVal} = ${this.nChar} characters, ${this.nAff} affixes, ${this.nTag} tags\n` +
` Dictionary: ${this.nEntry} entries, ${this.nNode} nodes, ${this.nArc} arcs\n` +
` Address size: ${this.nBytesNodeAddress} bytes, Arc size: ${this.nBytesArc} bytes\n`;
}
getJSON () {
let oJSON = {
"sHeader": "/grammalecte-fsa/",
"sLangCode": this.sLangCode,
"sLangName": this.sLangName,
"sDicName": this.sDicName,
"sFileName": this.sFileName,
"sDate": this.sDate,
"nEntry": this.nEntry,
"nChar": this.nChar,
|
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
|
}
return aTails;
}
// morph (sWord) {
// is defined in constructor
// }
* select (sPattern="") {
// generator: returns all entries which morphology fits <sPattern>
let zPattern = null;
if (sPattern !== "") {
try {
zPattern = new RegExp(sPattern);
}
catch (e) {
console.log("Error in regex pattern");
console.log(e.message);
}
}
yield* this._select1(zPattern, 0, "");
}
// VERSION 1
* _select1 (zPattern, iAddr, sWord) {
// recursive generator
for (let [nVal, jAddr] of this._getArcs1(iAddr)) {
if (nVal <= this.nChar) {
// simple character
yield* this._select1(zPattern, jAddr, sWord + this.lArcVal[nVal]);
} else {
let sEntry = sWord + "\t" + this.funcStemming(sWord, this.lArcVal[nVal]);
for (let [nMorphVal, _] of this._getArcs1(jAddr)) {
if (!zPattern || zPattern.test(this.lArcVal[nMorphVal])) {
yield sEntry + "\t" + this.lArcVal[nMorphVal];
}
}
}
}
}
_morph1 (sWord) {
|
>
>
>
>
|
>
>
>
>
>
>
>
>
>
|
>
|
|
|
|
|
>
|
|
|
|
|
<
|
|
|
>
|
|
|
|
>
|
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
|
}
return aTails;
}
// morph (sWord) {
// is defined in constructor
// }
getSimilarEntries (sWord, nSuggLimit=10) {
// return a list of tuples (similar word, stem, morphology)
if (sWord == "") {
return [];
}
let lResult = [];
for (let sSimilar of this.suggest(sWord, nSuggLimit)) {
for (let sMorph of this.getMorph(sSimilar)) {
let nCut = sMorph.indexOf(" ");
lResult.push( [sSimilar, sMorph.slice(1, nCut), sMorph.slice(nCut+1)] );
}
}
return lResult;
}
* select (sFlexPattern="", sTagsPattern="") {
// generator: returns all entries which flexion fits <sFlexPattern> and morphology fits <sTagsPattern>
let zFlexPattern = null;
let zTagsPattern = null;
try {
zFlexPattern = (sFlexPattern !== "") ? new RegExp(sFlexPattern) : null;
zTagsPattern = (sTagsPattern !== "") ? new RegExp(sTagsPattern) : null;
}
catch (e) {
console.log("Error in regex pattern");
console.log(e.message);
}
yield* this._select1(zFlexPattern, zTagsPattern, 0, "");
}
// VERSION 1
* _select1 (zFlexPattern, zTagsPattern, iAddr, sWord) {
// recursive generator
for (let [nVal, jAddr] of this._getArcs1(iAddr)) {
if (nVal <= this.nChar) {
// simple character
yield* this._select1(zFlexPattern, zTagsPattern, jAddr, sWord + this.lArcVal[nVal]);
} else {
if (!zFlexPattern || zFlexPattern.test(sWord)) {
let sStem = this.funcStemming(sWord, this.lArcVal[nVal]);
for (let [nMorphVal, _] of this._getArcs1(jAddr)) {
if (!zTagsPattern || zTagsPattern.test(this.lArcVal[nMorphVal])) {
yield [sWord, sStem, this.lArcVal[nMorphVal]];
}
}
}
}
}
}
_morph1 (sWord) {
|