Grammalecte  Diff

Differences From Artifact [73e27f350e]:

To Artifact [ab8a6cc9bc]:


115
116
117
118
119
120
121
122
123


124
125
126
127
128
129
130
115
116
117
118
119
120
121


122
123
124
125
126
127
128
129
130







-
-
+
+







        for (let i = 0;  i < this.sByDic.length;  i+=2) {
            lTemp.push(parseInt(this.sByDic.slice(i, i+2), 16));
        }
        this.byDic = lTemp;
        //this.byDic = new Uint8Array(lTemp);  // not quicker, even slower
        /* end of bug workaround */

        if (!this.sHeader.startsWith("/pyfsa/")) {
            throw TypeError("# Error. Not a pyfsa binary dictionary. Header: " + this.sHeader);
        if (!(this.sHeader.startsWith("/grammalecte-fsa/") || this.sHeader.startsWith("/pyfsa/"))) {
            throw TypeError("# Error. Not a grammalecte-fsa binary dictionary. Header: " + this.sHeader);
        }
        if (!(this.nCompressionMethod == 1 || this.nCompressionMethod == 2 || this.nCompressionMethod == 3)) {
            throw RangeError("# Error. Unknown dictionary compression method: " + this.nCompressionMethod);
        }
        // <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value
        this.dChar = helpers.objectToMap(this.dChar);
        this.dCharVal = this.dChar.gl_reverse();
179
180
181
182
183
184
185
186

187
188
189
190
191
192
193
179
180
181
182
183
184
185

186
187
188
189
190
191
192
193







-
+







                `  Arcs values:  ${this.nArcVal} = ${this.nChar} characters,  ${this.nAff} affixes,  ${this.nTag} tags\n` +
                `  Dictionary: ${this.nEntry} entries,    ${this.nNode} nodes,   ${this.nArc} arcs\n` +
                `  Address size: ${this.nBytesNodeAddress} bytes,  Arc size: ${this.nBytesArc} bytes\n`;
    }

    getJSON () {
        let oJSON = {
            "sHeader": "/pyfsa/",
            "sHeader": "/grammalecte-fsa/",
            "sLangCode": this.sLangCode,
            "sLangName": this.sLangName,
            "sDicName": this.sDicName,
            "sFileName": this.sFileName,
            "sDate": this.sDate,
            "nEntry": this.nEntry,
            "nChar": this.nChar,
413
414
415
416
417
418
419




420
421
422
423
424
425
426
427
428
429
430
431























432
433

434
435
436
437
438

439
440
441
442
443

444

445
446
447
448





449
450
451
452
453
454
455
413
414
415
416
417
418
419
420
421
422
423












424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446


447
448
449
450
451

452
453
454
455
456

457
458
459




460
461
462
463
464
465
466
467
468
469
470
471







+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
+




-
+




-
+

+
-
-
-
-
+
+
+
+
+







        }
        return aTails;
    }

    // morph (sWord) {
    //     is defined in constructor
    // }
    getSimilarEntries (sWord, nSuggLimit=10) {
        // return a list of tuples (similar word, stem, morphology)
        if (sWord == "") {
            return [];
    
    * select (sPattern="") {
        // generator: returns all entries which morphology fits <sPattern>
        let zPattern = null;
        if (sPattern !== "") {
            try {
                zPattern = new RegExp(sPattern);
            }
            catch (e) {
                console.log("Error in regex pattern");
                console.log(e.message);
            }
        }
        let lResult = [];
        for (let sSimilar of this.suggest(sWord, nSuggLimit)) {
            for (let sMorph of this.getMorph(sSimilar)) {
                let nCut = sMorph.indexOf(" ");
                lResult.push( [sSimilar, sMorph.slice(1, nCut), sMorph.slice(nCut+1)] );
            }
        }
        return lResult;
    }

    * select (sFlexPattern="", sTagsPattern="") {
        // generator: returns all entries which flexion fits <sFlexPattern> and morphology fits <sTagsPattern>
        let zFlexPattern = null;
        let zTagsPattern = null;
        try {
            zFlexPattern = (sFlexPattern !== "") ? new RegExp(sFlexPattern) : null;
            zTagsPattern = (sTagsPattern !== "") ? new RegExp(sTagsPattern) : null;
        }
        catch (e) {
            console.log("Error in regex pattern");
            console.log(e.message);
        }
        }
        yield* this._select1(zPattern, 0, "");
        yield* this._select1(zFlexPattern, zTagsPattern, 0, "");
    }

    // VERSION 1

    * _select1 (zPattern, iAddr, sWord) {
    * _select1 (zFlexPattern, zTagsPattern, iAddr, sWord) {
        // recursive generator
        for (let [nVal, jAddr] of this._getArcs1(iAddr)) {
            if (nVal <= this.nChar) {
                // simple character
                yield* this._select1(zPattern, jAddr, sWord + this.lArcVal[nVal]);
                yield* this._select1(zFlexPattern, zTagsPattern, jAddr, sWord + this.lArcVal[nVal]);
            } else {
                if (!zFlexPattern || zFlexPattern.test(sWord)) {
                let sEntry = sWord + "\t" + this.funcStemming(sWord, this.lArcVal[nVal]);
                for (let [nMorphVal, _] of this._getArcs1(jAddr)) {
                    if (!zPattern || zPattern.test(this.lArcVal[nMorphVal])) {
                        yield sEntry + "\t" + this.lArcVal[nMorphVal];
                    let sStem = this.funcStemming(sWord, this.lArcVal[nVal]);
                    for (let [nMorphVal, _] of this._getArcs1(jAddr)) {
                        if (!zTagsPattern || zTagsPattern.test(this.lArcVal[nMorphVal])) {
                            yield [sWord, sStem, this.lArcVal[nMorphVal]];
                        }
                    }
                }
            }
        }
    }            

    _morph1 (sWord) {