Grammalecte  Diff

Differences From Artifact [c2ec626548]:

To Artifact [287e75028f]:


24
25
26
27
28
29
30
31

32
33
34
35
36
37
38
24
25
26
27
28
29
30

31
32
33
34
35
36
37
38







-
+







        This code is inspired by Steve Hanov’s DAWG, 2011 (http://stevehanov.ca/blog/index.php?id=115).
        We store suffix/affix codes and tags within the graph after the “real” word.
        A word is a list of numbers [ c1, c2, c3 . . . cN, iAffix, iTags]
        Each arc is an index into this.lArcVal, which stores characters, suffix/affix codes for stemming, and tags.
        Important: As usual, the last node (after ‘iTags’) is tagged final, AND the node after ‘cN’ is ALSO tagged final.
    */

    constructor (lEntrySrc, sLang, cStemming, xProgressBarNode=null) {
    constructor (lEntrySrc, sLangCode, sLangName, sDicName, cStemming, xProgressBarNode=null) {
        console.log("===== Direct Acyclic Word Graph - Minimal Acyclic Finite State Automaton =====");
        let funcStemmingGen = null;
        switch (cStemming.toUpperCase()) {
            case "A":
                funcStemmingGen = str_transform.defineAffixCode; break;
            case "S":
                funcStemmingGen = str_transform.defineSuffixCode; break;
100
101
102
103
104
105
106
107
108



109
110
111
112
113
114
115
100
101
102
103
104
105
106


107
108
109
110
111
112
113
114
115
116







-
-
+
+
+







        let lKeyVal = [];
        for (let c of dChar.keys()) { lKeyVal.push([dChar.get(c), dCharOccur.get(c)]); }
        for (let sAff of dAff.keys()) { lKeyVal.push([dAff.get(sAff)+nChar, dAffOccur.get(sAff)]); }
        for (let sTag of dTag.keys()) { lKeyVal.push([dTag.get(sTag)+nChar+nAff, dTagOccur.get(sTag)]); }
        let dValOccur = new Map(lKeyVal);
        lKeyVal.length = 0; // clear the array

        this.sHeader = "/pyfsa/";
        this.sLang = sLang;
        this.sLangCode = sLangCode;
        this.sLangName = sLangName;
        this.sDicName = sDicName;
        this.nEntry = lWord.length;
        this.aPreviousEntry = [];
        oNodeCounter.reset();
        this.oRoot = new DawgNode();
        this.lUncheckedNodes = [];          // list of nodes that have not been checked for duplication.
        this.dMinimizedNodes = new Map();   // list of unique nodes that have been checked for duplication.
        this.nNode = 0;
367
368
369
370
371
372
373



374

375

376
377
378
379
380
381

382
383
384
385




386
387
388
389
390
391
392







393
394
395
396
397
398
399
368
369
370
371
372
373
374
375
376
377

378

379
380





381
382



383
384
385
386
387
388





389
390
391
392
393
394
395
396
397
398
399
400
401
402







+
+
+
-
+
-
+

-
-
-
-
-
+

-
-
-
+
+
+
+


-
-
-
-
-
+
+
+
+
+
+
+







        if (nCompressionMethod == 1) {
            sByDic = this.oRoot.convToBytes1(this.nBytesArc, this.nBytesNodeAddress);
            for (let oNode of this.dMinimizedNodes.values()) {
                sByDic += oNode.convToBytes1(this.nBytesArc, this.nBytesNodeAddress);
            }
        }
        let oJSON = {
            "sHeader": "/pyfsa/",
            "sLangCode": this.sLangCode,
            "sLangName": this.sLangName,
            "sName": this.sName,
            "sDicName": this.sDicName,
            "nCompressionMethod": nCompressionMethod,
            "sFileName": "[none]",
            "sDate": this._getDate(),
            "sHeader": this.sHeader + nCompressionMethod + "/",
            "lArcVal": this.lArcVal,
            "nArcVal": this.nArcVal,
            "byDic": sByDic,  // binary word graph
            "sLang": this.sLang,
            "nEntries": this.nEntry,
            "nChar": this.nChar,
            "nBytesArc": this.nBytesArc,
            "nBytesNodeAddress": this.nBytesNodeAddress,
            "nEntries": this.nEntry,
            "nAff": this.nAff,
            "nTag": this.nTag,
            "cStemming": this.cStemming,
            "dChar": helpers.mapToObject(this.dChar),
            "nNode": this.nNode,
            "nArc": this.nArc,
            "nAff": this.nAff,
            "cStemming": this.cStemming,
            "nTag": this.nTag,
            "dChar": helpers.mapToObject(this.dChar),
            "nBytesOffset": this.nBytesOffset
            "lArcVal": this.lArcVal,
            "nArcVal": this.nArcVal,
            "nCompressionMethod": nCompressionMethod,
            "nBytesArc": this.nBytesArc,
            "nBytesNodeAddress": this.nBytesNodeAddress,
            "nBytesOffset": this.nBytesOffset,
            "sByDic": sByDic    // binary word graph
        };
        return oJSON;
    },

    _getDate () {
        let oDate = new Date();
        let sMonth = (oDate.getMonth() + 1).toString().padStart(2, "0"); // Month+1: Because JS always sucks somehow.