Grammalecte  Check-in [29e54c7426]

Overview
Comment:[graphspell][js] update for dawg: export as JSON
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | graphspell
Files: files | file ages | folders
SHA3-256: 29e54c742609084011a70825b98405617c85ba0d84c58a85830b2933405df033
User & Date: olr on 2018-02-07 16:38:17
Other Links: manifest | tags
Context
2018-02-07
16:39
[fx] update: lexicon editor check-in: 00eca59ea8 user: olr tags: trunk, fx
16:38
[graphspell][js] update for dawg: export as JSON check-in: 29e54c7426 user: olr tags: trunk, graphspell
16:15
[graphspell][js] dawg: convert Map to Object when exporting to JSON check-in: e1707a65be user: olr tags: trunk, graphspell
Changes

Modified graphspell-js/dawg.js from [ba2376ee6f] to [a299d0be58].

24
25
26
27
28
29
30
31

32
33
34
35
36
37
38
24
25
26
27
28
29
30

31
32
33
34
35
36
37
38







-
+







        This code is inspired by Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115)
        We store suffix/affix codes and tags within the graph after the “real” word.
        A word is a list of numbers [ c1, c2, c3 . . . cN, iAffix, iTags]
        Each arc is an index into this.lArcVal, which stores characters, suffix/affix codes for stemming, and tags.
        Important: As usual, the last node (after ‘iTags’) is tagged final, AND the node after ‘cN’ is ALSO tagged final.
    */

    constructor (lEntrySrc, sLangName, cStemming, xProgressBarNode=null) {
    constructor (lEntrySrc, sLang, cStemming, xProgressBarNode=null) {
        console.log("===== Direct Acyclic Word Graph - Minimal Acyclic Finite State Automaton =====");
        let funcStemmingGen = null;
        switch (cStemming.toUpperCase()) {
            case "A":
                funcStemmingGen = str_transform.defineAffixCode; break;
            case "S":
                funcStemmingGen = str_transform.defineSuffixCode; break;
100
101
102
103
104
105
106

107

108
109
110
111
112
113
114
100
101
102
103
104
105
106
107

108
109
110
111
112
113
114
115







+
-
+







        let lKeyVal = [];
        for (let c of dChar.keys()) { lKeyVal.push([dChar.get(c), dCharOccur.get(c)]); }
        for (let sAff of dAff.keys()) { lKeyVal.push([dAff.get(sAff)+nChar, dAffOccur.get(sAff)]); }
        for (let sTag of dTag.keys()) { lKeyVal.push([dTag.get(sTag)+nChar+nAff, dTagOccur.get(sTag)]); }
        let dValOccur = new Map(lKeyVal);
        lKeyVal.length = 0; // clear the array

        this.sHeader = "/pyfsa/";
        this.sLang = sLangName;
        this.sLang = sLang;
        this.nEntry = lWord.length;
        this.aPreviousEntry = [];
        oNodeCounter.reset();
        this.oRoot = new DawgNode();
        this.lUncheckedNodes = [];          // list of nodes that have not been checked for duplication.
        this.dMinimizedNodes = new Map();   // list of unique nodes that have been checked for duplication.
        this.nNode = 0;
364
365
366
367
368
369
370
371
372


373
374
375
376
377
378
379
365
366
367
368
369
370
371


372
373
374
375
376
377
378
379
380







-
-
+
+







            sByDic = this.oRoot.convToBytes1(this.nBytesArc, this.nBytesNodeAddress);
            for (let oNode of this.dMinimizedNodes.values()) {
                sByDic += oNode.convToBytes1(this.nBytesArc, this.nBytesNodeAddress);
            }
        }
        let oJSON = {
            "sName": this.sName,
            "nVersion": this.nMethod,
            "sHeader": this.sHeader,
            "nVersion": nMethod,
            "sHeader": this.sHeader + nMethod + "/",
            "lArcVal": this.lArcVal,
            "nArcVal": this.nArcVal,
            "byDic": sByDic,
            "sLang": this.sLang,
            "nChar": this.nChar,
            "nBytesArc": this.nBytesArc,
            "nBytesNodeAddress": this.nBytesNodeAddress,