Grammalecte  Check-in [29e54c7426]

Overview
Comment:[graphspell][js] update for dawg: export as JSON
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | graphspell
Files: files | file ages | folders
SHA3-256: 29e54c742609084011a70825b98405617c85ba0d84c58a85830b2933405df033
User & Date: olr on 2018-02-07 16:38:17
Other Links: manifest | tags
Context
2018-02-07
16:39
[fx] update: lexicon editor check-in: 00eca59ea8 user: olr tags: trunk, fx
16:38
[graphspell][js] update for dawg: export as JSON check-in: 29e54c7426 user: olr tags: trunk, graphspell
16:15
[graphspell][js] dawg: convert Map to Object when exporting to JSON check-in: e1707a65be user: olr tags: trunk, graphspell
Changes

Modified graphspell-js/dawg.js from [ba2376ee6f] to [a299d0be58].

24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
        This code is inspired by Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115)
        We store suffix/affix codes and tags within the graph after the “real” word.
        A word is a list of numbers [ c1, c2, c3 . . . cN, iAffix, iTags]
        Each arc is an index in this.lArcVal, which stores characters, suffix/affix codes for stemming, and tags.
        Important: As usual, the last node (after ‘iTags’) is tagged final, AND the node after ‘cN’ is ALSO tagged final.
    */

    constructor (lEntrySrc, sLangName, cStemming, xProgressBarNode=null) {
        console.log("===== Direct Acyclic Word Graph - Minimal Acyclic Finite State Automaton =====");
        let funcStemmingGen = null;
        switch (cStemming.toUpperCase()) {
            case "A":
                funcStemmingGen = str_transform.defineAffixCode; break;
            case "S":
                funcStemmingGen = str_transform.defineSuffixCode; break;







|







24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
        This code is inspired by Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115)
        We store suffix/affix codes and tags within the graph after the “real” word.
        A word is a list of numbers [ c1, c2, c3 . . . cN, iAffix, iTags]
        Each arc is an index in this.lArcVal, which stores characters, suffix/affix codes for stemming, and tags.
        Important: As usual, the last node (after ‘iTags’) is tagged final, AND the node after ‘cN’ is ALSO tagged final.
    */

    constructor (lEntrySrc, sLang, cStemming, xProgressBarNode=null) {
        console.log("===== Direct Acyclic Word Graph - Minimal Acyclic Finite State Automaton =====");
        let funcStemmingGen = null;
        switch (cStemming.toUpperCase()) {
            case "A":
                funcStemmingGen = str_transform.defineAffixCode; break;
            case "S":
                funcStemmingGen = str_transform.defineSuffixCode; break;
100
101
102
103
104
105
106

107
108
109
110
111
112
113
114
        let lKeyVal = [];
        for (let c of dChar.keys()) { lKeyVal.push([dChar.get(c), dCharOccur.get(c)]); }
        for (let sAff of dAff.keys()) { lKeyVal.push([dAff.get(sAff)+nChar, dAffOccur.get(sAff)]); }
        for (let sTag of dTag.keys()) { lKeyVal.push([dTag.get(sTag)+nChar+nAff, dTagOccur.get(sTag)]); }
        let dValOccur = new Map(lKeyVal);
        lKeyVal.length = 0; // clear the array


        this.sLang = sLangName;
        this.nEntry = lWord.length;
        this.aPreviousEntry = [];
        oNodeCounter.reset();
        this.oRoot = new DawgNode();
        this.lUncheckedNodes = [];          // list of nodes that have not been checked for duplication.
        this.dMinimizedNodes = new Map();   // list of unique nodes that have been checked for duplication.
        this.nNode = 0;







>
|







100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
        let lKeyVal = [];
        for (let c of dChar.keys()) { lKeyVal.push([dChar.get(c), dCharOccur.get(c)]); }
        for (let sAff of dAff.keys()) { lKeyVal.push([dAff.get(sAff)+nChar, dAffOccur.get(sAff)]); }
        for (let sTag of dTag.keys()) { lKeyVal.push([dTag.get(sTag)+nChar+nAff, dTagOccur.get(sTag)]); }
        let dValOccur = new Map(lKeyVal);
        lKeyVal.length = 0; // clear the array

        this.sHeader = "/pyfsa/";
        this.sLang = sLang;
        this.nEntry = lWord.length;
        this.aPreviousEntry = [];
        oNodeCounter.reset();
        this.oRoot = new DawgNode();
        this.lUncheckedNodes = [];          // list of nodes that have not been checked for duplication.
        this.dMinimizedNodes = new Map();   // list of unique nodes that have been checked for duplication.
        this.nNode = 0;
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
            sByDic = this.oRoot.convToBytes1(this.nBytesArc, this.nBytesNodeAddress);
            for (let oNode of this.dMinimizedNodes.values()) {
                sByDic += oNode.convToBytes1(this.nBytesArc, this.nBytesNodeAddress);
            }
        }
        let oJSON = {
            "sName": this.sName,
            "nVersion": this.nMethod,
            "sHeader": this.sHeader,
            "lArcVal": this.lArcVal,
            "nArcVal": this.nArcVal,
            "byDic": sByDic,
            "sLang": this.sLang,
            "nChar": this.nChar,
            "nBytesArc": this.nBytesArc,
            "nBytesNodeAddress": this.nBytesNodeAddress,







|
|







365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
            sByDic = this.oRoot.convToBytes1(this.nBytesArc, this.nBytesNodeAddress);
            for (let oNode of this.dMinimizedNodes.values()) {
                sByDic += oNode.convToBytes1(this.nBytesArc, this.nBytesNodeAddress);
            }
        }
        let oJSON = {
            "sName": this.sName,
            "nVersion": nMethod,
            "sHeader": this.sHeader + nMethod + "/",
            "lArcVal": this.lArcVal,
            "nArcVal": this.nArcVal,
            "byDic": sByDic,
            "sLang": this.sLang,
            "nChar": this.nChar,
            "nBytesArc": this.nBytesArc,
            "nBytesNodeAddress": this.nBytesNodeAddress,