Overview
Comment: | [graphspell][js] update for dawg: export as JSON |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | graphspell |
Files: | files | file ages | folders |
SHA3-256: | 29e54c742609084011a70825b9840561 |
User & Date: | olr on 2018-02-07 16:38:17 |
Other Links: | manifest | tags |
Context
2018-02-07
| ||
16:39 | [fx] update: lexicon editor check-in: 00eca59ea8 user: olr tags: trunk, fx | |
16:38 | [graphspell][js] update for dawg: export as JSON check-in: 29e54c7426 user: olr tags: trunk, graphspell | |
16:15 | [graphspell][js] dawg: convert Map to Object when exporting to JSON check-in: e1707a65be user: olr tags: trunk, graphspell | |
Changes
Modified graphspell-js/dawg.js from [ba2376ee6f] to [a299d0be58].
︙ | ︙ | |||
24 25 26 27 28 29 30 | This code is inspired from Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115) We store suffix/affix codes and tags within the graph after the “real” word. A word is a list of numbers [ c1, c2, c3 . . . cN, iAffix, iTags] Each arc is an index in this.lArcVal, where are stored characters, suffix/affix codes for stemming and tags. Important: As usual, the last node (after ‘iTags’) is tagged final, AND the node after ‘cN’ is ALSO tagged final. */ | | | 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 | This code is inspired from Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115) We store suffix/affix codes and tags within the graph after the “real” word. A word is a list of numbers [ c1, c2, c3 . . . cN, iAffix, iTags] Each arc is an index in this.lArcVal, where are stored characters, suffix/affix codes for stemming and tags. Important: As usual, the last node (after ‘iTags’) is tagged final, AND the node after ‘cN’ is ALSO tagged final. */ constructor (lEntrySrc, sLang, cStemming, xProgressBarNode=null) { console.log("===== Direct Acyclic Word Graph - Minimal Acyclic Finite State Automaton ====="); let funcStemmingGen = null; switch (cStemming.toUpperCase()) { case "A": funcStemmingGen = str_transform.defineAffixCode; break; case "S": funcStemmingGen = str_transform.defineSuffixCode; break; |
︙ | ︙ | |||
100 101 102 103 104 105 106 | let lKeyVal = []; for (let c of dChar.keys()) { lKeyVal.push([dChar.get(c), dCharOccur.get(c)]); } for (let sAff of dAff.keys()) { lKeyVal.push([dAff.get(sAff)+nChar, dAffOccur.get(sAff)]); } for (let sTag of dTag.keys()) { lKeyVal.push([dTag.get(sTag)+nChar+nAff, dTagOccur.get(sTag)]); } let dValOccur = new Map(lKeyVal); lKeyVal.length = 0; // clear the array | > | | 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 | let lKeyVal = []; for (let c of dChar.keys()) { lKeyVal.push([dChar.get(c), dCharOccur.get(c)]); } for (let sAff of dAff.keys()) { lKeyVal.push([dAff.get(sAff)+nChar, dAffOccur.get(sAff)]); } for (let sTag of dTag.keys()) { lKeyVal.push([dTag.get(sTag)+nChar+nAff, dTagOccur.get(sTag)]); } let dValOccur = new Map(lKeyVal); lKeyVal.length = 0; // clear the array this.sHeader = "/pyfsa/"; this.sLang = sLang; this.nEntry = lWord.length; this.aPreviousEntry = []; oNodeCounter.reset(); this.oRoot = new DawgNode(); this.lUncheckedNodes = []; // list of nodes that have not been checked for duplication. this.dMinimizedNodes = new Map(); // list of unique nodes that have been checked for duplication. this.nNode = 0; |
︙ | ︙ | |||
364 365 366 367 368 369 370 | sByDic = this.oRoot.convToBytes1(this.nBytesArc, this.nBytesNodeAddress); for (let oNode of this.dMinimizedNodes.values()) { sByDic += oNode.convToBytes1(this.nBytesArc, this.nBytesNodeAddress); } } let oJSON = { "sName": this.sName, | | | | 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 | sByDic = this.oRoot.convToBytes1(this.nBytesArc, this.nBytesNodeAddress); for (let oNode of this.dMinimizedNodes.values()) { sByDic += oNode.convToBytes1(this.nBytesArc, this.nBytesNodeAddress); } } let oJSON = { "sName": this.sName, "nVersion": nMethod, "sHeader": this.sHeader + nMethod + "/", "lArcVal": this.lArcVal, "nArcVal": this.nArcVal, "byDic": sByDic, "sLang": this.sLang, "nChar": this.nChar, "nBytesArc": this.nBytesArc, "nBytesNodeAddress": this.nBytesNodeAddress, |
︙ | ︙ |