Overview
| Comment: | [graphspell][js] update for dawg: export as JSON |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk | graphspell |
| Files: | files | file ages | folders |
| SHA3-256: | 29e54c742609084011a70825b9840561 |
| User & Date: | olr on 2018-02-07 16:38:17 |
| Other Links: | manifest | tags |
Context
| 2018-02-07 | | |
| 16:39 | [fx] update: lexicon editor check-in: 00eca59ea8 user: olr tags: trunk, fx | |
| 16:38 | [graphspell][js] update for dawg: export as JSON check-in: 29e54c7426 user: olr tags: trunk, graphspell | |
| 16:15 | [graphspell][js] dawg: convert Map to Object when exporting to JSON check-in: e1707a65be user: olr tags: trunk, graphspell | |
Changes
Modified graphspell-js/dawg.js from [ba2376ee6f] to [a299d0be58].
| ︙ | ︙ | |||
24 25 26 27 28 29 30 |
This code is inspired by Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115)
We store suffix/affix codes and tags within the graph after the “real” word.
A word is a list of numbers [ c1, c2, c3 . . . cN, iAffix, iTags]
Each arc is an index in this.lArcVal, which stores the characters, the suffix/affix codes for stemming, and the tags.
Important: As usual, the last node (after ‘iTags’) is tagged final, AND the node after ‘cN’ is ALSO tagged final.
*/
| | | 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
This code is inspired by Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115)
We store suffix/affix codes and tags within the graph after the “real” word.
A word is a list of numbers [ c1, c2, c3 . . . cN, iAffix, iTags]
Each arc is an index in this.lArcVal, which stores the characters, the suffix/affix codes for stemming, and the tags.
Important: As usual, the last node (after ‘iTags’) is tagged final, AND the node after ‘cN’ is ALSO tagged final.
*/
constructor (lEntrySrc, sLang, cStemming, xProgressBarNode=null) {
console.log("===== Direct Acyclic Word Graph - Minimal Acyclic Finite State Automaton =====");
let funcStemmingGen = null;
switch (cStemming.toUpperCase()) {
case "A":
funcStemmingGen = str_transform.defineAffixCode; break;
case "S":
funcStemmingGen = str_transform.defineSuffixCode; break;
|
| ︙ | ︙ | |||
100 101 102 103 104 105 106 |
let lKeyVal = [];
for (let c of dChar.keys()) { lKeyVal.push([dChar.get(c), dCharOccur.get(c)]); }
for (let sAff of dAff.keys()) { lKeyVal.push([dAff.get(sAff)+nChar, dAffOccur.get(sAff)]); }
for (let sTag of dTag.keys()) { lKeyVal.push([dTag.get(sTag)+nChar+nAff, dTagOccur.get(sTag)]); }
let dValOccur = new Map(lKeyVal);
lKeyVal.length = 0; // clear the array
| > | | 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
let lKeyVal = [];
for (let c of dChar.keys()) { lKeyVal.push([dChar.get(c), dCharOccur.get(c)]); }
for (let sAff of dAff.keys()) { lKeyVal.push([dAff.get(sAff)+nChar, dAffOccur.get(sAff)]); }
for (let sTag of dTag.keys()) { lKeyVal.push([dTag.get(sTag)+nChar+nAff, dTagOccur.get(sTag)]); }
let dValOccur = new Map(lKeyVal);
lKeyVal.length = 0; // clear the array
this.sHeader = "/pyfsa/";
this.sLang = sLang;
this.nEntry = lWord.length;
this.aPreviousEntry = [];
oNodeCounter.reset();
this.oRoot = new DawgNode();
this.lUncheckedNodes = []; // list of nodes that have not been checked for duplication.
this.dMinimizedNodes = new Map(); // list of unique nodes that have been checked for duplication.
this.nNode = 0;
|
| ︙ | ︙ | |||
364 365 366 367 368 369 370 |
sByDic = this.oRoot.convToBytes1(this.nBytesArc, this.nBytesNodeAddress);
for (let oNode of this.dMinimizedNodes.values()) {
sByDic += oNode.convToBytes1(this.nBytesArc, this.nBytesNodeAddress);
}
}
let oJSON = {
"sName": this.sName,
| | | | 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 |
sByDic = this.oRoot.convToBytes1(this.nBytesArc, this.nBytesNodeAddress);
for (let oNode of this.dMinimizedNodes.values()) {
sByDic += oNode.convToBytes1(this.nBytesArc, this.nBytesNodeAddress);
}
}
let oJSON = {
"sName": this.sName,
"nVersion": nMethod,
"sHeader": this.sHeader + nMethod + "/",
"lArcVal": this.lArcVal,
"nArcVal": this.nArcVal,
"byDic": sByDic,
"sLang": this.sLang,
"nChar": this.nChar,
"nBytesArc": this.nBytesArc,
"nBytesNodeAddress": this.nBytesNodeAddress,
|
| ︙ | ︙ |