24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
|
This code is inspired from Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115)
We store suffix/affix codes and tags within the graph after the “real” word.
A word is a list of numbers [ c1, c2, c3 . . . cN, iAffix, iTags]
Each arc is an index in this.lArcVal, where are stored characters, suffix/affix codes for stemming and tags.
Important: As usual, the last node (after ‘iTags’) is tagged final, AND the node after ‘cN’ is ALSO tagged final.
*/
constructor (lEntrySrc, sLangName, cStemming, xProgressBarNode=null) {
console.log("===== Direct Acyclic Word Graph - Minimal Acyclic Finite State Automaton =====");
let funcStemmingGen = null;
switch (cStemming.toUpperCase()) {
case "A":
funcStemmingGen = str_transform.defineAffixCode; break;
case "S":
funcStemmingGen = str_transform.defineSuffixCode; break;
|
|
|
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
|
This code is inspired from Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115)
We store suffix/affix codes and tags within the graph after the “real” word.
A word is a list of numbers [ c1, c2, c3 . . . cN, iAffix, iTags]
Each arc is an index in this.lArcVal, where are stored characters, suffix/affix codes for stemming and tags.
Important: As usual, the last node (after ‘iTags’) is tagged final, AND the node after ‘cN’ is ALSO tagged final.
*/
constructor (lEntrySrc, sLang, cStemming, xProgressBarNode=null) {
console.log("===== Direct Acyclic Word Graph - Minimal Acyclic Finite State Automaton =====");
let funcStemmingGen = null;
switch (cStemming.toUpperCase()) {
case "A":
funcStemmingGen = str_transform.defineAffixCode; break;
case "S":
funcStemmingGen = str_transform.defineSuffixCode; break;
|
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
let lKeyVal = [];
for (let c of dChar.keys()) { lKeyVal.push([dChar.get(c), dCharOccur.get(c)]); }
for (let sAff of dAff.keys()) { lKeyVal.push([dAff.get(sAff)+nChar, dAffOccur.get(sAff)]); }
for (let sTag of dTag.keys()) { lKeyVal.push([dTag.get(sTag)+nChar+nAff, dTagOccur.get(sTag)]); }
let dValOccur = new Map(lKeyVal);
lKeyVal.length = 0; // clear the array
this.sLang = sLangName;
this.nEntry = lWord.length;
this.aPreviousEntry = [];
oNodeCounter.reset();
this.oRoot = new DawgNode();
this.lUncheckedNodes = []; // list of nodes that have not been checked for duplication.
this.dMinimizedNodes = new Map(); // list of unique nodes that have been checked for duplication.
this.nNode = 0;
|
>
|
|
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
|
let lKeyVal = [];
for (let c of dChar.keys()) { lKeyVal.push([dChar.get(c), dCharOccur.get(c)]); }
for (let sAff of dAff.keys()) { lKeyVal.push([dAff.get(sAff)+nChar, dAffOccur.get(sAff)]); }
for (let sTag of dTag.keys()) { lKeyVal.push([dTag.get(sTag)+nChar+nAff, dTagOccur.get(sTag)]); }
let dValOccur = new Map(lKeyVal);
lKeyVal.length = 0; // clear the array
this.sHeader = "/pyfsa/";
this.sLang = sLang;
this.nEntry = lWord.length;
this.aPreviousEntry = [];
oNodeCounter.reset();
this.oRoot = new DawgNode();
this.lUncheckedNodes = []; // list of nodes that have not been checked for duplication.
this.dMinimizedNodes = new Map(); // list of unique nodes that have been checked for duplication.
this.nNode = 0;
|
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
|
sByDic = this.oRoot.convToBytes1(this.nBytesArc, this.nBytesNodeAddress);
for (let oNode of this.dMinimizedNodes.values()) {
sByDic += oNode.convToBytes1(this.nBytesArc, this.nBytesNodeAddress);
}
}
let oJSON = {
"sName": this.sName,
"nVersion": this.nMethod,
"sHeader": this.sHeader,
"lArcVal": this.lArcVal,
"nArcVal": this.nArcVal,
"byDic": sByDic,
"sLang": this.sLang,
"nChar": this.nChar,
"nBytesArc": this.nBytesArc,
"nBytesNodeAddress": this.nBytesNodeAddress,
|
|
|
|
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
|
sByDic = this.oRoot.convToBytes1(this.nBytesArc, this.nBytesNodeAddress);
for (let oNode of this.dMinimizedNodes.values()) {
sByDic += oNode.convToBytes1(this.nBytesArc, this.nBytesNodeAddress);
}
}
let oJSON = {
"sName": this.sName,
"nVersion": nMethod,
"sHeader": this.sHeader + nMethod + "/",
"lArcVal": this.lArcVal,
"nArcVal": this.nArcVal,
"byDic": sByDic,
"sLang": this.sLang,
"nChar": this.nChar,
"nBytesArc": this.nBytesArc,
"nBytesNodeAddress": this.nBytesNodeAddress,
|