24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
|
This code is inspired from Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115)
We store suffix/affix codes and tags within the graph after the “real” word.
A word is a list of numbers [ c1, c2, c3 . . . cN, iAffix, iTags]
Each arc is an index in this.lArcVal, where are stored characters, suffix/affix codes for stemming and tags.
Important: As usual, the last node (after ‘iTags’) is tagged final, AND the node after ‘cN’ is ALSO tagged final.
*/
constructor (lEntrySrc, sLangCode, sLangName, sDicName, cStemming, xProgressBarNode=null) {
console.log("===== Direct Acyclic Word Graph - Minimal Acyclic Finite State Automaton =====");
let funcStemmingGen = null;
switch (cStemming.toUpperCase()) {
case "A":
funcStemmingGen = str_transform.defineAffixCode; break;
case "S":
funcStemmingGen = str_transform.defineSuffixCode; break;
|
|
|
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
|
This code is inspired from Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115)
We store suffix/affix codes and tags within the graph after the “real” word.
A word is a list of numbers [ c1, c2, c3 . . . cN, iAffix, iTags]
Each arc is an index in this.lArcVal, where are stored characters, suffix/affix codes for stemming and tags.
Important: As usual, the last node (after ‘iTags’) is tagged final, AND the node after ‘cN’ is ALSO tagged final.
*/
constructor (lEntrySrc, cStemming, sLangCode, sLangName="", sDicName="", xProgressBarNode=null) {
console.log("===== Direct Acyclic Word Graph - Minimal Acyclic Finite State Automaton =====");
let funcStemmingGen = null;
switch (cStemming.toUpperCase()) {
case "A":
funcStemmingGen = str_transform.defineAffixCode; break;
case "S":
funcStemmingGen = str_transform.defineSuffixCode; break;
|
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
|
let oJSON = {
"sHeader": "/pyfsa/",
"sLangCode": this.sLangCode,
"sLangName": this.sLangName,
"sDicName": this.sDicName,
"sFileName": "[none]",
"sDate": this._getDate(),
"nEntries": this.nEntry,
"nChar": this.nChar,
"nAff": this.nAff,
"nTag": this.nTag,
"cStemming": this.cStemming,
"dChar": helpers.mapToObject(this.dChar),
"nNode": this.nNode,
"nArc": this.nArc,
|
|
|
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
|
let oJSON = {
"sHeader": "/pyfsa/",
"sLangCode": this.sLangCode,
"sLangName": this.sLangName,
"sDicName": this.sDicName,
"sFileName": "[none]",
"sDate": this._getDate(),
"nEntry": this.nEntry,
"nChar": this.nChar,
"nAff": this.nAff,
"nTag": this.nTag,
"cStemming": this.cStemming,
"dChar": helpers.mapToObject(this.dChar),
"nNode": this.nNode,
"nArc": this.nArc,
|