25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
|
We store suffix/affix codes and tags within the graph after the “real” word.
A word is a list of numbers [c1, c2, c3, …, cN, iAffix, iTags].
Each arc is an index in this.lArcVal, which stores the characters, the suffix/affix codes used for stemming, and the tags.
Important: As usual, the last node (after ‘iTags’) is tagged final, AND the node after ‘cN’ is ALSO tagged final.
*/
constructor (lEntrySrc, sLangName, cStemming, xProgressBarNode=null) {
console.log("===== Direct Acyclic Word Graph - Minimal Acyclic Finite State Automaton =====")
switch (cStemming.toUpperCase()) {
case "A":
funcStemmingGen = str_transform.defineAffixCode; break;
case "S":
funcStemmingGen = str_transform.defineSuffixCode; break;
case "N":
funcStemmingGen = str_transform.noStemming; break;
default:
throw "Error. Unknown stemming code: " + cStemming;
}
let lEntry = [];
let lChar = [''], Char = new Map(), nChar = 1, dCharOccur = new Map();
let lAff = [], Aff = new Map(), nAff = 0, dAffOccur = new Map();
let lTag = [], Tag = new Map(), nTag = 0, dTagOccur = new Map();
let nErr = 0;
// read lexicon
for (let [sFlex, sStem, sTag] of lEntrySrc) {
addWordToCharDict(sFlex);
// chars
for (let c of sFlex) {
if (!dChar.get(c)) {
dChar.set(c, nChar);
lChar.push(c);
nChar += 1;
}
dCharOccur.set(c, dCharOccur.gl_get(c, 0) + 1);
}
// affixes to find stem from flexion
sAff = funcStemmingGen(sFlex, sStem)
if (!dAff.get(sAff)) {
dAff.set(sAff, nAff);
lAff.push(sAff);
nAff += 1;
}
dAffOccur.set(sAff, dCharOccur.gl_get(sAff, 0) + 1);
// tags
|
|
>
|
|
|
|
|
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
|
We store suffix/affix codes and tags within the graph after the “real” word.
A word is a list of numbers [c1, c2, c3, …, cN, iAffix, iTags].
Each arc is an index in this.lArcVal, which stores the characters, the suffix/affix codes used for stemming, and the tags.
Important: As usual, the last node (after ‘iTags’) is tagged final, AND the node after ‘cN’ is ALSO tagged final.
*/
constructor (lEntrySrc, sLangName, cStemming, xProgressBarNode=null) {
console.log("===== Direct Acyclic Word Graph - Minimal Acyclic Finite State Automaton =====");
let funcStemmingGen = null;
switch (cStemming.toUpperCase()) {
case "A":
funcStemmingGen = str_transform.defineAffixCode; break;
case "S":
funcStemmingGen = str_transform.defineSuffixCode; break;
case "N":
funcStemmingGen = str_transform.noStemming; break;
default:
throw "Error. Unknown stemming code: " + cStemming;
}
let lEntry = [];
let lChar = [''], dChar = new Map(), nChar = 1, dCharOccur = new Map();
let lAff = [], dAff = new Map(), nAff = 0, dAffOccur = new Map();
let lTag = [], dTag = new Map(), nTag = 0, dTagOccur = new Map();
let nErr = 0;
// read lexicon
for (let [sFlex, sStem, sTag] of lEntrySrc) {
addWordToCharDict(sFlex);
// chars
for (let c of sFlex) {
if (!dChar.get(c)) {
dChar.set(c, nChar);
lChar.push(c);
nChar += 1;
}
dCharOccur.set(c, dCharOccur.gl_get(c, 0) + 1);
}
// affixes to find stem from flexion
sAff = funcStemmingGen(sFlex, sStem);
if (!dAff.get(sAff)) {
dAff.set(sAff, nAff);
lAff.push(sAff);
nAff += 1;
}
dAffOccur.set(sAff, dCharOccur.gl_get(sAff, 0) + 1);
// tags
|