55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
|
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
|
-
+
-
|
this.sLang = "default";
}
this.aRules = aTkzPatterns[this.sLang];
}
* genTokens (sText, bStartEndToken=false, bWithSpaces=false) {
let m;
let iToken;
let iToken = 0;
let iNext = 0;
let iEnd = sText.length;
if (bStartEndToken) {
yield { "i": 0, "sType": "INFO", "sValue": "<start>", "nStart": 0, "nEnd": 0, "lMorph": ["<start>"] };
}
while (sText) {
let iCut = 1;
let iToken = 0;
for (let [zRegex, sType] of this.aRules) {
if (sType !== "SPACE" || bWithSpaces) {
try {
if ((m = zRegex.exec(sText)) !== null) {
iToken += 1;
yield { "i": iToken, "sType": sType, "sValue": m[0], "nStart": iNext, "nEnd": iNext + m[0].length }
iCut = m[0].length;
|