1
2
3
4
5
6
7
8
9
10
11
|
// JavaScript
// Very simple tokenizer
/*jslint esversion: 6*/
/*global require,exports*/
"use strict";
const aTkzPatterns = {
// All regexps must start with ^.
"default":
|
>
>
|
|
|
1
2
3
4
5
6
7
8
9
10
11
12
13
|
// JavaScript
// Very simple tokenizer
/* jshint esversion:6, -W097 */
/* jslint esversion:6 */
/*global require, exports, console*/
"use strict";
const aTkzPatterns = {
// All regexps must start with ^.
"default":
|
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
while (sText) {
let iCut = 1;
for (let [zRegex, sType] of this.aRules) {
if (sType !== "SPACE" || bWithSpaces) {
try {
if ((m = zRegex.exec(sText)) !== null) {
iToken += 1;
yield { "i": iToken, "sType": sType, "sValue": m[0], "nStart": iNext, "nEnd": iNext + m[0].length }
iCut = m[0].length;
break;
}
}
catch (e) {
console.error(e);
}
}
}
iNext += iCut;
sText = sText.slice(iCut);
}
if (bStartEndToken) {
yield { "i": iToken+1, "sType": "INFO", "sValue": "<end>", "nStart": iEnd, "nEnd": iEnd, "lMorph": ["<end>"] };
}
}
}
if (typeof(exports) !== 'undefined') {
exports.Tokenizer = Tokenizer;
}
|
|
|
|
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
while (sText) {
let iCut = 1;
for (let [zRegex, sType] of this.aRules) {
if (sType !== "SPACE" || bWithSpaces) {
try {
if ((m = zRegex.exec(sText)) !== null) {
iToken += 1;
yield { "i": iToken, "sType": sType, "sValue": m[0], "nStart": iNext, "nEnd": iNext + m[0].length };
iCut = m[0].length;
break;
}
}
catch (e) {
console.error(e);
}
}
}
iNext += iCut;
sText = sText.slice(iCut);
}
if (bStartEndToken) {
yield { "i": iToken+1, "sType": "INFO", "sValue": "<end>", "nStart": iEnd, "nEnd": iEnd, "lMorph": ["<end>"] };
}
}
}
// Publish the Tokenizer class on `exports` when such an object is defined
// (e.g. a CommonJS-style module environment). `typeof` is safe on an
// undeclared identifier, so this guard never throws where `exports` is absent.
if (typeof exports !== 'undefined') {
exports.Tokenizer = Tokenizer;
}
|