Changes In Branch nodejs Through [a3687f4fd3] Excluding Merge-Ins
This is equivalent to a diff from 4d2953e2f6 to a3687f4fd3
2018-10-10
| ||
09:36 | Minimal file to use in node check-in: b79bb889b9 user: IllusionPerdu tags: graphspell, njs, nodejs | |
09:19 | Some change to javascript to work in node check-in: a3687f4fd3 user: IllusionPerdu tags: graphspell, njs, nodejs | |
09:03 | [fr] test faux positif check-in: bdcc97b45b user: olr tags: trunk, fr | |
2018-10-09
| ||
11:07 | [graphspell][js] useless comments check-in: 4d2953e2f6 user: olr tags: trunk, graphspell | |
08:54 | [graphspell][js] performance tests check-in: 14ed269c7c user: olr tags: trunk, graphspell | |
Modified graphspell-js/char_player.js from [626d7d161b] to [27827e9132].
1 2 3 4 5 6 7 8 9 10 | // list of similar chars // useful for suggestion mechanism ${map} var char_player = { _xTransCharsForSpelling: new Map([ ['ſ', 's'], ['ffi', 'ffi'], ['ffl', 'ffl'], ['ff', 'ff'], ['ſt', 'ft'], ['fi', 'fi'], ['fl', 'fl'], ['st', 'st'] | > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 | // list of similar chars // useful for suggestion mechanism /* jshint esversion:6 */ /* jslint esversion:6 */ ${map} var char_player = { _xTransCharsForSpelling: new Map([ ['ſ', 's'], ['ffi', 'ffi'], ['ffl', 'ffl'], ['ff', 'ff'], ['ſt', 'ft'], ['fi', 'fi'], ['fl', 'fl'], ['st', 'st'] |
︙ | ︙ | |||
21 22 23 24 25 26 27 | _xTransCharsForSimplification: new Map([ ['à', 'a'], ['é', 'e'], ['î', 'i'], ['ô', 'o'], ['û', 'u'], ['ÿ', 'i'], ['y', 'i'], ['â', 'a'], ['è', 'e'], ['ï', 'i'], ['ö', 'o'], ['ù', 'u'], ['ŷ', 'i'], ['ä', 'a'], ['ê', 'e'], ['í', 'i'], ['ó', 'o'], ['ü', 'u'], ['ý', 'i'], ['á', 'a'], ['ë', 'e'], ['ì', 'i'], ['ò', 'o'], ['ú', 'u'], ['ỳ', 'i'], ['ā', 'a'], ['ē', 'e'], ['ī', 'i'], ['ō', 'o'], ['ū', 'u'], ['ȳ', 'i'], ['ç', 'c'], ['ñ', 'n'], ['k', 'q'], ['w', 'v'], | | | 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 | _xTransCharsForSimplification: new Map([ ['à', 'a'], ['é', 'e'], ['î', 'i'], ['ô', 'o'], ['û', 'u'], ['ÿ', 'i'], ['y', 'i'], ['â', 'a'], ['è', 'e'], ['ï', 'i'], ['ö', 'o'], ['ù', 'u'], ['ŷ', 'i'], ['ä', 'a'], ['ê', 'e'], ['í', 'i'], ['ó', 'o'], ['ü', 'u'], ['ý', 'i'], ['á', 'a'], ['ë', 'e'], ['ì', 'i'], ['ò', 'o'], ['ú', 'u'], ['ỳ', 'i'], ['ā', 'a'], ['ē', 'e'], ['ī', 'i'], ['ō', 'o'], ['ū', 'u'], ['ȳ', 'i'], ['ç', 'c'], ['ñ', 'n'], ['k', 'q'], ['w', 'v'], ['œ', 'oe'], ['æ', 'ae'], ['ſ', 's'], ['ffi', 'ffi'], ['ffl', 'ffl'], ['ff', 'ff'], ['ſt', 'ft'], ['fi', 'fi'], ['fl', 'fl'], ['st', 'st'] ]), simplifyWord: function (sWord) { // word simplication before calculating distance between words sWord = sWord.toLowerCase(); sWord = [...sWord].map(c => this._xTransCharsForSimplification.gl_get(c, c)).join(''); |
︙ | ︙ | |||
100 101 102 103 104 105 106 | ["Ë", "EeÉéÈèÊêËëĒēŒœ"], ["f", "fF"], ["F", "Ff"], ["g", "gGjJĵĴ"], ["G", "GgJjĴĵ"], | | | 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 | ["Ë", "EeÉéÈèÊêËëĒēŒœ"], ["f", "fF"], ["F", "Ff"], ["g", "gGjJĵĴ"], ["G", "GgJjĴĵ"], ["h", "hH"], ["H", "Hh"], ["i", "iIîÎïÏyYíÍìÌīĪÿŸ"], ["I", "IiÎîÏïYyÍíÌìĪīŸÿ"], ["î", "iIîÎïÏyYíÍìÌīĪÿŸ"], ["Î", "IiÎîÏïYyÍíÌìĪīŸÿ"], |
︙ | ︙ | |||
378 379 380 381 382 383 384 | }, // Other functions filterSugg: function (aSugg) { return aSugg.filter((sSugg) => { return !sSugg.endsWith("è") && !sSugg.endsWith("È"); }); } | | | | 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 | }, // Other functions filterSugg: function (aSugg) { return aSugg.filter((sSugg) => { return !sSugg.endsWith("è") && !sSugg.endsWith("È"); }); } }; if (typeof exports !== 'undefined') { exports._xTransCharsForSpelling = char_player._xTransCharsForSpelling; exports.spellingNormalization = char_player.spellingNormalization; exports._xTransCharsForSimplification = char_player._xTransCharsForSimplification; exports.simplifyWord = char_player.simplifyWord; exports.aVowel = char_player.aVowel; exports.aConsonant = char_player.aConsonant; exports.aDouble = char_player.aDouble; |
︙ | ︙ |
Modified graphspell-js/dawg.js from [d94e6b7163] to [56edbc7aa3].
1 2 3 4 5 6 7 8 9 10 11 12 | // JavaScript // FSA DICTIONARY BUILDER // // by Olivier R. // License: MPL 2 // // This tool encodes lexicon into an indexable binary dictionary // Input files MUST be encoded in UTF-8. "use strict"; | > > > > | > | | < | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 | // JavaScript // FSA DICTIONARY BUILDER // // by Olivier R. // License: MPL 2 // // This tool encodes lexicon into an indexable binary dictionary // Input files MUST be encoded in UTF-8. /* jshint esversion:6, -W097 */ /* jslint esversion:6 */ /* global require, exports, console, helpers */ "use strict"; if(typeof process !== 'undefined') { var str_transform = require('./str_transform.js'); } else if (typeof require !== 'undefined') { var str_transform = require('resource://grammalecte/graphspell/str_transform.js'); } ${map} class DAWG { /* DIRECT ACYCLIC WORD GRAPH This code is inspired from Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115) |
︙ | ︙ | |||
96 97 98 99 100 101 102 | let lWord = []; for (let [sFlex, iAff, iTag] of lEntry) { let lTemp = []; for (let c of sFlex) { lTemp.push(dChar.get(c)); } lTemp.push(iAff+nChar); | | | 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 | let lWord = []; for (let [sFlex, iAff, iTag] of lEntry) { let lTemp = []; for (let c of sFlex) { lTemp.push(dChar.get(c)); } lTemp.push(iAff+nChar); lTemp.push(iTag+nChar+nAff); lWord.push(lTemp); } lEntry.length = 0; // clear the array // Dictionary of arc values occurrency, to sort arcs of each node let lKeyVal = []; for (let c of dChar.keys()) { lKeyVal.push([dChar.get(c), dCharOccur.get(c)]); } |
︙ | ︙ | |||
426 427 428 429 430 431 432 | this.nNextId += 1; return this.nNextId-1; }, reset: function () { this.nNextId = 0; } | | | 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 | this.nNextId += 1; return this.nNextId-1; }, reset: function () { this.nNextId = 0; } }; class DawgNode { constructor () { this.i = oNodeCounter.getId(); this.final = false; |
︙ | ︙ | |||
540 541 542 543 544 545 546 | convValueToHexString (nVal, nByte) { // nVal: value to convert, nByte: number of bytes let sHexVal = nVal.toString(16); // conversion to hexadecimal string //console.log(`value: ${nVal} in ${nByte} bytes`); if (sHexVal.length < (nByte*2)) { return "0".repeat((nByte*2) - sHexVal.length) + sHexVal; } else if (sHexVal.length == (nByte*2)) { | | | 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 | convValueToHexString (nVal, nByte) { // nVal: value to convert, nByte: number of bytes let sHexVal = nVal.toString(16); // conversion to hexadecimal string //console.log(`value: ${nVal} in ${nByte} bytes`); if (sHexVal.length < (nByte*2)) { return "0".repeat((nByte*2) - sHexVal.length) + sHexVal; } else if (sHexVal.length == (nByte*2)) { return sHexVal; } else { throw "Conversion to byte string: value bigger than allowed."; } } } |
︙ | ︙ |
Modified graphspell-js/helpers.js from [6ccb02c60d] to [2ed0ee7e88].
|
| < > > | | > > > > > | | | | | | | > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 | // HELPERS /* jshint esversion:6, -W097 */ /* jslint esversion:6 */ /* global require, exports, console, XMLHttpRequest */ "use strict"; var helpers = { inspect: function (o) { let sMsg = "__inspect__: " + typeof o; for (let sParam in o) { sMsg += "\n" + sParam + ": " + o.sParam; } sMsg += "\n" + JSON.stringify(o) + "\n__end__"; console.log(sMsg); }, loadFile: function (spf) { // load ressources in workers (suggested by Mozilla extensions reviewers) // for more options have a look here: https://gist.github.com/Noitidart/ec1e6b9a593ec7e3efed // if not in workers, use sdk/data.load() instead try { if(typeof process !== 'undefined' && typeof require !== 'undefined') { //console.log('loadFile(disque): ' + spf); let fs = require('fs'); return fs.readFileSync(spf, 'utf8'); } else { //console.log('loadFile: ' + spf); let xRequest; xRequest = new XMLHttpRequest(); xRequest.open('GET', spf, false); // 3rd arg is false for synchronous, sync is acceptable in workers xRequest.overrideMimeType('text/json'); xRequest.send(); return xRequest.responseText; } } catch (e) { console.error(e); return null; } }, |
︙ | ︙ | |||
57 58 59 60 61 62 63 | obj[k] = v; } return obj; } }; | | | 64 65 66 67 68 69 70 71 72 73 74 75 76 | obj[k] = v; } return obj; } }; if (typeof exports !== 'undefined') { exports.inspect = helpers.inspect; exports.loadFile = helpers.loadFile; exports.objectToMap = helpers.objectToMap; exports.mapToObject = helpers.mapToObject; } |
Modified graphspell-js/ibdawg.js from [26e5034903] to [ad47fc175d].
|
| | > > | | | > > > | | | | < | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 | // IBDAWG /* jshint esversion:6, -W097 */ /* jslint esversion:6 */ /* global require, exports, console*/ "use strict"; if(typeof process !== 'undefined') { var str_transform = require('./str_transform.js'); var helpers = require('./helpers.js'); var char_player = require('./char_player.js'); } else if (typeof require !== 'undefined') { var str_transform = require('resource://grammalecte/graphspell/str_transform.js'); var helpers = require('resource://grammalecte/graphspell/helpers.js'); var char_player = require('resource://grammalecte/graphspell/char_player.js'); } // Don’t remove <string>. Necessary in TB. ${string} ${map} ${set} |
︙ | ︙ | |||
219 220 221 222 223 224 225 | "l2grams": this.l2grams }; return oJSON; } isValidToken (sToken) { // checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked) | | | 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 | "l2grams": this.l2grams }; return oJSON; } isValidToken (sToken) { // checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked) sToken = char_player.spellingNormalization(sToken); if (this.isValid(sToken)) { return true; } if (sToken.includes("-")) { if (sToken.gl_count("-") > 4) { return true; } |
︙ | ︙ | |||
296 297 298 299 300 301 302 | } } return Boolean(this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask); } getMorph (sWord) { // retrieves morphologies list, different casing allowed | | | | | < | | 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 | } } return Boolean(this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask); } getMorph (sWord) { // retrieves morphologies list, different casing allowed sWord = char_player.spellingNormalization(sWord); let l = this.morph(sWord); if (sWord[0].gl_isUpperCase()) { l.push(...this.morph(sWord.toLowerCase())); if (sWord.gl_isUpperCase() && sWord.length > 1) { l.push(...this.morph(sWord.gl_toCapitalize())); } } return l; } suggest (sWord, nSuggLimit=10) { // returns a array of suggestions for <sWord> //console.time("Suggestions for " + sWord + "); sWord = char_player.spellingNormalization(sWord); let sPfx = ""; let sSfx = ""; [sPfx, sWord, sSfx] = char_player.cut(sWord); let nMaxSwitch = Math.max(Math.floor(sWord.length / 3), 1); let nMaxDel = Math.floor(sWord.length / 5); let nMaxHardRepl = Math.max(Math.floor((sWord.length - 5) / 4), 1); let nMaxJump = Math.max(Math.floor(sWord.length / 4), 1); let oSuggResult = new SuggResult(sWord); this._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump); let aSugg = oSuggResult.getSuggestions(nSuggLimit); if (sSfx || sPfx) { // we add what we removed return aSugg.map( (sSugg) => { return sPfx + sSugg + sSfx; } ); } //console.timeEnd("Suggestions for " + sWord + "); return aSugg; } _suggest (oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nMaxJump=0, nDist=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=false) { // returns a set of suggestions // recursive function if (sRemain == "") { |
︙ | ︙ | |||
587 588 589 590 591 592 593 | } iAddr = iEndArcAddr + this.nBytesNodeAddress; } } } * _getArcs1 (iAddr) { | | | 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 | } iAddr = iEndArcAddr + this.nBytesNodeAddress; } } } * _getArcs1 (iAddr) { // generator: return all arcs at <iAddr> as tuples of (nVal, iAddr) while (true) { let iEndArcAddr = iAddr+this.nBytesArc; let nRawArc = this._convBytesToInteger(this.byDic.slice(iAddr, iEndArcAddr)); yield [nRawArc & this._arcMask, this._convBytesToInteger(this.byDic.slice(iEndArcAddr, iEndArcAddr+this.nBytesNodeAddress))]; if (nRawArc & this._lastArcMask) { break; } |
︙ | ︙ | |||
628 629 630 631 632 633 634 | _lookupArcNode3 (nVal, iAddr) { // to do } } | | | 631 632 633 634 635 636 637 638 639 640 | _lookupArcNode3 (nVal, iAddr) { // to do } } if (typeof exports !== 'undefined') { exports.IBDAWG = IBDAWG; } |
Modified graphspell-js/spellchecker.js from [b26374afa0] to [6b69161b99].
1 2 3 4 5 6 7 8 9 10 11 12 13 | // Spellchecker // Wrapper for the IBDAWG class. // Useful to check several dictionaries at once. // To avoid iterating over a pile of dictionaries, it is assumed that 3 are enough: // - the main dictionary, bundled with the package // - the extended dictionary // - the community dictionary, added by an organization // - the personal dictionary, created by the user for its own convenience "use strict"; | > > > | > > | | | < | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 | // Spellchecker // Wrapper for the IBDAWG class. // Useful to check several dictionaries at once. // To avoid iterating over a pile of dictionaries, it is assumed that 3 are enough: // - the main dictionary, bundled with the package // - the extended dictionary // - the community dictionary, added by an organization // - the personal dictionary, created by the user for its own convenience /* jshint esversion:6, -W097 */ /* jslint esversion:6 */ /* global require, exports, console, IBDAWG, Tokenizer */ "use strict"; if(typeof process !== 'undefined') { var ibdawg = require('./ibdawg.js'); var tokenizer = require('./tokenizer.js'); } else if (typeof require !== 'undefined') { var ibdawg = require('resource://grammalecte/graphspell/ibdawg.js'); var tokenizer = require('resource://grammalecte/graphspell/tokenizer.js'); } ${map} const dDefaultDictionaries = new Map([ ["fr", "fr-allvars.json"], ["en", "en.json"] |
︙ | ︙ | |||
62 63 64 65 66 67 68 | } } catch (e) { let sfDictionary = (typeof(dictionary) == "string") ? dictionary : dictionary.sLangName + "/" + dictionary.sFileName; if (bNecessary) { throw "Error: <" + sfDictionary + "> not loaded. " + e.message; } | | | 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 | } } catch (e) { let sfDictionary = (typeof(dictionary) == "string") ? dictionary : dictionary.sLangName + "/" + dictionary.sFileName; if (bNecessary) { throw "Error: <" + sfDictionary + "> not loaded. " + e.message; } console.log("Error: <" + sfDictionary + "> not loaded."); console.log(e.message); return null; } } loadTokenizer () { if (typeof(tokenizer) !== 'undefined') { |
︙ | ︙ | |||
193 194 195 196 197 198 199 | // checks if sWord is valid (different casing tested if the first letter is a capital) if (this.oMainDic.isValid(sWord)) { return true; } if (this.bExtendedDic && this.oExtendedDic.isValid(sWord)) { return true; } | | | | 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 | // checks if sWord is valid (different casing tested if the first letter is a capital) if (this.oMainDic.isValid(sWord)) { return true; } if (this.bExtendedDic && this.oExtendedDic.isValid(sWord)) { return true; } if (this.bCommunityDic && this.oCommunityDic.isValid(sWord)) { return true; } if (this.bPersonalDic && this.oPersonalDic.isValid(sWord)) { return true; } return false; } lookup (sWord) { // checks if sWord is in dictionary as is (strict verification) if (this.oMainDic.lookup(sWord)) { return true; } if (this.bExtendedDic && this.oExtendedDic.lookup(sWord)) { return true; } if (this.bCommunityDic && this.oCommunityDic.lookup(sWord)) { return true; } if (this.bPersonalDic && this.oPersonalDic.lookup(sWord)) { return true; } return false; } |
︙ | ︙ | |||
269 270 271 272 273 274 275 | if (this.bPersonalDic) { yield this.oPersonalDic.suggest(sWord, nSuggLimit); } } * select (sFlexPattern="", sTagsPattern="") { // generator: returns all entries which flexion fits <sFlexPattern> and morphology fits <sTagsPattern> | | | 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 | if (this.bPersonalDic) { yield this.oPersonalDic.suggest(sWord, nSuggLimit); } } * select (sFlexPattern="", sTagsPattern="") { // generator: returns all entries which flexion fits <sFlexPattern> and morphology fits <sTagsPattern> yield* this.oMainDic.select(sFlexPattern, sTagsPattern); if (this.bExtendedDic) { yield* this.oExtendedDic.select(sFlexPattern, sTagsPattern); } if (this.bCommunityDic) { yield* this.oCommunityDic.select(sFlexPattern, sTagsPattern); } if (this.bPersonalDic) { |
︙ | ︙ | |||
297 298 299 300 301 302 303 | if (this.bPersonalDic) { lResult.push(...this.oPersonalDic.getSimilarEntries(sWord, nSuggLimit)); } return lResult; } } | | | 301 302 303 304 305 306 307 308 309 310 | if (this.bPersonalDic) { lResult.push(...this.oPersonalDic.getSimilarEntries(sWord, nSuggLimit)); } return lResult; } } if (typeof exports !== 'undefined') { exports.SpellChecker = SpellChecker; } |
Modified graphspell-js/str_transform.js from [63ae767339] to [2ff1a56a97].
|
| | > > | > | 1 2 3 4 5 6 7 8 9 10 11 12 | // STRING TRANSFORMATION /* jshint esversion:6, -W097 */ /* jslint esversion:6 */ /* global exports, console */ "use strict"; // Note: 48 is the ASCII code for "0" var str_transform = { |
︙ | ︙ | |||
216 217 218 219 220 221 222 | let [sPfxCode, sSfxCode] = sAffCode.split('/'); sWord = sPfxCode.slice(1) + sWord.slice(sPfxCode.charCodeAt(0)-48); return sSfxCode[0] == '0' ? sWord + sSfxCode.slice(1) : sWord.slice(0, -(sSfxCode.charCodeAt(0)-48)) + sSfxCode.slice(1); } }; | | | 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 | let [sPfxCode, sSfxCode] = sAffCode.split('/'); sWord = sPfxCode.slice(1) + sWord.slice(sPfxCode.charCodeAt(0)-48); return sSfxCode[0] == '0' ? sWord + sSfxCode.slice(1) : sWord.slice(0, -(sSfxCode.charCodeAt(0)-48)) + sSfxCode.slice(1); } }; if (typeof exports !== 'undefined') { exports.longestCommonSubstring = str_transform.longestCommonSubstring; exports.distanceDamerauLevenshtein = str_transform.distanceDamerauLevenshtein; exports.distanceDamerauLevenshtein2 = str_transform.distanceDamerauLevenshtein2; exports.showDistance = str_transform.showDistance; exports.changeWordWithSuffixCode = str_transform.changeWordWithSuffixCode; exports.changeWordWithAffixCode = str_transform.changeWordWithAffixCode; exports.defineAffixCode = str_transform.defineAffixCode; exports.defineSuffixCode = str_transform.defineSuffixCode; } |
Modified graphspell-js/tokenizer.js from [88bacac87d] to [541689c69f].
1 2 | // JavaScript // Very simple tokenizer | > > | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 | // JavaScript // Very simple tokenizer /* jshint esversion:6, -W097 */ /* jslint esversion:6 */ /*global require, exports, console*/ "use strict"; const aTkzPatterns = { // All regexps must start with ^. "default": |
︙ | ︙ | |||
68 69 70 71 72 73 74 | while (sText) { let iCut = 1; for (let [zRegex, sType] of this.aRules) { if (sType !== "SPACE" || bWithSpaces) { try { if ((m = zRegex.exec(sText)) !== null) { iToken += 1; | | | | 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 | while (sText) { let iCut = 1; for (let [zRegex, sType] of this.aRules) { if (sType !== "SPACE" || bWithSpaces) { try { if ((m = zRegex.exec(sText)) !== null) { iToken += 1; yield { "i": iToken, "sType": sType, "sValue": m[0], "nStart": iNext, "nEnd": iNext + m[0].length }; iCut = m[0].length; break; } } catch (e) { console.error(e); } } } iNext += iCut; sText = sText.slice(iCut); } if (bStartEndToken) { yield { "i": iToken+1, "sType": "INFO", "sValue": "<end>", "nStart": iEnd, "nEnd": iEnd, "lMorph": ["<end>"] }; } } } if (typeof exports !== 'undefined') { exports.Tokenizer = Tokenizer; } |