| 
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186 | 
159
160
161
162
163
164
165
166
167
168
169
170
171
172
 | 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 | 
    },
    //// Parsing
    parse: function (sText, sCountry="${country_default}", bDebug=false, dOptions=null, bContext=false) {
        let oText = new TextParser(sText);
        return oText.parse(sCountry, bDebug, dOptions, bContext);
}
};
class TextParser {
    constructor (sText) {    },    _zEndOfSentence: new RegExp ('([.?!:;…][   .?!…»«“”"‘’)–—]+(?=[A-ZÉÈÎÔ])|.$)', "g"),    _zBeginOfParagraph: new RegExp ("^[-  –—.,;?!…]*", "ig"),    _zEndOfParagraph: new RegExp ("[-  .,;?!…–—]*$", "ig"),    getSentenceBoundaries: function* (sText) {        let mBeginOfSentence = this._zBeginOfParagraph.exec(sText);        let iStart = this._zBeginOfParagraph.lastIndex;        let m;        while ((m = this._zEndOfSentence.exec(sText)) !== null) {            yield [iStart, this._zEndOfSentence.lastIndex];            iStart = this._zEndOfSentence.lastIndex;        } | 
| 
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253 | 
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
 | 
-
+
 | 
            this.sText = this.sText.replace(/‑/g, "-"); // nobreakdash
        }
        if (this.sText.includes("@@")) {
            this.sText = this.sText.replace(/@@+/g, "");
        }
        // parse sentence
        for (let [iStart, iEnd] of for (let [iStart, iEnd] of text.getSentenceBoundaries(this.sText)) {
            try {
                this.sSentence = this.sText.slice(iStart, iEnd);
                this.sSentence0 = this.sText0.slice(iStart, iEnd);
                this.nOffsetWithinParagraph = iStart;
                this.lToken = Array.from(_oTokenizer.genTokens(this.sSentence, true));
                this.dTokenPos.clear();
                for (let dToken of this.lToken) {gc_engine.getSentenceBoundaries(this.sText)) { |