Grammalecte  Diff

Differences From Artifact [842c50636b]:

To Artifact [0ae80cfb32]:


159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
    },

    //// Parsing

    parse: function (sText, sCountry="${country_default}", bDebug=false, dOptions=null, bContext=false) {
        let oText = new TextParser(sText);
        return oText.parse(sCountry, bDebug, dOptions, bContext);
    },

    _zEndOfSentence: new RegExp ('([.?!:;…][   .?!…»«“”"‘’)–—]+(?=[A-ZÉÈÎÔ])|.$)', "g"),
    _zBeginOfParagraph: new RegExp ("^[-  –—.,;?!…]*", "ig"),
    _zEndOfParagraph: new RegExp ("[-  .,;?!…–—]*$", "ig"),

    getSentenceBoundaries: function* (sText) {
        let mBeginOfSentence = this._zBeginOfParagraph.exec(sText);
        let iStart = this._zBeginOfParagraph.lastIndex;
        let m;
        while ((m = this._zEndOfSentence.exec(sText)) !== null) {
            yield [iStart, this._zEndOfSentence.lastIndex];
            iStart = this._zEndOfSentence.lastIndex;
        }
    }
};


class TextParser {

    constructor (sText) {







<
<
<
<
<
<
<
<
<
<
<
<
<
<







159
160
161
162
163
164
165














166
167
168
169
170
171
172
    },

    //// Parsing

    parse: function (sText, sCountry="${country_default}", bDebug=false, dOptions=null, bContext=false) {
        let oText = new TextParser(sText);
        return oText.parse(sCountry, bDebug, dOptions, bContext);














    }
};


class TextParser {

    constructor (sText) {
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
            this.sText = this.sText.replace(/‑/g, "-"); // nobreakdash
        }
        if (this.sText.includes("@@")) {
            this.sText = this.sText.replace(/@@+/g, "");
        }

        // parse sentence
        for (let [iStart, iEnd] of gc_engine.getSentenceBoundaries(this.sText)) {
            try {
                this.sSentence = this.sText.slice(iStart, iEnd);
                this.sSentence0 = this.sText0.slice(iStart, iEnd);
                this.nOffsetWithinParagraph = iStart;
                this.lToken = Array.from(_oTokenizer.genTokens(this.sSentence, true));
                this.dTokenPos.clear();
                for (let dToken of this.lToken) {







|







225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
            this.sText = this.sText.replace(/‑/g, "-"); // nobreakdash
        }
        if (this.sText.includes("@@")) {
            this.sText = this.sText.replace(/@@+/g, "");
        }

        // parse sentence
        for (let [iStart, iEnd] of text.getSentenceBoundaries(this.sText)) {
            try {
                this.sSentence = this.sText.slice(iStart, iEnd);
                this.sSentence0 = this.sText0.slice(iStart, iEnd);
                this.nOffsetWithinParagraph = iStart;
                this.lToken = Array.from(_oTokenizer.genTokens(this.sSentence, true));
                this.dTokenPos.clear();
                for (let dToken of this.lToken) {