Index: gc_core/js/lang_core/gc_engine.js
==================================================================
--- gc_core/js/lang_core/gc_engine.js
+++ gc_core/js/lang_core/gc_engine.js
@@ -158,13 +158,14 @@
         _dOptions = gc_options.getOptions(_sAppContext).gl_shallowCopy();
     },
 
     //// Parsing
 
-    parse: function (sText, sCountry="${country_default}", bDebug=false, dOptions=null, bContext=false) {
+    parse: function (sText, sCountry="${country_default}", bDebug=false, dOptions=null, bContext=false, bFullInfo=false) {
+        // init point to analyse the text; returns an iterable of errors or (with option bFullInfo) a list of sentences with tokens and errors
         let oText = new TextParser(sText);
-        return oText.parse(sCountry, bDebug, dOptions, bContext);
+        return oText.parse(sCountry, bDebug, dOptions, bContext, bFullInfo);
     }
 };
 
 
 class TextParser {
@@ -174,14 +175,15 @@
         this.sText0 = sText;
         this.sSentence = "";
         this.sSentence0 = "";
         this.nOffsetWithinParagraph = 0;
         this.lToken = [];
-        this.dTokenPos = new Map();
-        this.dTags = new Map();
-        this.dError = new Map();
-        this.dErrorPriority = new Map();  // Key = position; value = priority
+        this.dTokenPos = new Map();         // {position: token}
+        this.dTags = new Map();             // {position: tags}
+        this.dError = new Map();            // {position: error}
+        this.dSentenceError = new Map();    // {position: error} (for the current sentence only)
+        this.dErrorPriority = new Map();    // {position: priority of the current error}
     }
 
     asString () {
         let s = "===== TEXT =====\n";
         s += "sentence: " + this.sSentence0 + "\n";
@@ -197,59 +199,79 @@
             s += "\n";
         }
         return s;
     }
 
-    parse (sCountry="${country_default}", bDebug=false, dOptions=null, bContext=false) {
-        // analyses the paragraph sText and returns list of errors
+    parse (sCountry="${country_default}", bDebug=false, dOptions=null, bContext=false, bFullInfo=false) {
+        // analyses the paragraph and returns an iterable of errors or (with option bFullInfo) a list of sentences with tokens and errors
         let dOpt = dOptions || _dOptions;
         let bShowRuleId = option('idrule');
         // parse paragraph
        try {
            this.parseText(this.sText, this.sText0, true, 0, sCountry, dOpt, bShowRuleId, bDebug, bContext);
        }
        catch (e) {
            console.error(e);
        }
 
-
-        // cleanup
-        if (this.sText.includes(" ")) {
-            this.sText = this.sText.replace(/ /g, ' '); // nbsp
-        }
-        if (this.sText.includes(" ")) {
-            this.sText = this.sText.replace(/ /g, ' '); // snbsp
-        }
-        if (this.sText.includes("'")) {
-            this.sText = this.sText.replace(/'/g, "’");
-        }
-        if (this.sText.includes("‑")) {
-            this.sText = this.sText.replace(/‑/g, "-"); // nobreakdash
-        }
-        if (this.sText.includes("@@")) {
-            this.sText = this.sText.replace(/@@+/g, "");
-        }
-        // parse sentence
-        for (let [iStart, iEnd] of text.getSentenceBoundaries(this.sText)) {
+        let sText = this._getCleanText();
+        let lSentences = [];
+        let oSentence = null;
+        for (let [iStart, iEnd] of text.getSentenceBoundaries(sText)) {
             try {
-                this.sSentence = this.sText.slice(iStart, iEnd);
+                this.sSentence = sText.slice(iStart, iEnd);
                 this.sSentence0 = this.sText0.slice(iStart, iEnd);
                 this.nOffsetWithinParagraph = iStart;
                 this.lToken = Array.from(_oTokenizer.genTokens(this.sSentence, true));
                 this.dTokenPos.clear();
                 for (let dToken of this.lToken) {
                     if (dToken["sType"] != "INFO") {
                         this.dTokenPos.set(dToken["nStart"], dToken);
                     }
                 }
+                if (bFullInfo) {
+                    oSentence = { "nStart": iStart, "nEnd": iEnd, "sSentence": this.sSentence, "lToken": Array.from(this.lToken) };
+                    // the list of tokens is duplicated, to keep all tokens from being deleted during analysis
+                }
                 this.parseText(this.sSentence, this.sSentence0, false, iStart, sCountry, dOpt, bShowRuleId, bDebug, bContext);
+                if (bFullInfo) {
+                    oSentence["aGrammarErrors"] = Array.from(this.dSentenceError.values());
+                    lSentences.push(oSentence);
+                    this.dSentenceError.clear();
+                }
             }
             catch (e) {
                 console.error(e);
             }
         }
-        return Array.from(this.dError.values());
+        if (bFullInfo) {
+            // Grammar checking and sentence analysis
+            return lSentences;
+        } else {
+            // Grammar checking only
+            return Array.from(this.dError.values());
+        }
+    }
+
+    _getCleanText () {
+        let sText = this.sText;
+        if (sText.includes(" ")) {
+            sText = sText.replace(/ /g, ' '); // nbsp
+        }
+        if (sText.includes(" ")) {
+            sText = sText.replace(/ /g, ' '); // snbsp
+        }
+        if (sText.includes("'")) {
+            sText = sText.replace(/'/g, "’");
+        }
+        if (sText.includes("‑")) {
+            sText = sText.replace(/‑/g, "-"); // nobreakdash
+        }
+        if (sText.includes("@@")) {
+            sText = sText.replace(/@@+/g, "");
+        }
+        return sText;
     }
 
     parseText (sText, sText0, bParagraph, nOffset, sCountry, dOptions, bShowRuleId, bDebug, bContext) {
         let bChange = false;
         let m;
@@ -286,10 +308,11 @@
                                                 //console.log("-> error detected in " + sLineId + "\nzRegex: " + zRegex.source);
                                                 let nErrorStart = nOffset + m.start[eAct[0]];
                                                 if (!this.dError.has(nErrorStart) || nPriority > this.dErrorPriority.get(nErrorStart)) {
                                                     this.dError.set(nErrorStart, this._createErrorFromRegex(sText, sText0, sWhat, nOffset, m, eAct[0], sLineId, sRuleId, bUppercase, eAct[1], eAct[2], bShowRuleId, sOption, bContext));
                                                     this.dErrorPriority.set(nErrorStart, nPriority);
+                                                    this.dSentenceError.set(nErrorStart, this.dError.get(nErrorStart));
                                                 }
                                                 break;
                                             case "~":
                                                 // text processor
                                                 //console.log("-> text processor by " + sLineId + "\nzRegex: " + zRegex.source);
@@ -610,10 +633,11 @@
                                     let nErrorStart = this.nOffsetWithinParagraph + ((cStartLimit == "<") ? this.lToken[nTokenErrorStart]["nStart"] : this.lToken[nTokenErrorStart]["nEnd"]);
                                     let nErrorEnd = this.nOffsetWithinParagraph + ((cEndLimit == ">") ? this.lToken[nTokenErrorEnd]["nEnd"] : this.lToken[nTokenErrorEnd]["nStart"]);
                                     if (!this.dError.has(nErrorStart) || nPriority > this.dErrorPriority.gl_get(nErrorStart, -1)) {
                                         this.dError.set(nErrorStart, this._createErrorFromTokens(sWhat, nTokenOffset, nLastToken, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, bCaseSvty, sMessage, sURL, bShowRuleId, sOption, bContext));
                                         this.dErrorPriority.set(nErrorStart, nPriority);
+                                        this.dSentenceError.set(nErrorStart, this.dError.get(nErrorStart));
                                         if (bDebug) {
                                             console.log(" NEW_ERROR: ", this.dError.get(nErrorStart));
                                         }
                                     }
                                 }
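
Reviewer note: a minimal usage sketch of the new bFullInfo flag, not part of the changeset. It assumes the parsing API is reached through the gc_engine object this file defines and that the engine has already been loaded with its rules and options; the sample text, the explicit "FR" country code and the console logging are placeholders for illustration only.

    // Grammar checking only (default): an iterable of error objects, as before.
    let lErrors = gc_engine.parse("A sample paragraph to check.");
    for (let oError of lErrors) {
        console.log(oError);
    }

    // Grammar checking and sentence analysis: one object per sentence, each carrying
    // its boundaries, its text, its duplicated token list and its own errors.
    let lSentences = gc_engine.parse("A sample paragraph to check.", "FR", false, null, false, true);
    for (let oSentence of lSentences) {
        console.log(oSentence["nStart"], oSentence["nEnd"]);    // sentence boundaries within the paragraph
        console.log(oSentence["sSentence"]);                    // sentence text (taken from the cleaned paragraph)
        console.log(oSentence["lToken"]);                       // tokens kept for this sentence
        console.log(oSentence["aGrammarErrors"]);               // errors found in this sentence only
    }

With bFullInfo=false the behaviour is unchanged (dError values for the whole paragraph); with bFullInfo=true the per-sentence errors come from the new dSentenceError map, which is filled alongside dError and cleared after each sentence.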