Index: gc_core/js/lang_core/gc_engine.js ================================================================== --- gc_core/js/lang_core/gc_engine.js +++ gc_core/js/lang_core/gc_engine.js @@ -195,10 +195,12 @@ this.parseText(this.sText, this.sText0, true, 0, sCountry, dOpt, bShowRuleId, bDebug, bContext); } catch (e) { console.error(e); } + this.lTokens = null; + this.lTokens0 = null; let lParagraphErrors = null; if (bFullInfo) { lParagraphErrors = Array.from(this.dError.values()); this.dSentenceError.clear(); } @@ -209,30 +211,38 @@ for (let [iStart, iEnd] of text.getSentenceBoundaries(sText)) { try { this.sSentence = sText.slice(iStart, iEnd); this.sSentence0 = this.sText0.slice(iStart, iEnd); this.nOffsetWithinParagraph = iStart; - this.lToken = Array.from(gc_engine.oTokenizer.genTokens(this.sSentence, true)); + this.lTokens = Array.from(gc_engine.oTokenizer.genTokens(this.sSentence, true)); this.dTokenPos.clear(); - for (let dToken of this.lToken) { + for (let dToken of this.lTokens) { if (dToken["sType"] != "INFO") { this.dTokenPos.set(dToken["nStart"], dToken); } } if (bFullInfo) { - oSentence = { "nStart": iStart, "nEnd": iEnd, "sSentence": this.sSentence, "lToken": Array.from(this.lToken) }; - for (let oToken of oSentence["lToken"]) { - if (oToken["sType"] == "WORD") { - oToken["bValidToken"] = gc_engine.oSpellChecker.isValidToken(oToken["sValue"]); - } - } + this.lTokens0 = Array.from(this.lTokens); // the list of tokens is duplicated, so that no token is lost when the analysis rewrites the sentence } this.parseText(this.sSentence, this.sSentence0, false, iStart, sCountry, dOpt, bShowRuleId, bDebug, bContext); if (bFullInfo) { - oSentence["lGrammarErrors"] = Array.from(this.dSentenceError.values()); - lSentences.push(oSentence); + for (let oToken of this.lTokens0) { + if (oToken["sType"] == "WORD") { + oToken["bValidToken"] = gc_engine.oSpellChecker.isValidToken(oToken["sValue"]); + } + if (!oToken.hasOwnProperty("lMorph")) { + oToken["lMorph"] = gc_engine.oSpellChecker.getMorph(oToken["sValue"]); + } + } + lSentences.push({ + "nStart": iStart, + "nEnd": iEnd, + "sSentence": this.sSentence0, + "lTokens": this.lTokens0, + "lGrammarErrors": Array.from(this.dSentenceError.values()) + }); this.dSentenceError.clear(); } } catch (e) { console.error(e); @@ -372,13 +382,13 @@ } if (this.dTokenPos.gl_get(oToken["nStart"], {}).hasOwnProperty("aTags")) { oToken["aTags"] = this.dTokenPos.get(oToken["nStart"])["aTags"]; } } - this.lToken = lNewToken; + this.lTokens = lNewToken; this.dTokenPos.clear(); - for (let oToken of this.lToken) { + for (let oToken of this.lTokens) { if (oToken["sType"] != "INFO") { this.dTokenPos.set(oToken["nStart"], oToken); } } if (bDebug) { @@ -614,11 +624,11 @@ parseGraph (oGraph, sCountry="${country_default}", dOptions=null, bShowRuleId=false, bDebug=false, bContext=false) { // parse graph with tokens from the text and execute actions encountered let lPointer = []; let bTagAndRewrite = false; try { - for (let [iToken, oToken] of this.lToken.entries()) { + for (let [iToken, oToken] of this.lTokens.entries()) { if (bDebug) { console.log("TOKEN: " + oToken["sValue"]); } // check arcs for each existing pointer let lNextPointer = []; @@ -666,21 +676,21 @@ // Disambiguator [ option, condition, "=", replacement/suggestion/action ] // Tag [ option, condition, "/", replacement/suggestion/action, iTokenStart, iTokenEnd ] // Immunity [ option, condition, "!", "", iTokenStart, iTokenEnd ] // Test [ option, condition, ">", "" ] if (!sOption || dOptions.gl_get(sOption, false)) { - bCondMemo = 
!sFuncCond || gc_functions[sFuncCond](this.lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, this.dTags, this.sSentence, this.sSentence0); - //bCondMemo = !sFuncCond || oEvalFunc[sFuncCond](this.lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, this.dTags, this.sSentence, this.sSentence0); + bCondMemo = !sFuncCond || gc_functions[sFuncCond](this.lTokens, nTokenOffset, nLastToken, sCountry, bCondMemo, this.dTags, this.sSentence, this.sSentence0); + //bCondMemo = !sFuncCond || oEvalFunc[sFuncCond](this.lTokens, nTokenOffset, nLastToken, sCountry, bCondMemo, this.dTags, this.sSentence, this.sSentence0); if (bCondMemo) { if (cActionType == "-") { // grammar error let [iTokenStart, iTokenEnd, cStartLimit, cEndLimit, bCaseSvty, nPriority, sMessage, iURL] = eAct; let nTokenErrorStart = (iTokenStart > 0) ? nTokenOffset + iTokenStart : nLastToken + iTokenStart; - if (!this.lToken[nTokenErrorStart].hasOwnProperty("bImmune")) { + if (!this.lTokens[nTokenErrorStart].hasOwnProperty("bImmune")) { let nTokenErrorEnd = (iTokenEnd > 0) ? nTokenOffset + iTokenEnd : nLastToken + iTokenEnd; - let nErrorStart = this.nOffsetWithinParagraph + ((cStartLimit == "<") ? this.lToken[nTokenErrorStart]["nStart"] : this.lToken[nTokenErrorStart]["nEnd"]); - let nErrorEnd = this.nOffsetWithinParagraph + ((cEndLimit == ">") ? this.lToken[nTokenErrorEnd]["nEnd"] : this.lToken[nTokenErrorEnd]["nStart"]); + let nErrorStart = this.nOffsetWithinParagraph + ((cStartLimit == "<") ? this.lTokens[nTokenErrorStart]["nStart"] : this.lTokens[nTokenErrorStart]["nEnd"]); + let nErrorEnd = this.nOffsetWithinParagraph + ((cEndLimit == ">") ? this.lTokens[nTokenErrorEnd]["nEnd"] : this.lTokens[nTokenErrorEnd]["nStart"]); if (!this.dError.has(nErrorStart) || nPriority > this.dErrorPriority.gl_get(nErrorStart, -1)) { this.dError.set(nErrorStart, this._createErrorFromTokens(sWhat, nTokenOffset, nLastToken, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, bCaseSvty, sMessage, gc_rules_graph.dURL[iURL], bShowRuleId, sOption, bContext)); this.dErrorPriority.set(nErrorStart, nPriority); this.dSentenceError.set(nErrorStart, this.dError.get(nErrorStart)); @@ -695,19 +705,19 @@ let nTokenStart = (eAct[0] > 0) ? nTokenOffset + eAct[0] : nLastToken + eAct[0]; let nTokenEnd = (eAct[1] > 0) ? nTokenOffset + eAct[1] : nLastToken + eAct[1]; this._tagAndPrepareTokenForRewriting(sWhat, nTokenStart, nTokenEnd, nTokenOffset, nLastToken, eAct[2], bDebug); bChange = true; if (bDebug) { - console.log(` TEXT_PROCESSOR: [${this.lToken[nTokenStart]["sValue"]}:${this.lToken[nTokenEnd]["sValue"]}] > ${sWhat}`); + console.log(` TEXT_PROCESSOR: [${this.lTokens[nTokenStart]["sValue"]}:${this.lTokens[nTokenEnd]["sValue"]}] > ${sWhat}`); } } else if (cActionType == "=") { // disambiguation - gc_functions[sWhat](this.lToken, nTokenOffset, nLastToken); - //oEvalFunc[sWhat](this.lToken, nTokenOffset, nLastToken); + gc_functions[sWhat](this.lTokens, nTokenOffset, nLastToken); + //oEvalFunc[sWhat](this.lTokens, nTokenOffset, nLastToken); if (bDebug) { - console.log(` DISAMBIGUATOR: (${sWhat}) [${this.lToken[nTokenOffset+1]["sValue"]}:${this.lToken[nLastToken]["sValue"]}]`); + console.log(` DISAMBIGUATOR: (${sWhat}) [${this.lTokens[nTokenOffset+1]["sValue"]}:${this.lTokens[nLastToken]["sValue"]}]`); } } else if (cActionType == ">") { // we do nothing, this test is just a condition to apply all following actions if (bDebug) { @@ -717,18 +727,18 @@ else if (cActionType == "/") { // Tag let nTokenStart = (eAct[0] > 0) ? 
nTokenOffset + eAct[0] : nLastToken + eAct[0]; let nTokenEnd = (eAct[1] > 0) ? nTokenOffset + eAct[1] : nLastToken + eAct[1]; for (let i = nTokenStart; i <= nTokenEnd; i++) { - if (this.lToken[i].hasOwnProperty("aTags")) { - this.lToken[i]["aTags"].add(...sWhat.split("|")) + if (this.lTokens[i].hasOwnProperty("aTags")) { + this.lTokens[i]["aTags"].add(...sWhat.split("|")) } else { - this.lToken[i]["aTags"] = new Set(sWhat.split("|")); + this.lTokens[i]["aTags"] = new Set(sWhat.split("|")); } } if (bDebug) { - console.log(` TAG: ${sWhat} > [${this.lToken[nTokenStart]["sValue"]}:${this.lToken[nTokenEnd]["sValue"]}]`); + console.log(` TAG: ${sWhat} > [${this.lTokens[nTokenStart]["sValue"]}:${this.lTokens[nTokenEnd]["sValue"]}]`); } for (let sTag of sWhat.split("|")) { if (!this.dTags.has(sTag)) { this.dTags.set(sTag, [nTokenStart, nTokenEnd]); } else { @@ -742,19 +752,19 @@ console.log(" IMMUNITY: " + sLineId + " / " + sRuleId); } let nTokenStart = (eAct[0] > 0) ? nTokenOffset + eAct[0] : nLastToken + eAct[0]; let nTokenEnd = (eAct[1] > 0) ? nTokenOffset + eAct[1] : nLastToken + eAct[1]; if (nTokenEnd - nTokenStart == 0) { - this.lToken[nTokenStart]["bImmune"] = true; - let nErrorStart = this.nOffsetWithinParagraph + this.lToken[nTokenStart]["nStart"]; + this.lTokens[nTokenStart]["bImmune"] = true; + let nErrorStart = this.nOffsetWithinParagraph + this.lTokens[nTokenStart]["nStart"]; if (this.dError.has(nErrorStart)) { this.dError.delete(nErrorStart); } } else { for (let i = nTokenStart; i <= nTokenEnd; i++) { - this.lToken[i]["bImmune"] = true; - let nErrorStart = this.nOffsetWithinParagraph + this.lToken[i]["nStart"]; + this.lTokens[i]["bImmune"] = true; + let nErrorStart = this.nOffsetWithinParagraph + this.lTokens[i]["nStart"]; if (this.dError.has(nErrorStart)) { this.dError.delete(nErrorStart); } } } @@ -808,24 +818,24 @@ _createErrorFromTokens (sSugg, nTokenOffset, nLastToken, iFirstToken, nStart, nEnd, sLineId, sRuleId, bCaseSvty, sMsg, sURL, bShowRuleId, sOption, bContext) { // suggestions let lSugg = []; if (sSugg.startsWith("=")) { - sSugg = gc_functions[sSugg.slice(1)](this.lToken, nTokenOffset, nLastToken); - //sSugg = oEvalFunc[sSugg.slice(1)](this.lToken, nTokenOffset, nLastToken); + sSugg = gc_functions[sSugg.slice(1)](this.lTokens, nTokenOffset, nLastToken); + //sSugg = oEvalFunc[sSugg.slice(1)](this.lTokens, nTokenOffset, nLastToken); lSugg = (sSugg) ? sSugg.split("|") : []; } else if (sSugg == "_") { lSugg = []; } else { lSugg = this._expand(sSugg, nTokenOffset, nLastToken).split("|"); } - if (bCaseSvty && lSugg.length > 0 && this.lToken[iFirstToken]["sValue"].slice(0,1).gl_isUpperCase()) { + if (bCaseSvty && lSugg.length > 0 && this.lTokens[iFirstToken]["sValue"].slice(0,1).gl_isUpperCase()) { lSugg = (this.sSentence.slice(nStart, nEnd).gl_isUpperCase()) ? lSugg.map((s) => s.toUpperCase()) : capitalizeArray(lSugg); } // Message - let sMessage = (sMsg.startsWith("=")) ? gc_functions[sMsg.slice(1)](this.lToken, nTokenOffset, nLastToken) : this._expand(sMsg, nTokenOffset, nLastToken); - //let sMessage = (sMsg.startsWith("=")) ? oEvalFunc[sMsg.slice(1)](this.lToken, nTokenOffset, nLastToken) : this._expand(sMsg, nTokenOffset, nLastToken); + let sMessage = (sMsg.startsWith("=")) ? gc_functions[sMsg.slice(1)](this.lTokens, nTokenOffset, nLastToken) : this._expand(sMsg, nTokenOffset, nLastToken); + //let sMessage = (sMsg.startsWith("=")) ? 
oEvalFunc[sMsg.slice(1)](this.lTokens, nTokenOffset, nLastToken) : this._expand(sMsg, nTokenOffset, nLastToken); if (bShowRuleId) { sMessage += " #" + sLineId + " / " + sRuleId; } // return this._createError(nStart, nEnd, sLineId, sRuleId, sOption, sMessage, lSugg, sURL, bContext); @@ -853,13 +863,13 @@ _expand (sText, nTokenOffset, nLastToken) { let m; while ((m = /\\(-?[0-9]+)/.exec(sText)) !== null) { if (m[1].slice(0,1) == "-") { - sText = sText.replace(m[0], this.lToken[nLastToken+parseInt(m[1],10)+1]["sValue"]); + sText = sText.replace(m[0], this.lTokens[nLastToken+parseInt(m[1],10)+1]["sValue"]); } else { - sText = sText.replace(m[0], this.lToken[nTokenOffset+parseInt(m[1],10)]["sValue"]); + sText = sText.replace(m[0], this.lTokens[nTokenOffset+parseInt(m[1],10)]["sValue"]); } } return sText; } @@ -894,45 +904,45 @@ _tagAndPrepareTokenForRewriting (sWhat, nTokenRewriteStart, nTokenRewriteEnd, nTokenOffset, nLastToken, bCaseSvty, bDebug) { // text processor: rewrite tokens between <nTokenRewriteStart> and <nTokenRewriteEnd> position if (sWhat === "*") { // purge text if (nTokenRewriteEnd - nTokenRewriteStart == 0) { - this.lToken[nTokenRewriteStart]["bToRemove"] = true; + this.lTokens[nTokenRewriteStart]["bToRemove"] = true; } else { for (let i = nTokenRewriteStart; i <= nTokenRewriteEnd; i++) { - this.lToken[i]["bToRemove"] = true; + this.lTokens[i]["bToRemove"] = true; } } } else if (sWhat === "␣") { // merge tokens - this.lToken[nTokenRewriteStart]["nMergeUntil"] = nTokenRewriteEnd; + this.lTokens[nTokenRewriteStart]["nMergeUntil"] = nTokenRewriteEnd; } else if (sWhat === "_") { // neutralized token if (nTokenRewriteEnd - nTokenRewriteStart == 0) { - this.lToken[nTokenRewriteStart]["sNewValue"] = "_"; + this.lTokens[nTokenRewriteStart]["sNewValue"] = "_"; } else { for (let i = nTokenRewriteStart; i <= nTokenRewriteEnd; i++) { - this.lToken[i]["sNewValue"] = "_"; + this.lTokens[i]["sNewValue"] = "_"; } } } else { if (sWhat.startsWith("=")) { - sWhat = gc_functions[sWhat.slice(1)](this.lToken, nTokenOffset, nLastToken); - //sWhat = oEvalFunc[sWhat.slice(1)](this.lToken, nTokenOffset, nLastToken); + sWhat = gc_functions[sWhat.slice(1)](this.lTokens, nTokenOffset, nLastToken); + //sWhat = oEvalFunc[sWhat.slice(1)](this.lTokens, nTokenOffset, nLastToken); } else { sWhat = this._expand(sWhat, nTokenOffset, nLastToken); } - let bUppercase = bCaseSvty && this.lToken[nTokenRewriteStart]["sValue"].slice(0,1).gl_isUpperCase(); + let bUppercase = bCaseSvty && this.lTokens[nTokenRewriteStart]["sValue"].slice(0,1).gl_isUpperCase(); if (nTokenRewriteEnd - nTokenRewriteStart == 0) { // one token if (bUppercase) { sWhat = sWhat.gl_toCapitalize(); } - this.lToken[nTokenRewriteStart]["sNewValue"] = sWhat; + this.lTokens[nTokenRewriteStart]["sNewValue"] = sWhat; } else { // several tokens let lTokenValue = sWhat.split("|"); if (lTokenValue.length != (nTokenRewriteEnd - nTokenRewriteStart + 1)) { @@ -943,16 +953,16 @@ } let j = 0; for (let i = nTokenRewriteStart; i <= nTokenRewriteEnd; i++) { let sValue = lTokenValue[j]; if (!sValue || sValue === "*") { - this.lToken[i]["bToRemove"] = true; + this.lTokens[i]["bToRemove"] = true; } else { if (bUppercase) { sValue = sValue.gl_toCapitalize(); } - this.lToken[i]["sNewValue"] = sValue; + this.lTokens[i]["sNewValue"] = sValue; } j++; } } } @@ -962,22 +972,25 @@ // rewrite the sentence, modify tokens, purge the token list if (bDebug) { console.log("REWRITE"); } let lNewToken = []; + let lNewTokens0 = []; let nMergeUntil = 0; let oMergingToken = null; - for (let [iToken, oToken] of 
this.lToken.entries()) { + for (let [iToken, oToken] of this.lTokens.entries()) { let bKeepToken = true; + let bKeepToken0 = true; if (oToken["sType"] != "INFO") { if (nMergeUntil && iToken <= nMergeUntil) { oMergingToken["sValue"] += " ".repeat(oToken["nStart"] - oMergingToken["nEnd"]) + oToken["sValue"]; oMergingToken["nEnd"] = oToken["nEnd"]; if (bDebug) { console.log(" MERGED TOKEN: " + oMergingToken["sValue"]); } bKeepToken = false; + bKeepToken0 = false; } if (oToken.hasOwnProperty("nMergeUntil")) { if (iToken > nMergeUntil) { // this token is not already merged with a previous token oMergingToken = oToken; } @@ -1017,16 +1030,23 @@ catch (e) { console.log(this.asString()); console.log(oToken); } } + if (this.lTokens0 !== null && bKeepToken0) { + lNewTokens0.push(oToken); + } } if (bDebug) { console.log(" TEXT REWRITED: " + this.sSentence); } - this.lToken.length = 0; - this.lToken = lNewToken; + this.lTokens.length = 0; + this.lTokens = lNewToken; + if (this.lTokens0 !== null) { + this.lTokens0.length = 0; + this.lTokens0 = lNewTokens0; + } } }; if (typeof(exports) !== 'undefined') { Index: gc_core/py/lang_core/gc_engine.py ================================================================== --- gc_core/py/lang_core/gc_engine.py +++ gc_core/py/lang_core/gc_engine.py @@ -233,11 +233,11 @@ self.sText = sText self.sText0 = sText self.sSentence = "" self.sSentence0 = "" self.nOffsetWithinParagraph = 0 - self.lToken = [] + self.lTokens = [] self.dTokenPos = {} # {position: token} self.dTags = {} # {position: tags} self.dError = {} # {position: error} self.dSentenceError = {} # {position: error} (for the current sentence only) self.dErrorPriority = {} # {position: priority of the current error} @@ -244,11 +244,11 @@ def __str__ (self): s = "===== TEXT =====\n" s += "sentence: " + self.sSentence0 + "\n" s += "now: " + self.sSentence + "\n" - for dToken in self.lToken: + for dToken in self.lTokens: s += '#{i}\t{nStart}:{nEnd}\t{sValue}\t{sType}'.format(**dToken) if "lMorph" in dToken: s += "\t" + str(dToken["lMorph"]) if "aTags" in dToken: s += "\t" + str(dToken["aTags"]) @@ -265,10 +265,12 @@ # parse paragraph try: self.parseText(self.sText, self.sText0, True, 0, sCountry, dOpt, bShowRuleId, bDebug, bContext) except: raise + self.lTokens = None + self.lTokens0 = None if bFullInfo: lParagraphErrors = list(self.dError.values()) lSentences = [] self.dSentenceError.clear() # parse sentences @@ -277,22 +279,28 @@ if 4 < (iEnd - iStart) < 2000: try: self.sSentence = sText[iStart:iEnd] self.sSentence0 = self.sText0[iStart:iEnd] self.nOffsetWithinParagraph = iStart - self.lToken = list(_oTokenizer.genTokens(self.sSentence, True)) - self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken if dToken["sType"] != "INFO" } + self.lTokens = list(_oTokenizer.genTokens(self.sSentence, True)) + self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lTokens if dToken["sType"] != "INFO" } + if bFullInfo: + self.lTokens0 = list(self.lTokens) # the list of tokens is duplicated, so that no token is lost when the analysis rewrites the sentence + self.parseText(self.sSentence, self.sSentence0, False, iStart, sCountry, dOpt, bShowRuleId, bDebug, bContext) if bFullInfo: - dSentence = { "nStart": iStart, "nEnd": iEnd, "sSentence": self.sSentence, "lToken": list(self.lToken) } - for dToken in dSentence["lToken"]: + for dToken in self.lTokens0: if dToken["sType"] == "WORD": dToken["bValidToken"] = _oSpellChecker.isValidToken(dToken["sValue"]) - # the list of tokens is duplicated, to keep all tokens from being 
deleted when analysis - self.parseText(self.sSentence, self.sSentence0, False, iStart, sCountry, dOpt, bShowRuleId, bDebug, bContext) - if bFullInfo: - dSentence["lGrammarErrors"] = list(self.dSentenceError.values()) - lSentences.append(dSentence) + if "lMorph" not in dToken: + dToken["lMorph"] = _oSpellChecker.getMorph(dToken["sValue"]) + lSentences.append({ + "nStart": iStart, + "nEnd": iEnd, + "sSentence": self.sSentence0, + "lTokens": self.lTokens0, + "lGrammarErrors": list(self.dSentenceError.values()) + }) self.dSentenceError.clear() except: raise if bFullInfo: # Grammar checking and sentence analysis @@ -379,18 +387,18 @@ self.sSentence = sText def update (self, sSentence, bDebug=False): "update and retokenize" self.sSentence = sSentence - lNewToken = list(_oTokenizer.genTokens(sSentence, True)) - for dToken in lNewToken: + lNewTokens = list(_oTokenizer.genTokens(sSentence, True)) + for dToken in lNewTokens: if "lMorph" in self.dTokenPos.get(dToken["nStart"], {}): dToken["lMorph"] = self.dTokenPos[dToken["nStart"]]["lMorph"] if "aTags" in self.dTokenPos.get(dToken["nStart"], {}): dToken["aTags"] = self.dTokenPos[dToken["nStart"]]["aTags"] - self.lToken = lNewToken - self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken if dToken["sType"] != "INFO" } + self.lTokens = lNewTokens + self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lTokens if dToken["sType"] != "INFO" } if bDebug: echo("UPDATE:") echo(self) def _getNextPointers (self, dToken, dGraph, dPointer, bDebug=False): @@ -550,11 +558,11 @@ def parseGraph (self, dGraph, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False): "parse graph with tokens from the text and execute actions encountered" lPointer = [] bTagAndRewrite = False - for iToken, dToken in enumerate(self.lToken): + for iToken, dToken in enumerate(self.lTokens): if bDebug: echo("TOKEN: " + dToken["sValue"]) # check arcs for each existing pointer lNextPointer = [] for dPointer in lPointer: @@ -591,20 +599,20 @@ # Disambiguator [ option, condition, "=", replacement/suggestion/action ] # Tag [ option, condition, "/", replacement/suggestion/action, iTokenStart, iTokenEnd ] # Immunity [ option, condition, "!", "", iTokenStart, iTokenEnd ] # Test [ option, condition, ">", "" ] if not sOption or dOptions.get(sOption, False): - bCondMemo = not sFuncCond or getattr(gc_functions, sFuncCond)(self.lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, self.dTags, self.sSentence, self.sSentence0) + bCondMemo = not sFuncCond or getattr(gc_functions, sFuncCond)(self.lTokens, nTokenOffset, nLastToken, sCountry, bCondMemo, self.dTags, self.sSentence, self.sSentence0) if bCondMemo: if cActionType == "-": # grammar error iTokenStart, iTokenEnd, cStartLimit, cEndLimit, bCaseSvty, nPriority, sMessage, iURL = eAct nTokenErrorStart = nTokenOffset + iTokenStart if iTokenStart > 0 else nLastToken + iTokenStart - if "bImmune" not in self.lToken[nTokenErrorStart]: + if "bImmune" not in self.lTokens[nTokenErrorStart]: nTokenErrorEnd = nTokenOffset + iTokenEnd if iTokenEnd > 0 else nLastToken + iTokenEnd - nErrorStart = self.nOffsetWithinParagraph + (self.lToken[nTokenErrorStart]["nStart"] if cStartLimit == "<" else self.lToken[nTokenErrorStart]["nEnd"]) - nErrorEnd = self.nOffsetWithinParagraph + (self.lToken[nTokenErrorEnd]["nEnd"] if cEndLimit == ">" else self.lToken[nTokenErrorEnd]["nStart"]) + nErrorStart = self.nOffsetWithinParagraph + (self.lTokens[nTokenErrorStart]["nStart"] if cStartLimit == "<" else 
self.lTokens[nTokenErrorStart]["nEnd"]) + nErrorEnd = self.nOffsetWithinParagraph + (self.lTokens[nTokenErrorEnd]["nEnd"] if cEndLimit == ">" else self.lTokens[nTokenErrorEnd]["nStart"]) if nErrorStart not in self.dError or nPriority > self.dErrorPriority.get(nErrorStart, -1): self.dError[nErrorStart] = self._createErrorFromTokens(sWhat, nTokenOffset, nLastToken, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, bCaseSvty, \ sMessage, _rules_graph.dURL.get(iURL, ""), bShowRuleId, sOption, bContext) self.dErrorPriority[nErrorStart] = nPriority self.dSentenceError[nErrorStart] = self.dError[nErrorStart] @@ -615,31 +623,31 @@ nTokenStart = nTokenOffset + eAct[0] if eAct[0] > 0 else nLastToken + eAct[0] nTokenEnd = nTokenOffset + eAct[1] if eAct[1] > 0 else nLastToken + eAct[1] self._tagAndPrepareTokenForRewriting(sWhat, nTokenStart, nTokenEnd, nTokenOffset, nLastToken, eAct[2], bDebug) bChange = True if bDebug: - echo(" TEXT_PROCESSOR: [{}:{}] > {}".format(self.lToken[nTokenStart]["sValue"], self.lToken[nTokenEnd]["sValue"], sWhat)) + echo(" TEXT_PROCESSOR: [{}:{}] > {}".format(self.lTokens[nTokenStart]["sValue"], self.lTokens[nTokenEnd]["sValue"], sWhat)) elif cActionType == "=": # disambiguation - getattr(gc_functions, sWhat)(self.lToken, nTokenOffset, nLastToken) + getattr(gc_functions, sWhat)(self.lTokens, nTokenOffset, nLastToken) if bDebug: - echo(" DISAMBIGUATOR: ({}) [{}:{}]".format(sWhat, self.lToken[nTokenOffset+1]["sValue"], self.lToken[nLastToken]["sValue"])) + echo(" DISAMBIGUATOR: ({}) [{}:{}]".format(sWhat, self.lTokens[nTokenOffset+1]["sValue"], self.lTokens[nLastToken]["sValue"])) elif cActionType == ">": # we do nothing, this test is just a condition to apply all following actions if bDebug: echo(" COND_OK") elif cActionType == "/": # Tag nTokenStart = nTokenOffset + eAct[0] if eAct[0] > 0 else nLastToken + eAct[0] nTokenEnd = nTokenOffset + eAct[1] if eAct[1] > 0 else nLastToken + eAct[1] for i in range(nTokenStart, nTokenEnd+1): - if "aTags" in self.lToken[i]: - self.lToken[i]["aTags"].update(sWhat.split("|")) + if "aTags" in self.lTokens[i]: + self.lTokens[i]["aTags"].update(sWhat.split("|")) else: - self.lToken[i]["aTags"] = set(sWhat.split("|")) + self.lTokens[i]["aTags"] = set(sWhat.split("|")) if bDebug: - echo(" TAG: {} > [{}:{}]".format(sWhat, self.lToken[nTokenStart]["sValue"], self.lToken[nTokenEnd]["sValue"])) + echo(" TAG: {} > [{}:{}]".format(sWhat, self.lTokens[nTokenStart]["sValue"], self.lTokens[nTokenEnd]["sValue"])) for sTag in sWhat.split("|"): if sTag not in self.dTags: self.dTags[sTag] = [nTokenStart, nTokenEnd] else: self.dTags[sTag][0] = min(nTokenStart, self.dTags[sTag][0]) @@ -649,18 +657,18 @@ if bDebug: echo(" IMMUNITY: " + sLineId + " / " + sRuleId) nTokenStart = nTokenOffset + eAct[0] if eAct[0] > 0 else nLastToken + eAct[0] nTokenEnd = nTokenOffset + eAct[1] if eAct[1] > 0 else nLastToken + eAct[1] if nTokenEnd - nTokenStart == 0: - self.lToken[nTokenStart]["bImmune"] = True - nErrorStart = self.nOffsetWithinParagraph + self.lToken[nTokenStart]["nStart"] + self.lTokens[nTokenStart]["bImmune"] = True + nErrorStart = self.nOffsetWithinParagraph + self.lTokens[nTokenStart]["nStart"] if nErrorStart in self.dError: del self.dError[nErrorStart] else: for i in range(nTokenStart, nTokenEnd+1): - self.lToken[i]["bImmune"] = True - nErrorStart = self.nOffsetWithinParagraph + self.lToken[i]["nStart"] + self.lTokens[i]["bImmune"] = True + nErrorStart = self.nOffsetWithinParagraph + self.lTokens[i]["nStart"] if nErrorStart in self.dError: del 
self.dError[nErrorStart] else: echo("# error: unknown action at " + sLineId) elif cActionType == ">": @@ -694,20 +702,20 @@ return self._createErrorAsDict(nStart, nEnd, sLineId, sRuleId, sOption, sMessage, lSugg, sURL, bContext) def _createErrorFromTokens (self, sSugg, nTokenOffset, nLastToken, iFirstToken, nStart, nEnd, sLineId, sRuleId, bCaseSvty, sMsg, sURL, bShowRuleId, sOption, bContext): # suggestions if sSugg[0:1] == "=": - sSugg = getattr(gc_functions, sSugg[1:])(self.lToken, nTokenOffset, nLastToken) + sSugg = getattr(gc_functions, sSugg[1:])(self.lTokens, nTokenOffset, nLastToken) lSugg = sSugg.split("|") if sSugg else [] elif sSugg == "_": lSugg = [] else: lSugg = self._expand(sSugg, nTokenOffset, nLastToken).split("|") - if bCaseSvty and lSugg and self.lToken[iFirstToken]["sValue"][0:1].isupper(): + if bCaseSvty and lSugg and self.lTokens[iFirstToken]["sValue"][0:1].isupper(): lSugg = list(map(lambda s: s.upper(), lSugg)) if self.sSentence[nStart:nEnd].isupper() else list(map(lambda s: s[0:1].upper()+s[1:], lSugg)) # Message - sMessage = getattr(gc_functions, sMsg[1:])(self.lToken, nTokenOffset, nLastToken) if sMsg[0:1] == "=" else self._expand(sMsg, nTokenOffset, nLastToken) + sMessage = getattr(gc_functions, sMsg[1:])(self.lTokens, nTokenOffset, nLastToken) if sMsg[0:1] == "=" else self._expand(sMsg, nTokenOffset, nLastToken) if bShowRuleId: sMessage += " #" + sLineId + " / " + sRuleId # if _bWriterError: return self._createErrorForWriter(nStart, nEnd - nStart, sRuleId, sOption, sMessage, lSugg, sURL) @@ -752,13 +760,13 @@ return dErr def _expand (self, sText, nTokenOffset, nLastToken): for m in re.finditer(r"\\(-?[0-9]+)", sText): if m.group(1)[0:1] == "-": - sText = sText.replace(m.group(0), self.lToken[nLastToken+int(m.group(1))+1]["sValue"]) + sText = sText.replace(m.group(0), self.lTokens[nLastToken+int(m.group(1))+1]["sValue"]) else: - sText = sText.replace(m.group(0), self.lToken[nTokenOffset+int(m.group(1))]["sValue"]) + sText = sText.replace(m.group(0), self.lTokens[nTokenOffset+int(m.group(1))]["sValue"]) return sText def rewriteText (self, sText, sRepl, iGroup, m, bUppercase): "text processor: write <sRepl> in <sText> at <iGroup> position" nLen = m.end(iGroup) - m.start(iGroup) @@ -781,80 +789,86 @@ def _tagAndPrepareTokenForRewriting (self, sWhat, nTokenRewriteStart, nTokenRewriteEnd, nTokenOffset, nLastToken, bCaseSvty, bDebug): "text processor: rewrite tokens between <nTokenRewriteStart> and <nTokenRewriteEnd> position" if sWhat == "*": # purge text if nTokenRewriteEnd - nTokenRewriteStart == 0: - self.lToken[nTokenRewriteStart]["bToRemove"] = True + self.lTokens[nTokenRewriteStart]["bToRemove"] = True else: for i in range(nTokenRewriteStart, nTokenRewriteEnd+1): - self.lToken[i]["bToRemove"] = True + self.lTokens[i]["bToRemove"] = True elif sWhat == "␣": # merge tokens - self.lToken[nTokenRewriteStart]["nMergeUntil"] = nTokenRewriteEnd + self.lTokens[nTokenRewriteStart]["nMergeUntil"] = nTokenRewriteEnd elif sWhat == "_": # neutralized token if nTokenRewriteEnd - nTokenRewriteStart == 0: - self.lToken[nTokenRewriteStart]["sNewValue"] = "_" + self.lTokens[nTokenRewriteStart]["sNewValue"] = "_" else: for i in range(nTokenRewriteStart, nTokenRewriteEnd+1): - self.lToken[i]["sNewValue"] = "_" + self.lTokens[i]["sNewValue"] = "_" else: if sWhat.startswith("="): - sWhat = getattr(gc_functions, sWhat[1:])(self.lToken, nTokenOffset, nLastToken) + sWhat = getattr(gc_functions, sWhat[1:])(self.lTokens, nTokenOffset, nLastToken) else: sWhat = self._expand(sWhat, nTokenOffset, nLastToken) - bUppercase = bCaseSvty and 
self.lToken[nTokenRewriteStart]["sValue"][0:1].isupper() + bUppercase = bCaseSvty and self.lTokens[nTokenRewriteStart]["sValue"][0:1].isupper() if nTokenRewriteEnd - nTokenRewriteStart == 0: # one token if bUppercase: sWhat = sWhat[0:1].upper() + sWhat[1:] - self.lToken[nTokenRewriteStart]["sNewValue"] = sWhat + self.lTokens[nTokenRewriteStart]["sNewValue"] = sWhat else: # several tokens lTokenValue = sWhat.split("|") if len(lTokenValue) != (nTokenRewriteEnd - nTokenRewriteStart + 1): if (bDebug): echo("Error. Text processor: number of replacements != number of tokens.") return for i, sValue in zip(range(nTokenRewriteStart, nTokenRewriteEnd+1), lTokenValue): if not sValue or sValue == "*": - self.lToken[i]["bToRemove"] = True + self.lTokens[i]["bToRemove"] = True else: if bUppercase: sValue = sValue[0:1].upper() + sValue[1:] - self.lToken[i]["sNewValue"] = sValue + self.lTokens[i]["sNewValue"] = sValue def rewriteFromTags (self, bDebug=False): "rewrite the sentence, modify tokens, purge the token list" if bDebug: echo("REWRITE") - lNewToken = [] + lNewTokens = [] + lNewTokens0 = [] nMergeUntil = 0 dTokenMerger = {} - for iToken, dToken in enumerate(self.lToken): + for iToken, dToken in enumerate(self.lTokens): bKeepToken = True + bKeepToken0 = True if dToken["sType"] != "INFO": if nMergeUntil and iToken <= nMergeUntil: + # token to merge dTokenMerger["sValue"] += " " * (dToken["nStart"] - dTokenMerger["nEnd"]) + dToken["sValue"] dTokenMerger["nEnd"] = dToken["nEnd"] if bDebug: echo(" MERGED TOKEN: " + dTokenMerger["sValue"]) bKeepToken = False + bKeepToken0 = False if "nMergeUntil" in dToken: + # first token to be merged with + if iToken > nMergeUntil: # this token is not to be merged with a previous token dTokenMerger = dToken if dToken["nMergeUntil"] > nMergeUntil: nMergeUntil = dToken["nMergeUntil"] del dToken["nMergeUntil"] elif "bToRemove" in dToken: + # deletion required if bDebug: echo(" REMOVED: " + dToken["sValue"]) self.sSentence = self.sSentence[:dToken["nStart"]] + " " * (dToken["nEnd"] - dToken["nStart"]) + self.sSentence[dToken["nEnd"]:] bKeepToken = False # if bKeepToken: - lNewToken.append(dToken) + lNewTokens.append(dToken) if "sNewValue" in dToken: # rewrite token and sentence if bDebug: echo(dToken["sValue"] + " -> " + dToken["sNewValue"]) dToken["sRealValue"] = dToken["sValue"] @@ -867,9 +881,14 @@ try: del self.dTokenPos[dToken["nStart"]] except KeyError: echo(self) echo(dToken) + if self.lTokens0 is not None and bKeepToken0: + lNewTokens0.append(dToken) if bDebug: echo(" TEXT REWRITED: " + self.sSentence) - self.lToken.clear() - self.lToken = lNewToken + self.lTokens.clear() + self.lTokens = lNewTokens + if self.lTokens0 is not None: + self.lTokens0.clear() + self.lTokens0 = lNewTokens0 Index: gc_lang/fr/webext/content_scripts/init.js ================================================================== --- gc_lang/fr/webext/content_scripts/init.js +++ gc_lang/fr/webext/content_scripts/init.js @@ -342,16 +342,16 @@ parseAndSpellcheck1: function (sText, sDestination, sParagraphId) { this.send("parseAndSpellcheck1", { sText: sText, sCountry: "FR", bDebug: false, bContext: false }, { sDestination: sDestination, sParagraphId: sParagraphId }); }, - getListOfTokens: function (sText) { - this.send("getListOfTokens", { sText: sText }, {}); + parseFull: function (sText, sDestination, sParagraphId) { + this.send("parseFull", { sText: sText, sCountry: "FR", bDebug: false, bContext: false }, { 
sDestination: sDestination }); }, - parseFull: function (sText) { - this.send("parseFull", { sText: sText, sCountry: "FR", bDebug: false, bContext: false }, {}); + getListOfTokens: function (sText, sDestination) { + this.send("getListOfTokens", { sText: sText }, { sDestination: sDestination }); }, getVerb: function (sVerb, bStart=true, bPro=false, bNeg=false, bTpsCo=false, bInt=false, bFem=false) { this.send("getVerb", { sVerb: sVerb, bPro: bPro, bNeg: bNeg, bTpsCo: bTpsCo, bInt: bInt, bFem: bFem }, { bStart: bStart }); }, @@ -420,18 +420,22 @@ if (oInfo.sDestination == "__GrammalectePanel__") { oGrammalecte.oGCPanel.refreshParagraph(oInfo.sParagraphId, result); } break; case "parseFull": - // TODO + if (oInfo.sDestination == "__GrammalectePanel__") { + oGrammalecte.oGCPanel.showParagraphAnalysis(result); + } break; case "getListOfTokens": - if (!bEnd) { - oGrammalecte.oGCPanel.addListOfTokens(result); - } else { - oGrammalecte.oGCPanel.stopWaitIcon(); - oGrammalecte.oGCPanel.endTimer(); + if (oInfo.sDestination == "__GrammalectePanel__") { + if (!bEnd) { + oGrammalecte.oGCPanel.addListOfTokens(result); + } else { + oGrammalecte.oGCPanel.stopWaitIcon(); + oGrammalecte.oGCPanel.endTimer(); + } } break; case "getSpellSuggestions": if (oInfo.sDestination == "__GrammalectePanel__") { oGrammalecte.oGCPanel.oTooltip.setSpellSuggestionsFor(result.sWord, result.aSugg, result.iSuggBlock, oInfo.sErrorId); Index: gc_lang/fr/webext/content_scripts/panel_gc.css ================================================================== --- gc_lang/fr/webext/content_scripts/panel_gc.css +++ gc_lang/fr/webext/content_scripts/panel_gc.css @@ -9,25 +9,23 @@ overflow: auto; } div.grammalecte_paragraph_block { margin: 5px 5px 0 5px; + background-color: hsl(0, 0%, 96%); + border-radius: 2px; } - p.grammalecte_paragraph { margin: 0; padding: 12px; - background-color: hsl(0, 0%, 96%); - border-radius: 2px; line-height: 1.3; text-align: left; font-size: 14px; font-family: "Courier New", Courier, "Lucida Sans Typewriter", "Lucida Typewriter", monospace; color: hsl(0, 0%, 0%); hyphens: none; } - /* Action buttons */ div.grammalecte_paragraph_actions { @@ -45,10 +43,17 @@ font-size: 14px; color: hsl(0, 0%, 96%); border-radius: 2px; } +div.grammalecte_paragraph_actions .grammalecte_blue { + color: hsl(0, 0%, 80%); +} +div.grammalecte_paragraph_actions .grammalecte_blue:hover { + background-color: hsl(210, 50%, 40%); + color: hsl(0, 0%, 100%); +} div.grammalecte_paragraph_actions .grammalecte_green { color: hsl(0, 0%, 80%); } div.grammalecte_paragraph_actions .grammalecte_green:hover { background-color: hsl(120, 50%, 40%); Index: gc_lang/fr/webext/content_scripts/panel_gc.js ================================================================== --- gc_lang/fr/webext/content_scripts/panel_gc.js +++ gc_lang/fr/webext/content_scripts/panel_gc.js @@ -3,19 +3,22 @@ /* jshint esversion:6, -W097 */ /* jslint esversion:6 */ /* global GrammalectePanel, oGrammalecte, oGrammalecteBackgroundPort, showError, window, document, console */ "use strict"; + function onGrammalecteGCPanelClick (xEvent) { try { let xElem = xEvent.target; if (xElem.id) { if (xElem.id.startsWith("grammalecte_sugg")) { oGrammalecte.oGCPanel.applySuggestion(xElem.id); } else if (xElem.id === "grammalecte_tooltip_ignore") { oGrammalecte.oGCPanel.ignoreError(xElem.id); + } else if (xElem.id.startsWith("grammalecte_analysis")) { + oGrammalecte.oGCPanel.sendParagraphToGrammaticalAnalysis(parseInt(xElem.dataset.para_num, 10)); } else if 
(xElem.id.startsWith("grammalecte_check")) { oGrammalecte.oGCPanel.recheckParagraph(parseInt(xElem.dataset.para_num, 10)); } else if (xElem.id.startsWith("grammalecte_hide")) { xElem.parentNode.parentNode.style.display = "none"; } else if (xElem.id.startsWith("grammalecte_err") @@ -64,10 +67,22 @@ this.iLastEditedParagraph = -1; this.nParagraph = 0; // Lexicographer this.nLxgCount = 0; this.xLxgPanelContent = oGrammalecte.createNode("div", {id: "grammalecte_lxg_panel_content"}); + this.xLxgInputBlock = oGrammalecte.createNode("div", {id: "grammalecte_lxg_input_block"}); + this.xLxgInput = oGrammalecte.createNode("div", {id: "grammalecte_lxg_input", lang: "fr", contentEditable: "true"}); + this.xLxgInputButton = oGrammalecte.createNode("div", {className: "grammalecte_lxg_input_button", textContent: "Analyse grammaticale"}); + this.xLxgInputButton.addEventListener("click", () => { this.grammaticalAnalysis(); }, false); + this.xLxgInputButton2 = oGrammalecte.createNode("div", {className: "grammalecte_lxg_input_button", textContent: "Analyse lexicale"}); + this.xLxgInputButton2.addEventListener("click", () => { this.getListOfTokens(); }, false); + this.xLxgInputBlock.appendChild(this.xLxgInput); + this.xLxgInputBlock.appendChild(this.xLxgInputButton); + this.xLxgInputBlock.appendChild(this.xLxgInputButton2); + this.xLxgPanelContent.appendChild(this.xLxgInputBlock); + this.xLxgResultZone = oGrammalecte.createNode("div", {id: "grammalecte_lxg_result_zone"}); + this.xLxgPanelContent.appendChild(this.xLxgResultZone); this.xPanelContent.appendChild(this.xLxgPanelContent); // Conjugueur this.xConjPanelContent = oGrammalecte.createNode("div", {id: "grammalecte_conj_panel_content"}); this.xConjPanelContent.innerHTML = sGrammalecteConjugueurHTML; // @Reviewers: sGrammalecteConjugueurHTML is a const value defined in this.xPanelContent.appendChild(this.xConjPanelContent); @@ -112,14 +127,10 @@ this.setAutoRefreshButton(); } this.xLxgButton.onclick = () => { if (!this.bWorking) { this.showLexicographer(); - this.clearLexicographer(); - this.startWaitIcon(); - oGrammalecteBackgroundPort.getListOfTokens(this.oTextControl.getText()); - //oGrammalecteBackgroundPort.parseFull(this.oTextControl.getText()) } }; this.xConjButton.onclick = () => { if (!this.bWorking) { this.showConjugueur(); @@ -238,14 +249,14 @@ addParagraphResult (oResult) { try { this.resetTimer(); if (oResult && (oResult.sParagraph.trim() !== "" || oResult.aGrammErr.length > 0 || oResult.aSpellErr.length > 0)) { - let xNodeDiv = oGrammalecte.createNode("div", {className: "grammalecte_paragraph_block"}); // actions let xActionsBar = oGrammalecte.createNode("div", {className: "grammalecte_paragraph_actions"}); xActionsBar.appendChild(oGrammalecte.createNode("div", {id: "grammalecte_check" + oResult.iParaNum, className: "grammalecte_paragraph_button grammalecte_green", textContent: "↻", title: "Réanalyser…"}, {para_num: oResult.iParaNum})); + xActionsBar.appendChild(oGrammalecte.createNode("div", {id: "grammalecte_analysis" + oResult.iParaNum, className: "grammalecte_paragraph_button grammalecte_blue", textContent: "»", title: "Analyse grammaticale…"}, {para_num: oResult.iParaNum})); xActionsBar.appendChild(oGrammalecte.createNode("div", {id: "grammalecte_hide" + oResult.iParaNum, className: "grammalecte_paragraph_button grammalecte_red", textContent: "×", title: "Cacher", style: "font-weight: bold;"})); // paragraph let xParagraph = oGrammalecte.createNode("p", {id: "grammalecte_paragraph"+oResult.iParaNum, className: "grammalecte_paragraph", lang: "fr", 
contentEditable: "true"}, {para_num: oResult.iParaNum}); xParagraph.setAttribute("spellcheck", "false"); // doesn’t seem possible to use “spellcheck” as a common attribute. xParagraph.dataset.timer_id = "0"; @@ -260,13 +271,14 @@ this.oTextControl.setParagraph(parseInt(xEvent.target.dataset.para_num, 10), xEvent.target.textContent); }.bind(this) , true); this._tagParagraph(xParagraph, oResult.sParagraph, oResult.iParaNum, oResult.aGrammErr, oResult.aSpellErr); // creation - xNodeDiv.appendChild(xActionsBar); - xNodeDiv.appendChild(xParagraph); - this.xParagraphList.appendChild(xNodeDiv); + let xParagraphBlock = oGrammalecte.createNode("div", {className: "grammalecte_paragraph_block"}); + xParagraphBlock.appendChild(xActionsBar); + xParagraphBlock.appendChild(xParagraph); + this.xParagraphList.appendChild(xParagraphBlock); this.nParagraph += 1; } } catch (e) { showError(e); @@ -530,36 +542,130 @@ // Lexicographer clearLexicographer () { this.nLxgCount = 0; - while (this.xLxgPanelContent.firstChild) { - this.xLxgPanelContent.removeChild(this.xLxgPanelContent.firstChild); - } - } - - addLxgSeparator (sText) { - if (this.xLxgPanelContent.textContent !== "") { - this.xLxgPanelContent.appendChild(oGrammalecte.createNode("div", {className: "grammalecte_lxg_separator", textContent: sText})); - } - } - - addMessageToLxgPanel (sMessage) { - let xNode = oGrammalecte.createNode("div", {className: "grammalecte_panel_flow_message", textContent: sMessage}); - this.xLxgPanelContent.appendChild(xNode); - } - - addListOfTokens (lToken) { + while (this.xLxgResultZone.firstChild) { + this.xLxgResultZone.removeChild(this.xLxgResultZone.firstChild); + } + } + + // Grammatical analysis + + sendParagraphToGrammaticalAnalysis (iParaNum) { + let xParagraph = this.xParent.getElementById("grammalecte_paragraph" + iParaNum); + this.xLxgInput.textContent = xParagraph.textContent; + this.grammaticalAnalysis(); + this.showLexicographer(); + } + + grammaticalAnalysis (iParaNum) { + if (!this.bOpened || this.bWorking) { + return; + } + this.startWaitIcon(); + this.clearLexicographer(); + let sText = this.xLxgInput.innerText.replace(/\n/g, " "); + //console.log(sText); + oGrammalecteBackgroundPort.parseFull(sText, "__GrammalectePanel__"); + } + + showParagraphAnalysis (oResult) { + if (!this.bOpened || oResult === null) { + return; + } + try { + for (let oSentence of oResult.lSentences) { + this.nLxgCount += 1; + if (oSentence.sSentence.trim() !== "") { + let xSentenceBlock = oGrammalecte.createNode("div", {className: "grammalecte_lxg_paragraph_sentence_block"}); + xSentenceBlock.appendChild(oGrammalecte.createNode("div", {className: "grammalecte_lxg_list_num", textContent: this.nLxgCount})); + xSentenceBlock.appendChild(oGrammalecte.createNode("p", {className: "grammalecte_lxg_paragraph_sentence", textContent: oSentence.sSentence})); + let xTokenList = oGrammalecte.createNode("div", {className: "grammalecte_lxg_list_of_tokens"}); + for (let oToken of oSentence.lTokens) { + if (oToken["sType"] != "INFO") { + xTokenList.appendChild(this._createTokenBlock2(oToken)); + } + } + xSentenceBlock.appendChild(xTokenList); + this.xLxgResultZone.appendChild(xSentenceBlock); + } + } + } + catch (e) { + showError(e); + } + this.stopWaitIcon(); + } + + _createTokenBlock2 (oToken) { + let xTokenBlock = oGrammalecte.createNode("div", {className: "grammalecte_lxg_token_block"}); + // token description + xTokenBlock.appendChild(this._createTokenDescr2(oToken)); + // subtoken description + if (oToken.aSubElem) { + let xSubBlock = 
oGrammalecte.createNode("div", {className: "grammalecte_lxg_token_subblock"}); + for (let oSubElem of oToken.aSubElem) { + xSubBlock.appendChild(this._createTokenDescr2(oSubElem)); + } + xTokenBlock.appendChild(xSubBlock); + } + return xTokenBlock; + } + + _createTokenDescr2 (oToken) { + try { + let xTokenDescr = oGrammalecte.createNode("div", {className: "grammalecte_lxg_token_descr"}); + if (oToken.sType == "LOCP") { + xTokenDescr.appendChild(oGrammalecte.createNode("div", {className: "grammalecte_lxg_token_also", textContent: "possiblement › "})); + } + xTokenDescr.appendChild(oGrammalecte.createNode("div", {className: "grammalecte_lxg_token grammalecte_lxg_token_" + oToken.sType, textContent: oToken.sValue})); + xTokenDescr.appendChild(oGrammalecte.createNode("div", {className: "grammalecte_lxg_token_colon", textContent: ":"})); + if (oToken.lMorph.length < 2) { + // one morphology only + xTokenDescr.appendChild(oGrammalecte.createNode("div", {className: "grammalecte_lxg_morph_elem_inline", textContent: oToken.lMorph[0] || ""})); + } else { + // several morphologies + let xMorphList = oGrammalecte.createNode("div", {className: "grammalecte_lxg_morph_list"}); + for (let sLabel of oToken.lMorph) { + xMorphList.appendChild(oGrammalecte.createNode("div", {className: "grammalecte_lxg_morph_elem", textContent: "• " + sLabel})); + } + xTokenDescr.appendChild(xMorphList); + } + return xTokenDescr; + } + catch (e) { + showError(e); + } + } + + // Lexical analysis + + getListOfTokens () { + if (!this.bOpened || this.bWorking) { + return; + } + this.startWaitIcon(); + this.clearLexicographer(); + let sText = this.xLxgInput.innerText; // innerText is used to keep the line breaks (\n) + //console.log(sText); + oGrammalecteBackgroundPort.getListOfTokens(sText, "__GrammalectePanel__"); + } + + addListOfTokens (oResult) { try { - if (lToken) { + if (oResult && oResult.sParagraph != "") { this.nLxgCount += 1; + let xSentenceBlock = oGrammalecte.createNode("div", {className: "grammalecte_lxg_paragraph_sentence_block"}); + xSentenceBlock.appendChild(oGrammalecte.createNode("div", {className: "grammalecte_lxg_list_num", textContent: this.nLxgCount})); + xSentenceBlock.appendChild(oGrammalecte.createNode("p", {className: "grammalecte_lxg_paragraph_sentence", textContent: oResult.sParagraph})); let xTokenList = oGrammalecte.createNode("div", {className: "grammalecte_lxg_list_of_tokens"}); - xTokenList.appendChild(oGrammalecte.createNode("div", {className: "grammalecte_lxg_list_num", textContent: this.nLxgCount})); - for (let oToken of lToken) { + for (let oToken of oResult.lTokens) { xTokenList.appendChild(this._createTokenBlock(oToken)); } - this.xLxgPanelContent.appendChild(xTokenList); + xSentenceBlock.appendChild(xTokenList); + this.xLxgResultZone.appendChild(xSentenceBlock); } } catch (e) { showError(e); } @@ -600,16 +706,10 @@ catch (e) { showError(e); } } - setHidden (sClass, bHidden) { - let xPanelContent = this.xParent.getElementById('grammalecte_panel_content'); - for (let xNode of xPanelContent.getElementsByClassName(sClass)) { - xNode.hidden = bHidden; - } - } // Conjugueur listenConj () { if (!this.bListenConj) { Index: gc_lang/fr/webext/content_scripts/panel_lxg.css ================================================================== --- gc_lang/fr/webext/content_scripts/panel_lxg.css +++ gc_lang/fr/webext/content_scripts/panel_lxg.css @@ -6,21 +6,66 @@ position: absolute; height: 100%; width: 100%; font-size: 13px; } + +div#grammalecte_lxg_input_block { + padding: 10px; + /*background-color: hsl(210, 50%, 95%);*/ + 
/*border-bottom: solid 1px hsl(210, 0%, 90%);*/ + text-align: right; +} + +div#grammalecte_lxg_result_zone { + +} + +div#grammalecte_lxg_input { + min-height: 100px; + padding: 10px; + background-color: hsl(210, 0%, 100%); + border: solid 1px hsl(210, 20%, 80%); + border-radius: 3px; + font-family: "Courier New", Courier, "Lucida Sans Typewriter", "Lucida Typewriter", monospace; + text-align: left; +} + +div.grammalecte_lxg_input_button { + display: inline-block; + margin: 0 10px 0 0; + padding: 3px 10px; + background-color: hsl(210, 50%, 50%); + color: hsl(210, 50%, 98%); + text-align: center; + cursor: pointer; + border-radius: 0 0 3px 3px; +} + +div.grammalecte_lxg_paragraph_sentence_block { + margin: 5px 0px 20px 0px; + background-color: hsl(210, 50%, 95%); + border-radius: 3px; + border-top: solid 1px hsl(210, 50%, 90%); + border-bottom: solid 1px hsl(210, 50%, 90%); + hyphens: none; +} +p.grammalecte_lxg_paragraph_sentence { + padding: 3px 10px; + font-weight: bold; + color: hsl(210, 50%, 40%); +} div.grammalecte_lxg_list_of_tokens { - margin: 10px 5px 0 5px; padding: 10px; - background-color: hsla(0, 0%, 95%, 1); + background-color: hsl(210, 50%, 99%); border-radius: 5px; } div.grammalecte_lxg_list_num { float: right; - margin: -12px 0 5px 10px; + margin: -2px 5px 5px 10px; padding: 5px 10px; font-family: "Trebuchet MS", "Fira Sans", "Ubuntu Condensed", "Liberation Sans", sans-serif; font-size: 14px; font-weight: bold; border-radius: 0 0 4px 4px; Index: gc_lang/fr/webext/gce_worker.js ================================================================== --- gc_lang/fr/webext/gce_worker.js +++ gc_lang/fr/webext/gce_worker.js @@ -229,29 +229,24 @@ let aGrammErr = gc_engine.parse(sParagraph, sCountry, bDebug, null, bContext); let aSpellErr = oSpellChecker.parseParagraph(sParagraph); postMessage(createResponse("parseAndSpellcheck1", {sParagraph: sParagraph, aGrammErr: aGrammErr, aSpellErr: aSpellErr}, oInfo, true)); } -function parseFull (sText, sCountry, bDebug, bContext, oInfo={}) { - let i = 0; - sText = sText.replace(/­/g, "").normalize("NFC"); - for (let sParagraph of text.getParagraph(sText)) { - let lSentence = gc_engine.parse(sParagraph, sCountry, bDebug, null, bContext, true); - console.log("*", lSentence); - postMessage(createResponse("parseFull", {sParagraph: sParagraph, iParaNum: i, lSentence: lSentence}, oInfo, false)); - i += 1; - } - postMessage(createResponse("parseFull", null, oInfo, true)); +function parseFull (sParagraph, sCountry, bDebug, bContext, oInfo={}) { + sParagraph = sParagraph.replace(/­/g, "").normalize("NFC"); + let [lParagraphErrors, lSentences] = gc_engine.parse(sParagraph, sCountry, bDebug, null, bContext, true); + //console.log(lSentences); + postMessage(createResponse("parseFull", { lParagraphErrors: lParagraphErrors, lSentences: lSentences }, oInfo, true)); } function getListOfTokens (sText, oInfo={}) { // lexicographer try { sText = sText.replace(/­/g, "").normalize("NFC"); for (let sParagraph of text.getParagraph(sText)) { if (sParagraph.trim() !== "") { - postMessage(createResponse("getListOfTokens", lexgraph_fr.getListOfTokensReduc(sParagraph, true), oInfo, false)); + postMessage(createResponse("getListOfTokens", { sParagraph: sParagraph, lTokens: lexgraph_fr.getListOfTokensReduc(sParagraph, true) }, oInfo, false)); } } postMessage(createResponse("getListOfTokens", null, oInfo, true)); } catch (e) { Index: grammalecte-cli.py ================================================================== --- grammalecte-cli.py +++ grammalecte-cli.py @@ -340,11 
+340,11 @@ lParagraphErrors, lSentences = oGrammarChecker.gce.parse(sParagraph, bDebug=xArgs.debug, bFullInfo=True) echo(txt.getReadableErrors(lParagraphErrors, xArgs.width)) for dSentence in lSentences: echo("{nStart}:{nEnd}".format(**dSentence)) echo(" <" + dSentence["sSentence"]+">") - for dToken in dSentence["lToken"]: + for dToken in dSentence["lTokens"]: echo(" {0[nStart]:>3}:{0[nEnd]:<3} {1} {0[sType]:<14} {2} {0[sValue]:<16} {3:<10} {4}".format(dToken, \ "×" if dToken.get("bToRemove", False) else " ", "!" if dToken["sType"] == "WORD" and not dToken.get("bValidToken", False) else " ", " ".join(dToken.get("lMorph", "")), \ "·".join(dToken.get("aTags", "")) ) ) Index: graphspell-js/lexgraph_fr.js ================================================================== --- graphspell-js/lexgraph_fr.js +++ graphspell-js/lexgraph_fr.js @@ -221,16 +221,16 @@ [':Ĉ', [" conjonction (él.),", "Conjonction (élément)"]], [':Cc', [" conjonction de coordination,", "Conjonction de coordination"]], [':Cs', [" conjonction de subordination,", "Conjonction de subordination"]], [':Ĉs', [" conjonction de subordination (él.),", "Conjonction de subordination (élément)"]], - [':Ñ', [" locution nominale (él.),", "Locution nominale (élément)"]], - [':Â', [" locution adjectivale (él.),", "Locution adjectivale (élément)"]], - [':Ṽ', [" locution verbale (él.),", "Locution verbale (élément)"]], - [':Ŵ', [" locution adverbiale (él.),", "Locution adverbiale (élément)"]], - [':Ŕ', [" locution prépositive (él.),", "Locution prépositive (élément)"]], - [':Ĵ', [" locution interjective (él.),", "Locution interjective (élément)"]], + [':ÉN', [" locution nominale (él.),", "Locution nominale (élément)"]], + [':ÉA', [" locution adjectivale (él.),", "Locution adjectivale (élément)"]], + [':ÉV', [" locution verbale (él.),", "Locution verbale (élément)"]], + [':ÉW', [" locution adverbiale (él.),", "Locution adverbiale (élément)"]], + [':ÉR', [" locution prépositive (él.),", "Locution prépositive (élément)"]], + [':ÉJ', [" locution interjective (él.),", "Locution interjective (élément)"]], [':Zp', [" préfixe,", "Préfixe"]], [':Zs', [" suffixe,", "Suffixe"]], [':H', ["", ""]], Index: graphspell-js/spellchecker.js ================================================================== --- graphspell-js/spellchecker.js +++ graphspell-js/spellchecker.js @@ -132,15 +132,47 @@ loadLexicographer (sLangCode) { // load default suggestion module for <sLangCode> if (typeof(process) !== 'undefined') { this.lexicographer = require(`./lexgraph_${sLangCode}.js`); } - else if (typeof(require) !== 'undefined') { - this.lexicographer = require(`resource://grammalecte/graphspell/lexgraph_${sLangCode}.js`); + else if (self && self.hasOwnProperty("lexgraph_"+sLangCode)) { // self is the Worker + this.lexicographer = self["lexgraph_"+sLangCode]; + } + } + + analyze (sWord) { + // returns a list of words and their morphologies + if (!this.lexicographer) { + return []; + } + let lWordAndMorph = []; + for (let sElem of this.lexicographer.split(sWord)) { + if (sElem) { + let lMorph = this.getMorph(sElem); + let sLex = this.lexicographer.analyze(sElem); + let aRes = []; + if (sLex) { + aRes = [ [lMorph.join(" | "), sLex] ]; + } else { + for (let sMorph of lMorph) { + aRes.push([sMorph, this.lexicographer.formatTags(sMorph)]); + } + } + if (aRes.length > 0) { + lWordAndMorph.push([sElem, aRes]); + } + } } + return lWordAndMorph; } + readableMorph (sMorph) { + if (!this.lexicographer) { + return []; + } + return this.lexicographer.formatTags(sMorph); + } // Storage 
activateStorage () { this.bStorage = true; Index: graphspell/spellchecker.py ================================================================== --- graphspell/spellchecker.py +++ graphspell/spellchecker.py @@ -98,11 +98,11 @@ def deactivatePersonalDictionary (self): "deactivate personal dictionary" self.bPersonalDic = False - # Default suggestions + # Lexicographer def loadLexicographer (self, sLangCode): "load default suggestion module for <sLangCode>" try: self.lexicographer = importlib.import_module(".lexgraph_"+sLangCode, "grammalecte.graphspell") @@ -125,10 +125,15 @@ aRes = [ (sMorph, self.lexicographer.formatTags(sMorph)) for sMorph in lMorph ] if aRes: lWordAndMorph.append((sElem, aRes)) return lWordAndMorph + def readableMorph (self, sMorph): + if not self.lexicographer: + return [] + return self.lexicographer.formatTags(sMorph) + # Storage def activateStorage (self): "store all lemmas and morphologies retrieved from the word graph" @@ -233,11 +238,11 @@ return self._dLemmas[sWord] return { s[1:s.find("/")] for s in self.getMorph(sWord) } def suggest (self, sWord, nSuggLimit=10): "generator: returns 1, 2 or 3 lists of suggestions" - if self.lexicographer.dSugg: + if self.lexicographer: if sWord in self.lexicographer.dSugg: yield self.lexicographer.dSugg[sWord].split("|") elif sWord.istitle() and sWord.lower() in self.lexicographer.dSugg: lRes = self.lexicographer.dSugg[sWord.lower()].split("|") yield list(map(lambda sSugg: sSugg[0:1].upper()+sSugg[1:], lRes))
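Usage sketch (editor's illustration, not part of the patch): the reshaped bFullInfo result can be consumed from Python through the same public API that grammalecte-cli.py uses above. The sample sentence is made up; the dictionary keys ("lTokens", "lGrammarErrors", "lMorph", "bValidToken", "sMessage") follow the structure introduced by this change.

import grammalecte

oGrammarChecker = grammalecte.GrammarChecker("fr")
# with bFullInfo=True, parse() now returns the paragraph-level errors and,
# for each sentence, a dict with "nStart", "nEnd", "sSentence", "lTokens"
# and "lGrammarErrors" (instead of the former "lToken" key)
lParagraphErrors, lSentences = oGrammarChecker.gce.parse("Quand nous arrivâmes, les enfant dormait.", bFullInfo=True)
for dSentence in lSentences:
    print(dSentence["sSentence"])
    for dToken in dSentence["lTokens"]:
        # every kept token now carries "lMorph"; WORD tokens also carry "bValidToken"
        print("  {}  {}".format(dToken["sValue"], " | ".join(dToken.get("lMorph", []))))
    for dError in dSentence["lGrammarErrors"]:
        print("  error:", dError.get("sMessage", ""))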