Index: compile_rules_graph.py ================================================================== --- compile_rules_graph.py +++ compile_rules_graph.py @@ -36,12 +36,12 @@ if sCode[0:1] == "=": sCode = sCode[1:] sCode = sCode.replace("__also__", "bCondMemo") sCode = sCode.replace("__else__", "not bCondMemo") sCode = sCode.replace("sContext", "_sAppContext") - sCode = re.sub(r"\b(morph0?|morphVC|value|tag|meta|info)[(]\\(\d+)", 'g_\\1(lToken[nTokenOffset+\\2]', sCode) - sCode = re.sub(r"\b(morph0?|morphVC|value|tag|meta|info)[(]\\-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1]', sCode) + sCode = re.sub(r"\b(morph[0x]?|morphVC|value|tag|meta|info)[(]\\(\d+)", 'g_\\1(lToken[nTokenOffset+\\2]', sCode) + sCode = re.sub(r"\b(morph[0x]?|morphVC|value|tag|meta|info)[(]\\-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1]', sCode) sCode = re.sub(r"\b(select|define|definefrom|rewrite|addmorph|setmeta)[(][\\](\d+)", 'g_\\1(lToken[nTokenOffset+\\2]', sCode) sCode = re.sub(r"\b(select|define|definefrom|rewrite|addmorph|setmeta)[(][\\]-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1]', sCode) sCode = re.sub(r"\b(agreement|suggAgree)[(][\\](\d+), *[\\](\d+)", 'g_\\1(lToken[nTokenOffset+\\2], lToken[nTokenOffset+\\3]', sCode) sCode = re.sub(r"\b(agreement|suggAgree)[(][\\](\d+), *[\\]-(\d+)", 'g_\\1(lToken[nTokenOffset+\\2], lToken[nLastToken-\\3+1]', sCode) sCode = re.sub(r"\b(agreement|suggAgree)[(][\\]-(\d+), *[\\](\d+)", 'g_\\1(lToken[nLastToken-\\2+1], lToken[nTokenOffset+\\3]', sCode) @@ -285,11 +285,11 @@ sAction = sAction[m.end():].strip() if nPriority == -1: nPriority = self.dOptPriority.get(sOption, 4) # valid action? - m = re.search(r"(?P[-=~/!>])(?P-?\d+\.?|)(?P:\.?-?\d+|)(?P:|)>>", sAction) + m = re.search(r"(?P[-=~/!>&])(?P-?\d+\.?|)(?P:\.?-?\d+|)(?P:|)>>", sAction) if not m: print("\n# Error. No action found at: ", sLineId, sActionId) exit() # Condition @@ -373,11 +373,11 @@ if cAction == ">": ## no action, break loop if condition is False return [sLineId, sOption, sCondition, cAction, ""] - if not sAction and cAction != "!": + if not sAction and cAction not in "!#": print(f"\n# Error in action at line <{sLineId}/{sActionId}>: This action is empty.") exit() if sAction[0:1] != "=" and cAction != "=": checkIfThereIsCode(sAction, sActionId) @@ -404,11 +404,11 @@ if (iEndAction - iStartAction + 1) != nToken: print(f"\n# Error in action at line <{sLineId}/{sActionId}>: numbers of modified tokens modified.") elif iStartAction < 0 or iEndAction < 0 and iStartAction != iEndAction: print(f"\n# Warning in action at line <{sLineId}/{sActionId}>: rewriting with possible token position modified.") return [sLineId, sOption, sCondition, cAction, sAction, iStartAction, iEndAction, bCaseSensitivity] - if cAction in "!/": + if cAction in "!/&": ## tags return [sLineId, sOption, sCondition, cAction, sAction, iStartAction, iEndAction] if cAction == "=": ## disambiguator sAction = self.createFunction("da", sAction) @@ -543,21 +543,21 @@ iPrevLine, sPrevLine = lTokenLine[-1] lTokenLine[-1] = [iPrevLine, sPrevLine + " " + sLine.strip()[2:]] elif sLine.startswith(" <<- "): # actions lActions.append([iLine, sLine[12:].strip()]) - if not re.search(r"[-=~/!>](?:-?\d\.?(?::\.?-?\d+|)|):?>>", sLine): + if not re.search(r"[-=~/!>&](?:-?\d\.?(?::\.?-?\d+|)|):?>>", sLine): bActionBlock = True elif sLine.startswith(" && "): # action message iPrevLine, sPrevLine = lActions[-1] lActions[-1] = [iPrevLine, sPrevLine + sLine] elif sLine.startswith(" ") and bActionBlock: # action line continuation iPrevLine, sPrevLine = lActions[-1] lActions[-1] = [iPrevLine, sPrevLine + " " + sLine.strip()] - if re.search(r"[-=~/!>](?:-?\d\.?(?::\.?-?\d+|)|):?>>", sLine): + if re.search(r"[-=~/!>&](?:-?\d\.?(?::\.?-?\d+|)|):?>>", sLine): bActionBlock = False elif re.match("[  ]*$", sLine): # empty line to end merging if not lTokenLine: continue Index: darg.py ================================================================== --- darg.py +++ darg.py @@ -214,13 +214,14 @@ return self.__str__() == other.__str__() def getNodeAsDict (self): "returns the node as a dictionary structure" dNode = {} - dReValue = {} # regex for token values - dReMorph = {} # regex for morph - dMorph = {} # simple search in morph + dReValue = {} # regex for token values + dReMorph = {} # regex for morph + dMorph = {} # simple search in morph + dReMultiMorph = {} # regex for morph in multi-tokens dLemma = {} dPhonet = {} dMeta = {} dTag = {} dRule = {} @@ -227,10 +228,12 @@ for sArc, oNode in self.dArcs.items(): if sArc.startswith("@") and len(sArc) > 1: dReMorph[sArc[1:]] = oNode.__hash__() elif sArc.startswith("$") and len(sArc) > 1: dMorph[sArc[1:]] = oNode.__hash__() + elif sArc.startswith("&") and len(sArc) > 1: + dReMultiMorph[sArc[1:]] = oNode.__hash__() elif sArc.startswith("~") and len(sArc) > 1: dReValue[sArc[1:]] = oNode.__hash__() elif sArc.startswith(">") and len(sArc) > 1: dLemma[sArc[1:]] = oNode.__hash__() elif sArc.startswith("%") and len(sArc) > 1: @@ -245,10 +248,12 @@ dNode[sArc] = oNode.__hash__() if dReValue: dNode[""] = dReValue if dReMorph: dNode[""] = dReMorph + if dReMultiMorph: + dNode[""] = dReMultiMorph if dMorph: dNode[""] = dMorph if dLemma: dNode[""] = dLemma if dPhonet: Index: gc_core/js/lang_core/gc_engine.js ================================================================== --- gc_core/js/lang_core/gc_engine.js +++ gc_core/js/lang_core/gc_engine.js @@ -299,11 +299,11 @@ else if (!sOption || option(sOption)) { for (let [zRegex, bUppercase, sLineId, sRuleId, nPriority, lActions, lGroups, lNegLookBefore] of lRuleGroup) { if (!gc_engine.aIgnoredRules.has(sRuleId)) { while ((m = zRegex.gl_exec2(sText, lGroups, lNegLookBefore)) !== null) { let bCondMemo = null; - for (let [sFuncCond, cActionType, sWhat, ...eAct] of lActions) { + for (let [sFuncCond, cActionType, sAction, ...eAct] of lActions) { // action in lActions: [ condition, action type, replacement/suggestion/action[, iGroup[, message, URL]] ] try { bCondMemo = (!sFuncCond || gc_functions[sFuncCond](sText, sText0, m, this.dTokenPos, sCountry, bCondMemo)); if (bCondMemo) { switch (cActionType) { @@ -310,28 +310,28 @@ case "-": // grammar error //console.log("-> error detected in " + sLineId + "\nzRegex: " + zRegex.source); let nErrorStart = nOffset + m.start[eAct[0]]; if (!this.dError.has(nErrorStart) || nPriority > this.dErrorPriority.get(nErrorStart)) { - this.dError.set(nErrorStart, this._createErrorFromRegex(sText, sText0, sWhat, nOffset, m, eAct[0], sLineId, sRuleId, bUppercase, eAct[1], eAct[2], bShowRuleId, sOption, bContext)); + this.dError.set(nErrorStart, this._createErrorFromRegex(sText, sText0, sAction, nOffset, m, eAct[0], sLineId, sRuleId, bUppercase, eAct[1], eAct[2], bShowRuleId, sOption, bContext)); this.dErrorPriority.set(nErrorStart, nPriority); this.dSentenceError.set(nErrorStart, this.dError.get(nErrorStart)); } break; case "~": // text processor //console.log("-> text processor by " + sLineId + "\nzRegex: " + zRegex.source); - sText = this.rewriteText(sText, sWhat, eAct[0], m, bUppercase); + sText = this.rewriteText(sText, sAction, eAct[0], m, bUppercase); bChange = true; if (bDebug) { console.log("~ " + sText + " -- " + m[eAct[0]] + " # " + sLineId); } break; case "=": // disambiguation //console.log("-> disambiguation by " + sLineId + "\nzRegex: " + zRegex.source); - gc_functions[sWhat](sText, m, this.dTokenPos); + gc_functions[sAction](sText, m, this.dTokenPos); if (bDebug) { console.log("= " + m[0] + " # " + sLineId, "\nDA:", this.dTokenPos); } break; case ">": @@ -389,11 +389,11 @@ console.log("UPDATE:"); console.log(this.asString()); } } - * _getMatches (oGraph, oToken, oNode, bKeep=false) { + * _getNextNodes (oGraph, oToken, oNode, bKeep=false) { // generator: return matches where “values” match arcs try { let bTokenFound = false; // token value if (oNode.hasOwnProperty(oToken["sValue"])) { @@ -577,54 +577,72 @@ yield [null, "", -1]; } // JUMP // Warning! Recurssion! if (oNode.hasOwnProperty("<>")) { - yield* this._getMatches(oGraph, oToken, oGraph[oNode["<>"]], bKeep=true); + yield* this._getNextNodes(oGraph, oToken, oGraph[oNode["<>"]], bKeep=true); } } catch (e) { console.error(e); } } parseGraph (oGraph, sCountry="${country_default}", dOptions=null, bShowRuleId=false, bDebug=false, bContext=false) { // parse graph with tokens from the text and execute actions encountered - let lPointer = []; + let lPointers = []; let bTagAndRewrite = false; try { for (let [iToken, oToken] of this.lTokens.entries()) { if (bDebug) { console.log("TOKEN: " + oToken["sValue"]); } // check arcs for each existing pointer - let lNextPointer = []; - for (let oPointer of lPointer) { - for (let [cActionType, sMatch, iNode] of this._getMatches(oGraph, oToken, oGraph[oPointer["iNode"]])) { - if (cActionType === null) { - lNextPointer.push(oPointer); - continue; - } - if (bDebug) { - console.log(" MATCH: " + cActionType + sMatch); - } - lNextPointer.push({ "iToken1": oPointer["iToken1"], "iNode": iNode }); - } - } - lPointer = lNextPointer; - // check arcs of first nodes - for (let [cActionType, sMatch, iNode] of this._getMatches(oGraph, oToken, oGraph[0])) { - if (cActionType === null) { - continue; - } - if (bDebug) { - console.log(" MATCH: " + cActionType + sMatch); - } - lPointer.push({ "iToken1": iToken, "iNode": iNode }); - } - // check if there is rules to check for each pointer - for (let oPointer of lPointer) { + let lNextPointers = []; + for (let oPointer of lPointers) { + if (oPointer["nMultiEnd"] != -1) { + if (oToken["i"] <= oPointer["nMultiEnd"]) { + lNextPointers.push(oPointer); + } + if (oToken["i"] != oPointer["nMultiEnd"]) { + continue; + } + } + for (let [cNodeType, sMatch, iNode] of this._getNextNodes(oGraph, oToken, oGraph[oPointer["iNode"]])) { + if (cNodeType === null) { + lNextPointers.push(oPointer); + continue; + } + if (bDebug) { + console.log(" MATCH: " + cNodeType + sMatch); + } + let nMultiEnd = (cNodeType != "&") ? -1 : dToken["nMultiStartTo"]; + lNextPointers.push({ "iToken1": oPointer["iToken1"], "iNode": iNode, "nMultiEnd": nMultiEnd }); + } + } + lPointers = lNextPointers; + // check arcs of first nodes + for (let [cNodeType, sMatch, iNode] of this._getNextNodes(oGraph, oToken, oGraph[0])) { + if (cNodeType === null) { + continue; + } + if (bDebug) { + console.log(" MATCH: " + cNodeType + sMatch); + } + let nMultiEnd = (cNodeType != "&") ? -1 : dToken["nMultiStartTo"]; + lPointers.push({ "iToken1": iToken, "iNode": iNode, "nMultiEnd": nMultiEnd }); + } + // check if there is rules to check for each pointer + for (let oPointer of lPointers) { + if (oPointer["nMultiEnd"] != -1) { + if (oToken["i"] < oPointer["nMultiEnd"]) { + continue; + } + if (oToken["i"] == oPointer["nMultiEnd"]) { + oPointer["nMultiEnd"] = -1; + } + } if (oGraph[oPointer["iNode"]].hasOwnProperty("")) { let bChange = this._executeActions(oGraph, oGraph[oPointer["iNode"]][""], oPointer["iToken1"]-1, iToken, dOptions, sCountry, bShowRuleId, bDebug, bContext); if (bChange) { bTagAndRewrite = true; } @@ -651,16 +669,17 @@ for (let sRuleId of oGraph[nextNodeKey]) { try { if (bDebug) { console.log(" >TRY: " + sRuleId + " " + sLineId); } - let [_, sOption, sFuncCond, cActionType, sWhat, ...eAct] = gc_rules_graph.dRule[sRuleId]; + let [_, sOption, sFuncCond, cActionType, sAction, ...eAct] = gc_rules_graph.dRule[sRuleId]; // Suggestion [ option, condition, "-", replacement/suggestion/action, iTokenStart, iTokenEnd, cStartLimit, cEndLimit, bCaseSvty, nPriority, sMessage, iURL ] // TextProcessor [ option, condition, "~", replacement/suggestion/action, iTokenStart, iTokenEnd, bCaseSvty ] // Disambiguator [ option, condition, "=", replacement/suggestion/action ] // Tag [ option, condition, "/", replacement/suggestion/action, iTokenStart, iTokenEnd ] // Immunity [ option, condition, "!", "", iTokenStart, iTokenEnd ] + // Immunity [ option, condition, "&", "", iTokenStart, iTokenEnd ] // Test [ option, condition, ">", "" ] if (!sOption || dOptions.gl_get(sOption, false)) { bCondMemo = !sFuncCond || gc_functions[sFuncCond](this.lTokens, nTokenOffset, nLastToken, sCountry, bCondMemo, this.dTags, this.sSentence, this.sSentence0); if (bCondMemo) { if (cActionType == "-") { @@ -670,11 +689,11 @@ if (!this.lTokens[nTokenErrorStart].hasOwnProperty("sImmunity") || (this.lTokens[nTokenErrorStart]["sImmunity"] != "*" && !this.lTokens[nTokenErrorStart]["sImmunity"].includes(sOption))) { let nTokenErrorEnd = (iTokenEnd > 0) ? nTokenOffset + iTokenEnd : nLastToken + iTokenEnd; let nErrorStart = this.nOffsetWithinParagraph + ((cStartLimit == "<") ? this.lTokens[nTokenErrorStart]["nStart"] : this.lTokens[nTokenErrorStart]["nEnd"]); let nErrorEnd = this.nOffsetWithinParagraph + ((cEndLimit == ">") ? this.lTokens[nTokenErrorEnd]["nEnd"] : this.lTokens[nTokenErrorEnd]["nStart"]); if (!this.dError.has(nErrorStart) || nPriority > this.dErrorPriority.gl_get(nErrorStart, -1)) { - this.dError.set(nErrorStart, this._createErrorFromTokens(sWhat, nTokenOffset, nLastToken, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, bCaseSvty, + this.dError.set(nErrorStart, this._createErrorFromTokens(sAction, nTokenOffset, nLastToken, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, bCaseSvty, sMessage, gc_rules_graph.dURL[iURL], bShowRuleId, sOption, bContext)); this.dErrorPriority.set(nErrorStart, nPriority); this.dSentenceError.set(nErrorStart, this.dError.get(nErrorStart)); if (bDebug) { console.log(" NEW_ERROR: ", this.dError.get(nErrorStart)); @@ -684,21 +703,21 @@ } else if (cActionType == "~") { // text processor let nTokenStart = (eAct[0] > 0) ? nTokenOffset + eAct[0] : nLastToken + eAct[0]; let nTokenEnd = (eAct[1] > 0) ? nTokenOffset + eAct[1] : nLastToken + eAct[1]; - this._tagAndPrepareTokenForRewriting(sWhat, nTokenStart, nTokenEnd, nTokenOffset, nLastToken, eAct[2], bDebug); + this._tagAndPrepareTokenForRewriting(sAction, nTokenStart, nTokenEnd, nTokenOffset, nLastToken, eAct[2], bDebug); bChange = true; if (bDebug) { - console.log(` TEXT_PROCESSOR: [${this.lTokens[nTokenStart]["sValue"]}:${this.lTokens[nTokenEnd]["sValue"]}] > ${sWhat}`); + console.log(` TEXT_PROCESSOR: [${this.lTokens[nTokenStart]["sValue"]}:${this.lTokens[nTokenEnd]["sValue"]}] > ${sAction}`); } } else if (cActionType == "=") { // disambiguation - gc_functions[sWhat](this.lTokens, nTokenOffset, nLastToken); + gc_functions[sAction](this.lTokens, nTokenOffset, nLastToken); if (bDebug) { - console.log(` DISAMBIGUATOR: (${sWhat}) [${this.lTokens[nTokenOffset+1]["sValue"]}:${this.lTokens[nLastToken]["sValue"]}]`); + console.log(` DISAMBIGUATOR: (${sAction}) [${this.lTokens[nTokenOffset+1]["sValue"]}:${this.lTokens[nLastToken]["sValue"]}]`); } } else if (cActionType == ">") { // we do nothing, this test is just a condition to apply all following actions if (bDebug) { @@ -709,19 +728,19 @@ // Tag let nTokenStart = (eAct[0] > 0) ? nTokenOffset + eAct[0] : nLastToken + eAct[0]; let nTokenEnd = (eAct[1] > 0) ? nTokenOffset + eAct[1] : nLastToken + eAct[1]; for (let i = nTokenStart; i <= nTokenEnd; i++) { if (this.lTokens[i].hasOwnProperty("aTags")) { - this.lTokens[i]["aTags"].add(...sWhat.split("|")) + this.lTokens[i]["aTags"].add(...sAction.split("|")) } else { - this.lTokens[i]["aTags"] = new Set(sWhat.split("|")); + this.lTokens[i]["aTags"] = new Set(sAction.split("|")); } } if (bDebug) { - console.log(` TAG: ${sWhat} > [${this.lTokens[nTokenStart]["sValue"]}:${this.lTokens[nTokenEnd]["sValue"]}]`); + console.log(` TAG: ${sAction} > [${this.lTokens[nTokenStart]["sValue"]}:${this.lTokens[nTokenEnd]["sValue"]}]`); } - for (let sTag of sWhat.split("|")) { + for (let sTag of sAction.split("|")) { if (!this.dTags.has(sTag)) { this.dTags.set(sTag, [nTokenStart, nTokenEnd]); } else { this.dTags.set(sTag, [Math.min(nTokenStart, this.dTags.get(sTag)[0]), Math.max(nTokenEnd, this.dTags.get(sTag)[1])]); } @@ -732,11 +751,11 @@ if (bDebug) { console.log(" IMMUNITY: " + sLineId + " / " + sRuleId); } let nTokenStart = (eAct[0] > 0) ? nTokenOffset + eAct[0] : nLastToken + eAct[0]; let nTokenEnd = (eAct[1] > 0) ? nTokenOffset + eAct[1] : nLastToken + eAct[1]; - let sImmunity = sWhat || "*"; + let sImmunity = sAction || "*"; if (nTokenEnd - nTokenStart == 0) { this.lTokens[nTokenStart]["sImmunity"] = sImmunity; let nErrorStart = this.nOffsetWithinParagraph + this.lTokens[nTokenStart]["nStart"]; if (this.dError.has(nErrorStart)) { this.dError.delete(nErrorStart); @@ -748,11 +767,27 @@ if (this.dError.has(nErrorStart)) { this.dError.delete(nErrorStart); } } } - } else { + } + else if (cActionType == "#") { + // multi-tokens + let nTokenStart = (eAct[0] > 0) ? nTokenOffset + eAct[0] : nLastToken + eAct[0]; + let nTokenEnd = (eAct[1] > 0) ? nTokenOffset + eAct[1] : nLastToken + eAct[1]; + let oMultiToken = { + "nTokenStart": nTokenStart, + "nTokenEnd": nTokenEnd, + "lTokens": this.lTokens.slice(nTokenStart, nTokenEnd+1), + "lMorph": (sAction) ? sAction.split("|") : [":HM"] + } + this.lTokens[nTokenStart]["nMultiStartTo"] = nTokenEnd; + this.lTokens[nTokenEnd]["nMultiEndFrom"] = nTokenStart; + this.lTokens[nTokenStart]["oMultiToken"] = oMultiToken; + this.lTokens[nTokenEnd]["oMultiToken"] = oMultiToken; + } + else { console.log("# error: unknown action at " + sLineId); } } else if (cActionType == ">") { if (bDebug) { @@ -876,32 +911,32 @@ } //console.log(sText+"\nstart: "+m.start[iGroup]+" end:"+m.end[iGroup]); return sText.slice(0, m.start[iGroup]) + sNew + sText.slice(m.end[iGroup]); } - _tagAndPrepareTokenForRewriting (sWhat, nTokenRewriteStart, nTokenRewriteEnd, nTokenOffset, nLastToken, bCaseSvty, bDebug) { + _tagAndPrepareTokenForRewriting (sAction, nTokenRewriteStart, nTokenRewriteEnd, nTokenOffset, nLastToken, bCaseSvty, bDebug) { // text processor: rewrite tokens between and position - if (sWhat === "*") { + if (sAction === "*") { // purge text if (nTokenRewriteEnd - nTokenRewriteStart == 0) { this.lTokens[nTokenRewriteStart]["bToRemove"] = true; } else { for (let i = nTokenRewriteStart; i <= nTokenRewriteEnd; i++) { this.lTokens[i]["bToRemove"] = true; } } } - else if (sWhat === "␣") { + else if (sAction === "␣") { // merge tokens this.lTokens[nTokenRewriteStart]["nMergeUntil"] = nTokenRewriteEnd; } - else if (sWhat.startsWith("␣")) { - sWhat = this._expand(sWhat, nTokenOffset, nLastToken); + else if (sAction.startsWith("␣")) { + sAction = this._expand(sAction, nTokenOffset, nLastToken); this.lTokens[nTokenRewriteStart]["nMergeUntil"] = nTokenRewriteEnd; - this.lTokens[nTokenRewriteStart]["sMergedValue"] = sWhat.slice(1); + this.lTokens[nTokenRewriteStart]["sMergedValue"] = sAction.slice(1); } - else if (sWhat === "_") { + else if (sAction === "_") { // neutralized token if (nTokenRewriteEnd - nTokenRewriteStart == 0) { this.lTokens[nTokenRewriteStart]["sNewValue"] = "_"; } else { for (let i = nTokenRewriteStart; i <= nTokenRewriteEnd; i++) { @@ -908,26 +943,26 @@ this.lTokens[i]["sNewValue"] = "_"; } } } else { - if (sWhat.startsWith("=")) { - sWhat = gc_functions[sWhat.slice(1)](this.lTokens, nTokenOffset, nLastToken); + if (sAction.startsWith("=")) { + sAction = gc_functions[sAction.slice(1)](this.lTokens, nTokenOffset, nLastToken); } else { - sWhat = this._expand(sWhat, nTokenOffset, nLastToken); + sAction = this._expand(sAction, nTokenOffset, nLastToken); } let bUppercase = bCaseSvty && this.lTokens[nTokenRewriteStart]["sValue"].slice(0,1).gl_isUpperCase(); if (nTokenRewriteEnd - nTokenRewriteStart == 0) { // one token if (bUppercase) { - sWhat = sWhat.gl_toCapitalize(); + sAction = sAction.gl_toCapitalize(); } - this.lTokens[nTokenRewriteStart]["sNewValue"] = sWhat; + this.lTokens[nTokenRewriteStart]["sNewValue"] = sAction; } else { // several tokens - let lTokenValue = sWhat.split("|"); + let lTokenValue = sAction.split("|"); if (lTokenValue.length != (nTokenRewriteEnd - nTokenRewriteStart + 1)) { if (bDebug) { console.log("Error. Text processor: number of replacements != number of tokens."); } return; Index: gc_core/js/lang_core/gc_functions.js ================================================================== --- gc_core/js/lang_core/gc_functions.js +++ gc_core/js/lang_core/gc_functions.js @@ -200,23 +200,20 @@ } } return false; } -function g_morph (oToken, sPattern, sNegPattern="", nLeft=null, nRight=null, bMemorizeMorph=true) { +function g_morph (oToken, sPattern, sNegPattern="", nLeft=null, nRight=null) { // analyse a token, return True if not in morphologies and in morphologies let lMorph; if (oToken.hasOwnProperty("lMorph")) { lMorph = oToken["lMorph"]; } else { if (nLeft !== null) { let sValue = (nRight !== null) ? oToken["sValue"].slice(nLeft, nRight) : oToken["sValue"].slice(nLeft); lMorph = gc_engine.oSpellChecker.getMorph(sValue); - if (bMemorizeMorph) { - oToken["lMorph"] = lMorph; - } } else { lMorph = gc_engine.oSpellChecker.getMorph(oToken["sValue"]); } } if (lMorph.length == 0) { @@ -236,19 +233,41 @@ } // search sPattern return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); } -function g_morph0 (oToken, sPattern, sNegPattern="", nLeft=null, nRight=null, bMemorizeMorph=true) { +function g_morphx (oToken, sPattern, sNegPattern="", nLeft=null, nRight=null) { + // analyse a multi-token, return True if not in morphologies and in morphologies + if (!oToken.hasOwnProperty("oMultiToken")) { + return false; + } + let lMorph = oToken["oMultiToken"]["lMorph"]; + if (lMorph.length == 0) { + return false; + } + // check negative condition + if (sNegPattern) { + if (sNegPattern == "*") { + // all morph must match sPattern + return lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); + } + else { + if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { + return false; + } + } + } + // search sPattern + return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); +} + +function g_morph0 (oToken, sPattern, sNegPattern="", nLeft=null, nRight=null) { // analyse a token, return True if not in morphologies and in morphologies let lMorph; if (nLeft !== null) { let sValue = (nRight !== null) ? oToken["sValue"].slice(nLeft, nRight) : oToken["sValue"].slice(nLeft); lMorph = gc_engine.oSpellChecker.getMorph(sValue); - if (bMemorizeMorph) { - oToken["lMorph"] = lMorph; - } } else { lMorph = gc_engine.oSpellChecker.getMorph(oToken["sValue"]); } if (lMorph.length == 0) { return false; Index: gc_core/py/lang_core/gc_engine.py ================================================================== --- gc_core/py/lang_core/gc_engine.py +++ gc_core/py/lang_core/gc_engine.py @@ -340,11 +340,11 @@ # regex rules for zRegex, bUppercase, sLineId, sRuleId, nPriority, lActions in lRuleGroup: if sRuleId not in _aIgnoredRules: for m in zRegex.finditer(sText): bCondMemo = None - for sFuncCond, cActionType, sWhat, *eAct in lActions: + for sFuncCond, cActionType, sAction, *eAct in lActions: # action in lActions: [ condition, action type, replacement/suggestion/action[, iGroup[, message, URL]] ] try: bCondMemo = not sFuncCond or getattr(gc_functions, sFuncCond)(sText, sText0, m, self.dTokenPos, sCountry, bCondMemo) if bCondMemo: if bDebug: @@ -351,23 +351,23 @@ echo("RULE: " + sLineId) if cActionType == "-": # grammar error nErrorStart = nOffset + m.start(eAct[0]) if nErrorStart not in self.dError or nPriority > self.dErrorPriority.get(nErrorStart, -1): - self.dError[nErrorStart] = self._createErrorFromRegex(sText, sText0, sWhat, nOffset, m, eAct[0], sLineId, sRuleId, bUppercase, eAct[1], eAct[2], bShowRuleId, sOption, bContext) + self.dError[nErrorStart] = self._createErrorFromRegex(sText, sText0, sAction, nOffset, m, eAct[0], sLineId, sRuleId, bUppercase, eAct[1], eAct[2], bShowRuleId, sOption, bContext) self.dErrorPriority[nErrorStart] = nPriority self.dSentenceError[nErrorStart] = self.dError[nErrorStart] elif cActionType == "~": # text processor - sText = self.rewriteText(sText, sWhat, eAct[0], m, bUppercase) + sText = self.rewriteText(sText, sAction, eAct[0], m, bUppercase) bChange = True if bDebug: echo("~ " + sText + " -- " + m.group(eAct[0]) + " # " + sLineId) elif cActionType == "=": # disambiguation if not bParagraph: - getattr(gc_functions, sWhat)(sText, m, self.dTokenPos) + getattr(gc_functions, sAction)(sText, m, self.dTokenPos) if bDebug: echo("= " + m.group(0) + " # " + sLineId) elif cActionType == ">": # we do nothing, this test is just a condition to apply all following actions pass @@ -396,11 +396,11 @@ self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lTokens if dToken["sType"] != "INFO" } if bDebug: echo("UPDATE:") echo(self) - def _getMatches (self, dGraph, dToken, dNode, bKeep=False): + def _getNextNodes (self, dGraph, dToken, dNode, bKeep=False): "generator: return matches where “values” match arcs" bTokenFound = False # token value if dToken["sValue"] in dNode: yield (" ", dToken["sValue"], dNode[dToken["sValue"]]) @@ -507,10 +507,35 @@ if sNegPattern and any(re.search(sNegPattern, sMorph) for sMorph in lMorph): continue if not sPattern or any(re.search(sPattern, sMorph) for sMorph in lMorph): yield ("@", sRegex, dNode[""][sRegex]) bTokenFound = True + # regex multi morph arcs + if "" in dNode: + if "nMultiStartTo" in dToken: + lMorph = dToken["dMultiToken"]["lMorph"] + for sRegex in dNode[""]: + if "¬" not in sRegex: + # no anti-pattern + if any(re.search(sRegex, sMorph) for sMorph in lMorph): + yield ("&", sRegex, dNode[""][sRegex]) + bTokenFound = True + else: + # there is an anti-pattern + sPattern, sNegPattern = sRegex.split("¬", 1) + if sNegPattern == "*": + # all morphologies must match with + if sPattern: + if all(re.search(sPattern, sMorph) for sMorph in lMorph): + yield ("&", sRegex, dNode[""][sRegex]) + bTokenFound = True + else: + if sNegPattern and any(re.search(sNegPattern, sMorph) for sMorph in lMorph): + continue + if not sPattern or any(re.search(sPattern, sMorph) for sMorph in lMorph): + yield ("&", sRegex, dNode[""][sRegex]) + bTokenFound = True # token tags if "aTags" in dToken and "" in dNode: for sTag in dToken["aTags"]: if sTag in dNode[""]: yield ("/", sTag, dNode[""][sTag]) @@ -529,41 +554,51 @@ if not bTokenFound and bKeep: yield (None, "", -1) # JUMP # Warning! Recursion! if "<>" in dNode: - yield from self._getMatches(dGraph, dToken, dGraph[dNode["<>"]], bKeep=True) + yield from self._getNextNodes(dGraph, dToken, dGraph[dNode["<>"]], bKeep=True) def parseGraph (self, dGraph, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False): "parse graph with tokens from the text and execute actions encountered" - lPointer = [] + lPointers = [] bTagAndRewrite = False for iToken, dToken in enumerate(self.lTokens): if bDebug: echo("TOKEN: " + dToken["sValue"]) # check arcs for each existing pointer - lNextPointer = [] - for dPointer in lPointer: - for cActionType, sMatch, iNode in self._getMatches(dGraph, dToken, dGraph[dPointer["iNode"]]): - if cActionType is None: - lNextPointer.append(dPointer) - continue - if bDebug: - echo(" MATCH: " + cActionType + sMatch) - lNextPointer.append({ "iToken1": dPointer["iToken1"], "iNode": iNode }) - lPointer = lNextPointer - # check arcs of first nodes - for cActionType, sMatch, iNode in self._getMatches(dGraph, dToken, dGraph[0]): - if cActionType is None: - continue - if bDebug: - echo(" MATCH: " + cActionType + sMatch) - lPointer.append({ "iToken1": iToken, "iNode": iNode }) - # check if there is rules to check for each pointer - for dPointer in lPointer: - #if bDebug: - # echo("+", dPointer) + lNextPointers = [] + for dPointer in lPointers: + if dPointer["nMultiEnd"] != -1: + if dToken["i"] <= dPointer["nMultiEnd"]: + lNextPointers.append(dPointer) + if dToken["i"] != dPointer["nMultiEnd"]: + continue + for cNodeType, sMatch, iNode in self._getNextNodes(dGraph, dToken, dGraph[dPointer["iNode"]]): + if cNodeType is None: + lNextPointers.append(dPointer) + continue + if bDebug: + echo(" MATCH: " + cNodeType + sMatch) + nMultiEnd = -1 if cNodeType != "&" else dToken["nMultiStartTo"] + lNextPointers.append({ "iToken1": dPointer["iToken1"], "iNode": iNode, "nMultiEnd": nMultiEnd }) + lPointers = lNextPointers + # check arcs of first nodes + for cNodeType, sMatch, iNode in self._getNextNodes(dGraph, dToken, dGraph[0]): + if cNodeType is None: + continue + if bDebug: + echo(" MATCH: " + cNodeType + sMatch) + nMultiEnd = -1 if cNodeType != "&" else dToken["nMultiStartTo"] + lPointers.append({ "iToken1": iToken, "iNode": iNode, "nMultiEnd": nMultiEnd }) + # check if there is rules to check for each pointer + for dPointer in lPointers: + if dPointer["nMultiEnd"] != -1: + if dToken["i"] < dPointer["nMultiEnd"]: + continue + if dToken["i"] == dPointer["nMultiEnd"]: + dPointer["nMultiEnd"] = -1 if "" in dGraph[dPointer["iNode"]]: bChange = self._executeActions(dGraph, dGraph[dPointer["iNode"]][""], dPointer["iToken1"]-1, iToken, dOptions, sCountry, bShowRuleId, bDebug, bContext) if bChange: bTagAndRewrite = True if bTagAndRewrite: @@ -579,16 +614,17 @@ bCondMemo = None for sRuleId in dGraph[nextNodeKey]: try: if bDebug: echo(" >TRY: " + sRuleId + " " + sLineId) - _, sOption, sFuncCond, cActionType, sWhat, *eAct = _rules_graph.dRule[sRuleId] + _, sOption, sFuncCond, cActionType, sAction, *eAct = _rules_graph.dRule[sRuleId] # Suggestion [ option, condition, "-", replacement/suggestion/action, iTokenStart, iTokenEnd, cStartLimit, cEndLimit, bCaseSvty, nPriority, sMessage, iURL ] # TextProcessor [ option, condition, "~", replacement/suggestion/action, iTokenStart, iTokenEnd, bCaseSvty ] # Disambiguator [ option, condition, "=", replacement/suggestion/action ] # Tag [ option, condition, "/", replacement/suggestion/action, iTokenStart, iTokenEnd ] # Immunity [ option, condition, "!", option, iTokenStart, iTokenEnd ] + # Multi-token [ option, condition, "&", morphologies, iTokenStart, iTokenEnd ] # Test [ option, condition, ">", "" ] if not sOption or dOptions.get(sOption, False): bCondMemo = not sFuncCond or getattr(gc_functions, sFuncCond)(self.lTokens, nTokenOffset, nLastToken, sCountry, bCondMemo, self.dTags, self.sSentence, self.sSentence0) if bCondMemo: if cActionType == "-": @@ -598,29 +634,29 @@ if "sImmunity" not in self.lTokens[nTokenErrorStart] or (self.lTokens[nTokenErrorStart]["sImmunity"] != "*" and sOption not in self.lTokens[nTokenErrorStart]["sImmunity"]): nTokenErrorEnd = nTokenOffset + iTokenEnd if iTokenEnd > 0 else nLastToken + iTokenEnd nErrorStart = self.nOffsetWithinParagraph + (self.lTokens[nTokenErrorStart]["nStart"] if cStartLimit == "<" else self.lTokens[nTokenErrorStart]["nEnd"]) nErrorEnd = self.nOffsetWithinParagraph + (self.lTokens[nTokenErrorEnd]["nEnd"] if cEndLimit == ">" else self.lTokens[nTokenErrorEnd]["nStart"]) if nErrorStart not in self.dError or nPriority > self.dErrorPriority.get(nErrorStart, -1): - self.dError[nErrorStart] = self._createErrorFromTokens(sWhat, nTokenOffset, nLastToken, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, bCaseSvty, \ + self.dError[nErrorStart] = self._createErrorFromTokens(sAction, nTokenOffset, nLastToken, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, bCaseSvty, \ sMessage, _rules_graph.dURL.get(iURL, ""), bShowRuleId, sOption, bContext) self.dErrorPriority[nErrorStart] = nPriority self.dSentenceError[nErrorStart] = self.dError[nErrorStart] if bDebug: echo(" NEW_ERROR: {}".format(self.dError[nErrorStart])) elif cActionType == "~": # text processor nTokenStart = nTokenOffset + eAct[0] if eAct[0] > 0 else nLastToken + eAct[0] nTokenEnd = nTokenOffset + eAct[1] if eAct[1] > 0 else nLastToken + eAct[1] - self._tagAndPrepareTokenForRewriting(sWhat, nTokenStart, nTokenEnd, nTokenOffset, nLastToken, eAct[2], bDebug) + self._tagAndPrepareTokenForRewriting(sAction, nTokenStart, nTokenEnd, nTokenOffset, nLastToken, eAct[2], bDebug) bChange = True if bDebug: - echo(" TEXT_PROCESSOR: [{}:{}] > {}".format(self.lTokens[nTokenStart]["sValue"], self.lTokens[nTokenEnd]["sValue"], sWhat)) + echo(" TEXT_PROCESSOR: [{}:{}] > {}".format(self.lTokens[nTokenStart]["sValue"], self.lTokens[nTokenEnd]["sValue"], sAction)) elif cActionType == "=": # disambiguation - getattr(gc_functions, sWhat)(self.lTokens, nTokenOffset, nLastToken) + getattr(gc_functions, sAction)(self.lTokens, nTokenOffset, nLastToken) if bDebug: - echo(" DISAMBIGUATOR: ({}) [{}:{}]".format(sWhat, self.lTokens[nTokenOffset+1]["sValue"], self.lTokens[nLastToken]["sValue"])) + echo(" DISAMBIGUATOR: ({}) [{}:{}]".format(sAction, self.lTokens[nTokenOffset+1]["sValue"], self.lTokens[nLastToken]["sValue"])) elif cActionType == ">": # we do nothing, this test is just a condition to apply all following actions if bDebug: echo(" COND_OK") elif cActionType == "/": @@ -627,16 +663,16 @@ # Tag nTokenStart = nTokenOffset + eAct[0] if eAct[0] > 0 else nLastToken + eAct[0] nTokenEnd = nTokenOffset + eAct[1] if eAct[1] > 0 else nLastToken + eAct[1] for i in range(nTokenStart, nTokenEnd+1): if "aTags" in self.lTokens[i]: - self.lTokens[i]["aTags"].update(sWhat.split("|")) + self.lTokens[i]["aTags"].update(sAction.split("|")) else: - self.lTokens[i]["aTags"] = set(sWhat.split("|")) + self.lTokens[i]["aTags"] = set(sAction.split("|")) if bDebug: - echo(" TAG: {} > [{}:{}]".format(sWhat, self.lTokens[nTokenStart]["sValue"], self.lTokens[nTokenEnd]["sValue"])) - for sTag in sWhat.split("|"): + echo(" TAG: {} > [{}:{}]".format(sAction, self.lTokens[nTokenStart]["sValue"], self.lTokens[nTokenEnd]["sValue"])) + for sTag in sAction.split("|"): if sTag not in self.dTags: self.dTags[sTag] = [nTokenStart, nTokenEnd] else: self.dTags[sTag][0] = min(nTokenStart, self.dTags[sTag][0]) self.dTags[sTag][1] = max(nTokenEnd, self.dTags[sTag][1]) @@ -644,11 +680,11 @@ # immunity if bDebug: echo(" IMMUNITY: " + sLineId + " / " + sRuleId) nTokenStart = nTokenOffset + eAct[0] if eAct[0] > 0 else nLastToken + eAct[0] nTokenEnd = nTokenOffset + eAct[1] if eAct[1] > 0 else nLastToken + eAct[1] - sImmunity = sWhat or "*" + sImmunity = sAction or "*" if nTokenEnd - nTokenStart == 0: self.lTokens[nTokenStart]["sImmunity"] = sImmunity nErrorStart = self.nOffsetWithinParagraph + self.lTokens[nTokenStart]["nStart"] if nErrorStart in self.dError: del self.dError[nErrorStart] @@ -656,10 +692,25 @@ for i in range(nTokenStart, nTokenEnd+1): self.lTokens[i]["sImmunity"] = sImmunity nErrorStart = self.nOffsetWithinParagraph + self.lTokens[i]["nStart"] if nErrorStart in self.dError: del self.dError[nErrorStart] + elif cActionType == "&": + # multi-tokens + nTokenStart = nTokenOffset + eAct[0] if eAct[0] > 0 else nLastToken + eAct[0] + nTokenEnd = nTokenOffset + eAct[1] if eAct[1] > 0 else nLastToken + eAct[1] + dMultiToken = { + "nTokenStart": nTokenStart, + "nTokenEnd": nTokenEnd, + "lTokens": self.lTokens[nTokenStart:nTokenEnd+1], + "lMorph": sAction.split("|") if sAction else [":HM"] + } + self.lTokens[nTokenStart]["nMultiStartTo"] = nTokenEnd + self.lTokens[nTokenEnd]["nMultiEndFrom"] = nTokenStart + self.lTokens[nTokenStart]["dMultiToken"] = dMultiToken + self.lTokens[nTokenEnd]["dMultiToken"] = dMultiToken + print(dMultiToken) else: echo("# error: unknown action at " + sLineId) elif cActionType == ">": if bDebug: echo(" COND_BREAK") @@ -773,47 +824,47 @@ else: sNew = m.expand(sRepl) sNew = sNew + " " * (nLen-len(sNew)) return sText[0:m.start(iGroup)] + sNew + sText[m.end(iGroup):] - def _tagAndPrepareTokenForRewriting (self, sWhat, nTokenRewriteStart, nTokenRewriteEnd, nTokenOffset, nLastToken, bCaseSvty, bDebug): + def _tagAndPrepareTokenForRewriting (self, sAction, nTokenRewriteStart, nTokenRewriteEnd, nTokenOffset, nLastToken, bCaseSvty, bDebug): "text processor: rewrite tokens between and position" - if sWhat == "*": + if sAction == "*": # purge text if nTokenRewriteEnd - nTokenRewriteStart == 0: self.lTokens[nTokenRewriteStart]["bToRemove"] = True else: for i in range(nTokenRewriteStart, nTokenRewriteEnd+1): self.lTokens[i]["bToRemove"] = True - elif sWhat == "␣": + elif sAction == "␣": # merge tokens self.lTokens[nTokenRewriteStart]["nMergeUntil"] = nTokenRewriteEnd - elif sWhat.startswith("␣"): - sWhat = self._expand(sWhat, nTokenOffset, nLastToken) + elif sAction.startswith("␣"): + sAction = self._expand(sAction, nTokenOffset, nLastToken) self.lTokens[nTokenRewriteStart]["nMergeUntil"] = nTokenRewriteEnd - self.lTokens[nTokenRewriteStart]["sMergedValue"] = sWhat[1:] - elif sWhat == "_": + self.lTokens[nTokenRewriteStart]["sMergedValue"] = sAction[1:] + elif sAction == "_": # neutralized token if nTokenRewriteEnd - nTokenRewriteStart == 0: self.lTokens[nTokenRewriteStart]["sNewValue"] = "_" else: for i in range(nTokenRewriteStart, nTokenRewriteEnd+1): self.lTokens[i]["sNewValue"] = "_" else: - if sWhat.startswith("="): - sWhat = getattr(gc_functions, sWhat[1:])(self.lTokens, nTokenOffset, nLastToken) + if sAction.startswith("="): + sAction = getattr(gc_functions, sAction[1:])(self.lTokens, nTokenOffset, nLastToken) else: - sWhat = self._expand(sWhat, nTokenOffset, nLastToken) + sAction = self._expand(sAction, nTokenOffset, nLastToken) bUppercase = bCaseSvty and self.lTokens[nTokenRewriteStart]["sValue"][0:1].isupper() if nTokenRewriteEnd - nTokenRewriteStart == 0: # one token if bUppercase: - sWhat = sWhat[0:1].upper() + sWhat[1:] - self.lTokens[nTokenRewriteStart]["sNewValue"] = sWhat + sAction = sAction[0:1].upper() + sAction[1:] + self.lTokens[nTokenRewriteStart]["sNewValue"] = sAction else: # several tokens - lTokenValue = sWhat.split("|") + lTokenValue = sAction.split("|") if len(lTokenValue) != (nTokenRewriteEnd - nTokenRewriteStart + 1): if bDebug: echo("Error. Text processor: number of replacements != number of tokens.") return for i, sValue in zip(range(nTokenRewriteStart, nTokenRewriteEnd+1), lTokenValue): Index: gc_core/py/lang_core/gc_functions.py ================================================================== --- gc_core/py/lang_core/gc_functions.py +++ gc_core/py/lang_core/gc_functions.py @@ -186,19 +186,17 @@ if sValue in sValues: return True return False -def g_morph (dToken, sPattern, sNegPattern="", nLeft=None, nRight=None, bMemorizeMorph=True): +def g_morph (dToken, sPattern, sNegPattern="", nLeft=None, nRight=None): "analyse a token, return True if not in morphologies and in morphologies" if "lMorph" in dToken: lMorph = dToken["lMorph"] else: if nLeft is not None: lMorph = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)]) - if bMemorizeMorph: - dToken["lMorph"] = lMorph else: lMorph = _oSpellChecker.getMorph(dToken["sValue"]) if not lMorph: return False # check negative condition @@ -213,16 +211,35 @@ # search sPattern zPattern = re.compile(sPattern) return any(zPattern.search(sMorph) for sMorph in lMorph) -def g_morph0 (dToken, sPattern, sNegPattern="", nLeft=None, nRight=None, bMemorizeMorph=True): +def g_morphx (dToken, sPattern, sNegPattern): + "analyse a multi-token, return True if not in morphologies and in morphologies" + if not "dMultiToken" in dToken: + return False + lMorph = dToken["dMultiToken"]["lMorph"] + if not lMorph: + return False + # check negative condition + if sNegPattern: + if sNegPattern == "*": + # all morph must match sPattern + zPattern = re.compile(sPattern) + return all(zPattern.search(sMorph) for sMorph in lMorph) + zNegPattern = re.compile(sNegPattern) + if any(zNegPattern.search(sMorph) for sMorph in lMorph): + return False + # search sPattern + zPattern = re.compile(sPattern) + return any(zPattern.search(sMorph) for sMorph in lMorph) + + +def g_morph0 (dToken, sPattern, sNegPattern="", nLeft=None, nRight=None): "analyse a token, return True if not in morphologies and in morphologies (disambiguation off)" if nLeft is not None: lMorph = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)]) - if bMemorizeMorph: - dToken["lMorph"] = lMorph else: lMorph = _oSpellChecker.getMorph(dToken["sValue"]) if not lMorph: return False # check negative condition Index: gc_lang/fr/modules-js/cregex.js ================================================================== --- gc_lang/fr/modules-js/cregex.js +++ gc_lang/fr/modules-js/cregex.js @@ -80,11 +80,15 @@ ///// FONCTIONS getLemmaOfMorph: function (sMorph) { - return this._zLemma.exec(sMorph)[1]; + let m = this._zLemma.exec(sMorph); + if (m) { + return m[1]; + } + return ""; }, agreement: function (l1, l2) { // returns True if agreement in gender and number is possible between morphologies and let [sGender1, sNumber1] = this.getGenderNumber(l1); Index: gc_lang/fr/modules-js/gce_analyseur.js ================================================================== --- gc_lang/fr/modules-js/gce_analyseur.js +++ gc_lang/fr/modules-js/gce_analyseur.js @@ -11,11 +11,11 @@ } else if (oToken["sValue"].search(/-l(?:es?|a)-(?:[mt]oi|nous|leur)$|(?:[nv]ous|lui|leur)-en$/) != -1) { nEnd = oToken["sValue"].slice(0,nEnd).lastIndexOf("-"); } } - return g_morph(oToken, sPattern, sNegPattern, 0, nEnd, false); + return g_morph(oToken, sPattern, sNegPattern, 0, nEnd); } function apposition (sWord1, sWord2) { // returns true if nom + nom (no agreement required) return sWord2.length < 2 || (cregex.mbNomNotAdj(gc_engine.oSpellChecker.getMorph(sWord2)) && cregex.mbPpasNomNotAdj(gc_engine.oSpellChecker.getMorph(sWord1))); Index: gc_lang/fr/modules/cregex.py ================================================================== --- gc_lang/fr/modules/cregex.py +++ gc_lang/fr/modules/cregex.py @@ -80,11 +80,14 @@ #### FONCTIONS def getLemmaOfMorph (s): "return lemma in morphology " - return Lemma.search(s).group(1) + m = Lemma.search(s) + if m: + return m.group(1) + return "" def agreement (l1, l2): "returns True if agreement in gender and number is possible between morphologies and " sGender1, sNumber1 = getGenderNumber(l1) sGender2, sNumber2 = getGenderNumber(l2) Index: gc_lang/fr/modules/gce_analyseur.py ================================================================== --- gc_lang/fr/modules/gce_analyseur.py +++ gc_lang/fr/modules/gce_analyseur.py @@ -9,11 +9,11 @@ if dToken["sValue"].count("-") > 1: if "-t-" in dToken["sValue"]: nEnd = nEnd - 2 elif re.search("-l(?:es?|a)-(?:[mt]oi|nous|leur)$|(?:[nv]ous|lui|leur)-en$", dToken["sValue"]): nEnd = dToken["sValue"][0:nEnd].rfind("-") - return g_morph(dToken, sPattern, sNegPattern, 0, nEnd, False) + return g_morph(dToken, sPattern, sNegPattern, 0, nEnd) def apposition (sWord1, sWord2): "returns True if nom + nom (no agreement required)" return len(sWord2) < 2 or (cr.mbNomNotAdj(_oSpellChecker.getMorph(sWord2)) and cr.mbPpasNomNotAdj(_oSpellChecker.getMorph(sWord1))) Index: gc_lang/fr/rules.grx ================================================================== --- gc_lang/fr/rules.grx +++ gc_lang/fr/rules.grx @@ -430,10 +430,11 @@ __/typo(typo_parenthèse_ouvrante_collée)__ \b[(](?=[^)][^)][^)]) <<- ->> " (" && Il manque un espace avant la parenthèse. TEST: C’est au fond du couloir{{(}}celui du deuxième étage{{)}}qu’il se trouve. ->> " (|||) " +TEST: de gain différentiel 𝐴 (𝑉ᵣ = 𝐴·𝑣H{{)}}et associé ->> ") " TEST: (a + b)² TEST: il faut (re)former tout ça. TEST: il (n’)est (qu’)ingénieur @@ -1560,10 +1561,65 @@ TEST: “C’est bon !”, croit savoir Marie. TEST: “Parce que… ?” finit par demander Paul. TEST: « Dans quel pays sommes-nous ? » demanda un manifestant. +!!!! Purge des références aux notes !! + +# les références aux notes +__(p_exposants)__ + [¹²³⁴⁵⁶⁷⁸⁹⁰]+ + <<- ~>> * + +__[i](p_références_aux_notes)__ + ({w_2})(\d+) @@0,$ + <<- not morph(\0, ":") and morph(\1, ":") ~2>> * + +TEST: POLITIQUESOCIÉTÉÉCONOMIEMONDECULTUREART DE VIVREMAGAZINE (qui peut faire boguer JavaScript avec certaines regex) + + +!!!! Normalisation du “t” euphonique !! + +__> - && Le “t” euphonique n’est pas nécessaire avec “\2”.|http://bdl.oqlf.gouv.qc.ca/bdl/gabarit_bdl.asp?T1=t+euphonique&id=2513 + <<- __else__ and \1 != "-t-" and \1 != "-T-" -1>> -t- && Pour le “t” euphonique, il faut deux traits d’union. Pas d’apostrophe. Pas d’espace. + <<- \1 != "-t-" ~1>> -t- +__> - && Le “t” euphonique est superflu quand le verbe se termine par “t” ou “d”.|http://bdl.oqlf.gouv.qc.ca/bdl/gabarit_bdl.asp?T1=t+euphonique&id=2513 + <<- \1 != "-t-" ~1>> -t- +__> -t-\2 && Euphonie. Il faut un “t” euphonique.|http://bdl.oqlf.gouv.qc.ca/bdl/gabarit_bdl.asp?T1=t+euphonique&id=2513 + +TEST: va{{ t’}}il y parvenir ? ->> -t- +TEST: A{{ t’}}elle soif ? ->> -t- +TEST: A{{ t-}}elle faim ? ->> -t- +TEST: a{{ t'}}elle ->> -t- +TEST: a{{-t'}}il ->> -t- +TEST: a{{-t }}il. ->> -t- +TEST: a{{ t’}}il. ->> -t- +TEST: a{{ t-}}on. ->> -t- +TEST: donne{{ t-}}il ->> -t- +TEST: donne{{-t }}il ->> -t- +TEST: vient{{-t-}}il ->> - +TEST: viendras{{-t-}}tu ->> - +TEST: Viendront{{ t-}}ils ->> - +TEST: viennent{{ t-}}ils ->> - +TEST: mangent{{-t-}}elles ->> - +TEST: Ont{{ t’}}ils ->> - +TEST: Ont{{-t’}}ils ->> - +TEST: l’ont{{ t’}}ils vu ? ->> - +TEST: exploite{{−t−}}il les ressources numériques ->> -t- +TEST: vainc{{-il}} ses ennemis aisément ->> -t-il +TEST: Assis, gronde{{-t -}}elle ->> -t- +TEST: vient-il demain ? +TEST: prend-elle l’avantage ? +TEST: saura-t-on jamais la vérité ? +TEST: arrive-t-elle ce matin ? +TEST: y aura-t-il du poulet au dîner ? + !! !! !! @@ -1637,24 +1693,10 @@ TEST: année {{2O11}} ->> 2011 TEST: {{3O}} (chiffre avec un O). ->> 30 - -!!!! Purge des références aux notes !! - -# les références aux notes -__(p_exposants)__ - [¹²³⁴⁵⁶⁷⁸⁹⁰]+ - <<- ~>> * - -__[i](p_références_aux_notes)__ - ({w_2})(\d+) @@0,$ - <<- not morph(\0, ":") and morph(\1, ":") ~2>> * - -TEST: POLITIQUESOCIÉTÉÉCONOMIEMONDECULTUREART DE VIVREMAGAZINE (qui peut faire boguer JavaScript avec certaines regex) - !!!! Traits d’union !! __[i]/tu(tu_trait_union_douteux)__ ({w1})(?:--|—|–|−|⁃)({w1}) @@0,$ @@ -1661,50 +1703,10 @@ <<- spell(\1+"-"+\2) and analyse(\1+"-"+\2, ":") ->> \1-\2 && Trait d’union : un tiret simple suffit. TEST: Nous préparons une {{contre–attaque}}. ->> contre-attaque TEST: Nous préparons une {{contre−attaque}}. ->> contre-attaque - -__> - && Le “t” euphonique n’est pas nécessaire avec “\2”.|http://bdl.oqlf.gouv.qc.ca/bdl/gabarit_bdl.asp?T1=t+euphonique&id=2513 - <<- __else__ and \1 != "-t-" and \1 != "-T-" -1>> -t- && Pour le “t” euphonique, il faut deux traits d’union. Pas d’apostrophe. Pas d’espace. - <<- \1 != "-t-" ~1>> -t- -__> - && Le “t” euphonique est superflu quand le verbe se termine par “t” ou “d”.|http://bdl.oqlf.gouv.qc.ca/bdl/gabarit_bdl.asp?T1=t+euphonique&id=2513 - <<- \1 != "-t-" ~1>> -t- -__> -t-\2 && Euphonie. Il faut un “t” euphonique.|http://bdl.oqlf.gouv.qc.ca/bdl/gabarit_bdl.asp?T1=t+euphonique&id=2513 - -TEST: va{{ t’}}il y parvenir ? ->> -t- -TEST: A{{ t’}}elle soif ? ->> -t- -TEST: A{{ t-}}elle faim ? ->> -t- -TEST: a{{ t'}}elle ->> -t- -TEST: a{{-t'}}il ->> -t- -TEST: a{{-t }}il. ->> -t- -TEST: a{{ t’}}il. ->> -t- -TEST: a{{ t-}}on. ->> -t- -TEST: donne{{ t-}}il ->> -t- -TEST: donne{{-t }}il ->> -t- -TEST: vient{{-t-}}il ->> - -TEST: viendras{{-t-}}tu ->> - -TEST: Viendront{{ t-}}ils ->> - -TEST: viennent{{ t-}}ils ->> - -TEST: mangent{{-t-}}elles ->> - -TEST: Ont{{ t’}}ils ->> - -TEST: Ont{{-t’}}ils ->> - -TEST: l’ont{{ t’}}ils vu ? ->> - -TEST: exploite{{−t−}}il les ressources numériques ->> -t- -TEST: vainc{{-il}} ses ennemis aisément ->> -t-il -TEST: Assis, gronde{{-t -}}elle ->> -t- -TEST: vient-il demain ? -TEST: prend-elle l’avantage ? -TEST: saura-t-on jamais la vérité ? -TEST: arrive-t-elle ce matin ? -TEST: y aura-t-il du poulet au dîner ? - @@@@ @@@@ @@@@ @@ -4901,10 +4903,11 @@ # and not (value(\1, "|est|une|") and value(<1, "|l’|d’|")) # and not (\2 == "mieux" and value(<1, "|qui|")) # ->> \1 && Doublon. # #TEST: Il y a un {{doublon doublon}}. ->> doublon + !! !! !!!! Élisions & euphonie !! @@ -23207,11 +23210,13 @@ TEST: Elle en est tombée des {{nus}}. ->> nues # numérique / digital __conf_numérique_digital__ - [>agence|>appareil|>banque|>caméra|>colonie|>colonisation|>communication|>compagnie|>connexion|>économie|>entreprise|>ère|>expérience|>identité|>industrie|>présence|>prise|>service|>solution|>stratégie|>télévision|>transformation|>transition] >digital + [>agence|>appareil|>banque|>caméra|>colonie|>colonisation|>communication|>compagnie|>connexion] >digital + [>document|>économie|>entreprise|>ère|>expérience|>fichier|>identité|>industrie|>présence|>prise] >digital + [>service|>solution|>stratégie|>télévision|>transformation|>transition|>révolution] >digital <<- /conf/ -2>> numérique|numériques && Confusion : “digital” est un adjectif se rapportant aux doigts (empreinte digitale, arthrose digitale, etc.). Écrivez “numérique”. [le|du|au] digital <<- /conf/ -2>> numérique Index: misc/grammalecte.sublime-color-scheme ================================================================== --- misc/grammalecte.sublime-color-scheme +++ misc/grammalecte.sublime-color-scheme @@ -66,15 +66,16 @@ { "name": "Entity Invalid", "scope": "entity.invalid", "foreground": "hsl(0, 100%, 80%)", "background": "hsl(0, 100%, 20%)", "font_style": "bold", }, { "name": "Token meta", "scope": "string.meta", "foreground": "hsl(270, 100%, 90%)", "background": "hsl(270, 100%, 40%)", }, { "name": "Token token", "scope": "string.token", "foreground": "hsl(240, 50%, 90%)", "background": "hsl(240, 50%, 40%)", }, { "name": "Token Jumptoken", "scope": "string.jumptoken", "foreground": "hsl(0, 50%, 90%)", "background": "hsl(10, 50%, 40%)", }, { "name": "Token lemma", "scope": "string.lemma", "foreground": "hsl(210, 100%, 80%)", "background": "hsl(210, 100%, 15%)", }, - { "name": "Token phonet", "scope": "string.phonet", "foreground": "hsl(90, 100%, 80%)", "background": "hsl(90, 100%, 10%)", }, + { "name": "Token phonet", "scope": "string.phonet", "foreground": "hsl(90, 100%, 80%)", "background": "hsl(90, 100%, 10%)", }, { "name": "Token tag", "scope": "string.tag", "foreground": "hsl(30, 100%, 90%)", "background": "hsl(30, 100%, 20%)", }, { "name": "Token regex", "scope": "string.regex", "foreground": "hsl(60, 100%, 80%)", "background": "hsl(60, 100%, 10%)", }, { "name": "Token morph regex", "scope": "string.morph.regex", "foreground": "hsl(150, 80%, 90%)", "background": "hsl(150, 80%, 10%)", }, - { "name": "Token morph negregex", "scope": "string.morph.negregex","foreground": "hsl(0, 80%, 90%)", "background": "hsl(0, 80%, 10%)", }, + { "name": "Token morph negregex", "scope": "string.morph.negregex", "foreground": "hsl(0, 80%, 90%)", "background": "hsl(0, 80%, 10%)", }, + { "name": "MulToken morph regex", "scope": "string.mt.morph.regex", "foreground": "hsl(180, 80%, 90%)", "background": "hsl(180, 80%, 10%)", }, { "name": "Keyword Python", "scope": "keyword.python", "foreground": "#A0A0A0", }, { "name": "Keyword", "scope": "keyword - (source.c keyword.operator | source.c++ keyword.operator | source.objc keyword.operator | source.objc++ keyword.operator), keyword.operator.word", "foreground": "#F06070", }, Index: misc/grammalecte.sublime-syntax ================================================================== --- misc/grammalecte.sublime-syntax +++ misc/grammalecte.sublime-syntax @@ -168,16 +168,22 @@ scope: string.morph captures: 1: entity.valid 2: string.morph.regex - - match: '(\$)([^@\s¬]*)' + - match: '(\$)([^\s¬]*)' scope: string.morph captures: 1: entity.valid 2: string.morph.regex + - match: '(&)([^\s¬]*)' + scope: string.morph + captures: + 1: entity.valid + 2: string.mt.morph.regex + - match: '(/)[\w-]+' scope: string.tag captures: 1: entity.valid