Index: gc_core/js/lang_core/gc_engine.js
==================================================================
--- gc_core/js/lang_core/gc_engine.js
+++ gc_core/js/lang_core/gc_engine.js
@@ -597,19 +597,28 @@
                 console.log("TOKEN: " + oToken["sValue"]);
             }
             // check arcs for each existing pointer
             let lNextPointer = [];
             for (let oPointer of lPointer) {
+                if (oPointer["nMultiEnd"] != -1) {
+                    if (oToken["i"] <= oPointer["nMultiEnd"]) {
+                        lNextPointer.push(oPointer);
+                    }
+                    if (oToken["i"] != oPointer["nMultiEnd"]) {
+                        continue;
+                    }
+                }
                 for (let [cActionType, sMatch, iNode] of this._getMatches(oGraph, oToken, oGraph[oPointer["iNode"]])) {
                     if (cActionType === null) {
                         lNextPointer.push(oPointer);
                         continue;
                     }
                     if (bDebug) {
                         console.log(" MATCH: " + cActionType + sMatch);
                     }
-                    lNextPointer.push({ "iToken1": oPointer["iToken1"], "iNode": iNode });
+                    let nMultiEnd = (cActionType != "&") ? -1 : oToken["nMultiStartTo"];
+                    lNextPointer.push({ "iToken1": oPointer["iToken1"], "iNode": iNode, "nMultiEnd": nMultiEnd });
                 }
             }
             lPointer = lNextPointer;
             // check arcs of first nodes
             for (let [cActionType, sMatch, iNode] of this._getMatches(oGraph, oToken, oGraph[0])) {
@@ -617,14 +626,23 @@
                     continue;
                 }
                 if (bDebug) {
                     console.log(" MATCH: " + cActionType + sMatch);
                 }
-                lPointer.push({ "iToken1": iToken, "iNode": iNode });
+                let nMultiEnd = (cActionType != "&") ? -1 : oToken["nMultiStartTo"];
+                lPointer.push({ "iToken1": iToken, "iNode": iNode, "nMultiEnd": nMultiEnd });
             }
             // check if there is rules to check for each pointer
             for (let oPointer of lPointer) {
+                if (oPointer["nMultiEnd"] != -1) {
+                    if (oToken["i"] < oPointer["nMultiEnd"]) {
+                        continue;
+                    }
+                    if (oToken["i"] == oPointer["nMultiEnd"]) {
+                        oPointer["nMultiEnd"] = -1;
+                    }
+                }
                 if (oGraph[oPointer["iNode"]].hasOwnProperty("")) {
                     let bChange = this._executeActions(oGraph, oGraph[oPointer["iNode"]][""], oPointer["iToken1"]-1, iToken, dOptions, sCountry, bShowRuleId, bDebug, bContext);
                     if (bChange) {
                         bTagAndRewrite = true;
                     }
@@ -657,10 +675,11 @@
                     // Suggestion [ option, condition, "-", replacement/suggestion/action, iTokenStart, iTokenEnd, cStartLimit, cEndLimit, bCaseSvty, nPriority, sMessage, iURL ]
                     // TextProcessor [ option, condition, "~", replacement/suggestion/action, iTokenStart, iTokenEnd, bCaseSvty ]
                     // Disambiguator [ option, condition, "=", replacement/suggestion/action ]
                     // Tag [ option, condition, "/", replacement/suggestion/action, iTokenStart, iTokenEnd ]
                     // Immunity [ option, condition, "!", "", iTokenStart, iTokenEnd ]
+                    // Multi-token [ option, condition, "&", morphologies, iTokenStart, iTokenEnd ]
                     // Test [ option, condition, ">", "" ]
                     if (!sOption || dOptions.gl_get(sOption, false)) {
                         bCondMemo = !sFuncCond || gc_functions[sFuncCond](this.lTokens, nTokenOffset, nLastToken, sCountry, bCondMemo, this.dTags, this.sSentence, this.sSentence0);
                         if (bCondMemo) {
                             if (cActionType == "-") {
@@ -748,11 +767,27 @@
                                         if (this.dError.has(nErrorStart)) {
                                             this.dError.delete(nErrorStart);
                                         }
                                     }
                                 }
-                            } else {
+                            }
+                            else if (cActionType == "&") {
+                                // multi-tokens: mark the first and last token of the span and attach the shared descriptor to both
+                                let nTokenStart = (eAct[0] > 0) ? nTokenOffset + eAct[0] : nLastToken + eAct[0];
+                                let nTokenEnd = (eAct[1] > 0) ? nTokenOffset + eAct[1] : nLastToken + eAct[1];
+                                let dMultiToken = {
+                                    "nTokenStart": nTokenStart,
+                                    "nTokenEnd": nTokenEnd,
+                                    "lTokens": this.lTokens.slice(nTokenStart, nTokenEnd+1),
+                                    "lMorph": (sWhat) ? sWhat.split("|") : [":HM"]
+                                };
+                                this.lTokens[nTokenStart]["nMultiStartTo"] = nTokenEnd;
+                                this.lTokens[nTokenEnd]["nMultiEndFrom"] = nTokenStart;
+                                this.lTokens[nTokenStart]["dMultiToken"] = dMultiToken;
+                                this.lTokens[nTokenEnd]["dMultiToken"] = dMultiToken;
+                            }
+                            else {
                                 console.log("# error: unknown action at " + sLineId);
                             }
                         }
                         else if (cActionType == ">") {
                             if (bDebug) {

Index: gc_core/py/lang_core/gc_engine.py
==================================================================
--- gc_core/py/lang_core/gc_engine.py
+++ gc_core/py/lang_core/gc_engine.py
@@ -507,10 +507,35 @@
                             if sNegPattern and any(re.search(sNegPattern, sMorph) for sMorph in lMorph):
                                 continue
                             if not sPattern or any(re.search(sPattern, sMorph) for sMorph in lMorph):
                                 yield ("@", sRegex, dNode[""][sRegex])
                                 bTokenFound = True
+        # regex multi morph arcs
+        if "" in dNode:
+            if "nMultiStartTo" in dToken:
+                lMorph = dToken["dMultiToken"]["lMorph"]
+                for sRegex in dNode[""]:
+                    if "¬" not in sRegex:
+                        # no anti-pattern
+                        if any(re.search(sRegex, sMorph) for sMorph in lMorph):
+                            yield ("&", sRegex, dNode[""][sRegex])
+                            bTokenFound = True
+                    else:
+                        # there is an anti-pattern
+                        sPattern, sNegPattern = sRegex.split("¬", 1)
+                        if sNegPattern == "*":
+                            # all morphologies must match sPattern
+                            if sPattern:
+                                if all(re.search(sPattern, sMorph) for sMorph in lMorph):
+                                    yield ("&", sRegex, dNode[""][sRegex])
+                                    bTokenFound = True
+                        else:
+                            if sNegPattern and any(re.search(sNegPattern, sMorph) for sMorph in lMorph):
+                                continue
+                            if not sPattern or any(re.search(sPattern, sMorph) for sMorph in lMorph):
+                                yield ("&", sRegex, dNode[""][sRegex])
+                                bTokenFound = True
         # token tags
         if "aTags" in dToken and "" in dNode:
             for sTag in dToken["aTags"]:
                 if sTag in dNode[""]:
                     yield ("/", sTag, dNode[""][sTag])
@@ -541,29 +566,39 @@
             if bDebug:
                 echo("TOKEN: " + dToken["sValue"])
             # check arcs for each existing pointer
             lNextPointer = []
             for dPointer in lPointer:
+                if dPointer["nMultiEnd"] != -1:
+                    if dToken["i"] <= dPointer["nMultiEnd"]:
+                        lNextPointer.append(dPointer)
+                    if dToken["i"] != dPointer["nMultiEnd"]:
+                        continue
                 for cActionType, sMatch, iNode in self._getMatches(dGraph, dToken, dGraph[dPointer["iNode"]]):
                     if cActionType is None:
                         lNextPointer.append(dPointer)
                         continue
                     if bDebug:
                         echo(" MATCH: " + cActionType + sMatch)
-                    lNextPointer.append({ "iToken1": dPointer["iToken1"], "iNode": iNode })
+                    nMultiEnd = -1 if cActionType != "&" else dToken["nMultiStartTo"]
+                    lNextPointer.append({ "iToken1": dPointer["iToken1"], "iNode": iNode, "nMultiEnd": nMultiEnd })
             lPointer = lNextPointer
             # check arcs of first nodes
             for cActionType, sMatch, iNode in self._getMatches(dGraph, dToken, dGraph[0]):
                 if cActionType is None:
                     continue
                 if bDebug:
                     echo(" MATCH: " + cActionType + sMatch)
-                lPointer.append({ "iToken1": iToken, "iNode": iNode })
+                nMultiEnd = -1 if cActionType != "&" else dToken["nMultiStartTo"]
+                lPointer.append({ "iToken1": iToken, "iNode": iNode, "nMultiEnd": nMultiEnd })
             # check if there is rules to check for each pointer
             for dPointer in lPointer:
-                #if bDebug:
-                #    echo("+", dPointer)
+                if dPointer["nMultiEnd"] != -1:
+                    if dToken["i"] < dPointer["nMultiEnd"]:
+                        continue
+                    if dToken["i"] == dPointer["nMultiEnd"]:
+                        dPointer["nMultiEnd"] = -1
                 if "" in dGraph[dPointer["iNode"]]:
                     bChange = self._executeActions(dGraph, dGraph[dPointer["iNode"]][""], dPointer["iToken1"]-1, iToken, dOptions, sCountry, bShowRuleId, bDebug, bContext)
                     if bChange:
                         bTagAndRewrite = True
         if bTagAndRewrite:
@@ -585,10 +620,11 @@
                     # Suggestion [ option, condition, "-", replacement/suggestion/action, iTokenStart, iTokenEnd, cStartLimit, cEndLimit, bCaseSvty, nPriority, sMessage, iURL ]
                     # TextProcessor [ option, condition, "~", replacement/suggestion/action, iTokenStart, iTokenEnd, bCaseSvty ]
                     # Disambiguator [ option, condition, "=", replacement/suggestion/action ]
                     # Tag [ option, condition, "/", replacement/suggestion/action, iTokenStart, iTokenEnd ]
                     # Immunity [ option, condition, "!", option, iTokenStart, iTokenEnd ]
+                    # Multi-token [ option, condition, "&", morphologies, iTokenStart, iTokenEnd ]
                     # Test [ option, condition, ">", "" ]
                     if not sOption or dOptions.get(sOption, False):
                         bCondMemo = not sFuncCond or getattr(gc_functions, sFuncCond)(self.lTokens, nTokenOffset, nLastToken, sCountry, bCondMemo, self.dTags, self.sSentence, self.sSentence0)
                         if bCondMemo:
                             if cActionType == "-":
@@ -656,10 +692,24 @@
                                     for i in range(nTokenStart, nTokenEnd+1):
                                         self.lTokens[i]["sImmunity"] = sImmunity
                                         nErrorStart = self.nOffsetWithinParagraph + self.lTokens[i]["nStart"]
                                         if nErrorStart in self.dError:
                                             del self.dError[nErrorStart]
+                            elif cActionType == "&":
+                                # multi-tokens: mark the first and last token of the span and attach the shared descriptor to both
+                                nTokenStart = nTokenOffset + eAct[0] if eAct[0] > 0 else nLastToken + eAct[0]
+                                nTokenEnd = nTokenOffset + eAct[1] if eAct[1] > 0 else nLastToken + eAct[1]
+                                dMultiToken = {
+                                    "nTokenStart": nTokenStart,
+                                    "nTokenEnd": nTokenEnd,
+                                    "lTokens": self.lTokens[nTokenStart:nTokenEnd+1],
+                                    "lMorph": sWhat.split("|") if sWhat else [":HM"]
+                                }
+                                self.lTokens[nTokenStart]["nMultiStartTo"] = nTokenEnd
+                                self.lTokens[nTokenEnd]["nMultiEndFrom"] = nTokenStart
+                                self.lTokens[nTokenStart]["dMultiToken"] = dMultiToken
+                                self.lTokens[nTokenEnd]["dMultiToken"] = dMultiToken
                             else:
                                 echo("# error: unknown action at " + sLineId)
                         elif cActionType == ">":
                             if bDebug:
                                 echo(" COND_BREAK")

Index: gc_lang/fr/rules.grx
==================================================================
--- gc_lang/fr/rules.grx
+++ gc_lang/fr/rules.grx
@@ -430,10 +430,11 @@
 __/typo(typo_parenthèse_ouvrante_collée)__
     \b[(](?=[^)][^)][^)])
     <<- ->> " (" && Il manque un espace avant la parenthèse.
 
 TEST: C’est au fond du couloir{{(}}celui du deuxième étage{{)}}qu’il se trouve. ->> " (|||) "
+TEST: de gain différentiel 𝐴 (𝑉ᵣ = 𝐴·𝑣H{{)}}et associé ->> ") "
 TEST: (a + b)²
 TEST: il faut (re)former tout ça.
 TEST: il (n’)est (qu’)ingénieur
 
 
@@ -3829,11 +3830,11 @@
         <<- ~>> ␣
         <<- =>> define(\2, ":MP:e:i")
 
 
 __merge__
-    à la fois
+    à la pp
         <<- &>> :LW
 
 
 __immunités__
     il y a
@@ -4907,10 +4908,23 @@
 #        and not (value(\1, "|est|une|") and value(<1, "|l’|d’|"))
 #        and not (\2 == "mieux" and value(<1, "|qui|"))
 #    ->> \1 && Doublon.
 #
 #TEST: Il y a un {{doublon doublon}}. ->> doublon
+
+__test_merge__
+    &:LW
+        <<- echo("DETECTED") ~>> *
+
+    je vais &:LW
+        <<- --1>> X && TEST0.
+
+    je vais &:LW de
+        <<- --1>> X && TEST2.
+        <<- -1>> Z && TEST1.
+
+
 
 
 !!
 !!
 !!!! Élisions & euphonie !!