Index: gc_core/py/lang_core/gc_engine.py ================================================================== --- gc_core/py/lang_core/gc_engine.py +++ gc_core/py/lang_core/gc_engine.py @@ -603,71 +603,71 @@ self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken if dToken["sType"] != "INFO" } if bDebug: print("UPDATE:") print(self) - def _getNextMatchingNodes (self, dToken, dGraph, dNode, bDebug=False): + def _getNextPointers (self, dToken, dGraph, iNode1, dNode, bDebug=False): "generator: return nodes where “values” match arcs" # token value if dToken["sValue"] in dNode: if bDebug: print(" MATCH:", dToken["sValue"]) - yield dGraph[dNode[dToken["sValue"]]] + yield { "iNode1": iNode1, "dNode": dGraph[dNode[dToken["sValue"]]] } if dToken["sValue"][0:2].istitle(): # we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout". sValue = dToken["sValue"].lower() if sValue in dNode: if bDebug: print(" MATCH:", sValue) - yield dGraph[dNode[sValue]] + yield { "iNode1": iNode1, "dNode": dGraph[dNode[sValue]] } elif dToken["sValue"].isupper(): sValue = dToken["sValue"].lower() if sValue in dNode: if bDebug: print(" MATCH:", sValue) - yield dGraph[dNode[sValue]] + yield { "iNode1": iNode1, "dNode": dGraph[dNode[sValue]] } sValue = dToken["sValue"].capitalize() if sValue in dNode: if bDebug: print(" MATCH:", sValue) - yield dGraph[dNode[sValue]] + yield { "iNode1": iNode1, "dNode": dGraph[dNode[sValue]] } # regex value arcs if "" in dNode: for sRegex in dNode[""]: if "¬" not in sRegex: # no anti-pattern if re.search(sRegex, dToken["sValue"]): if bDebug: print(" MATCH: ~" + sRegex) - yield dGraph[dNode[""][sRegex]] + yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sRegex]] } else: # there is an anti-pattern sPattern, sNegPattern = sRegex.split("¬", 1) if sNegPattern and re.search(sNegPattern, dToken["sValue"]): continue if not sPattern or re.search(sPattern, dToken["sValue"]): if bDebug: print(" MATCH: ~" + sRegex) - yield 
dGraph[dNode[""][sRegex]] + yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sRegex]] } # analysable tokens if dToken["sType"][0:4] == "WORD": # token lemmas if "" in dNode: for sLemma in _oSpellChecker.getLemma(dToken["sValue"]): if sLemma in dNode[""]: if bDebug: print(" MATCH: >" + sLemma) - yield dGraph[dNode[""][sLemma]] + yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sLemma]] } # regex morph arcs if "" in dNode: for sRegex in dNode[""]: if "¬" not in sRegex: # no anti-pattern lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"])) if any(re.search(sRegex, sMorph) for sMorph in lMorph): if bDebug: print(" MATCH: @" + sRegex) - yield dGraph[dNode[""][sRegex]] + yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sRegex]] } else: # there is an anti-pattern sPattern, sNegPattern = sRegex.split("¬", 1) if sNegPattern == "*": # all morphologies must match with @@ -674,67 +674,65 @@ if sPattern: lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"])) if lMorph and all(re.search(sPattern, sMorph) for sMorph in lMorph): if bDebug: print(" MATCH: @" + sRegex) - yield dGraph[dNode[""][sRegex]] + yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sRegex]] } else: lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"])) if sNegPattern and any(re.search(sNegPattern, sMorph) for sMorph in lMorph): continue if not sPattern or any(re.search(sPattern, sMorph) for sMorph in lMorph): if bDebug: print(" MATCH: @" + sRegex) - yield dGraph[dNode[""][sRegex]] + yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sRegex]] } # token tags if "tags" in dToken and "" in dNode: for sTag in dToken["tags"]: if sTag in dNode[""]: if bDebug: print(" MATCH: /" + sTag) - yield dGraph[dNode[""][sTag]] + yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sTag]] } # meta arc (for token type) if "" in dNode: for sMeta in dNode[""]: - # not regex here, we just search if exists within + # no regex here, we just search if exists within if 
sMeta == "*": if bDebug: print(" MATCH: *" + sMeta) - yield dGraph[dNode[""]["*"]] + yield { "iNode1": iNode1, "dNode": dGraph[dNode[""]["*"]] } elif "¬" in sMeta: if dToken["sType"] not in sMeta: if bDebug: print(" MATCH: *" + sMeta) - yield dGraph[dNode[""][sMeta]] + yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sMeta]] } elif dToken["sType"] in sMeta: if bDebug: print(" MATCH: *" + sMeta) - yield dGraph[dNode[""][sMeta]] + yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sMeta]] } def parse (self, dGraph, dPriority, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False): "parse tokens from the text and execute actions encountered" dOpt = _dOptions if not dOptions else dOptions lPointer = [] bTagAndRewrite = False - for i, dToken in enumerate(self.lToken): + for iToken, dToken in enumerate(self.lToken): if bDebug: print("TOKEN:", dToken["sValue"]) # check arcs for each existing pointer lNextPointer = [] for dPointer in lPointer: - for dNode in self._getNextMatchingNodes(dToken, dGraph, dPointer["dNode"], bDebug): - lNextPointer.append({"iToken": dPointer["iToken"], "dNode": dNode}) + lNextPointer.extend(self._getNextPointers(dToken, dGraph, dPointer["iNode1"], dPointer["dNode"], bDebug)) lPointer = lNextPointer # check arcs of first nodes - for dNode in self._getNextMatchingNodes(dToken, dGraph, dGraph[0], bDebug): - lPointer.append({"iToken": i, "dNode": dNode}) + lPointer.extend(self._getNextPointers(dToken, dGraph, iToken, dGraph[0], bDebug)) # check if there is rules to check for each pointer for dPointer in lPointer: #if bDebug: # print("+", dPointer) if "" in dPointer["dNode"]: - bChange = self._executeActions(dGraph, dPointer["dNode"][""], dPointer["iToken"]-1, i, dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext) + bChange = self._executeActions(dGraph, dPointer["dNode"][""], dPointer["iNode1"]-1, iToken, dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext) if bChange: bTagAndRewrite = True if 
bTagAndRewrite: self.rewrite(bDebug) if bDebug: