Index: gc_core/py/lang_core/gc_engine.py
==================================================================
--- gc_core/py/lang_core/gc_engine.py
+++ gc_core/py/lang_core/gc_engine.py
@@ -603,61 +603,69 @@
         self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken if dToken["sType"] != "INFO" }
         if bDebug:
             print("UPDATE:")
             print(self)
 
-    def _getNextMatchingNodes (self, dToken, dGraph, iNode1, dNode, bDebug=False):
+    def _getNextPointers (self, dToken, dGraph, iNode1, dNode, bKeep=False, bDebug=False):
         "generator: return nodes where “values” match arcs"
+        bTokenFound = False
         # token value
         if dToken["sValue"] in dNode:
             if bDebug:
                 print(" MATCH:", dToken["sValue"])
             yield { "iNode1": iNode1, "dNode": dGraph[dNode[dToken["sValue"]]] }
+            bTokenFound = True
         if dToken["sValue"][0:2].istitle(): # we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout".
             sValue = dToken["sValue"].lower()
             if sValue in dNode:
                 if bDebug:
                     print(" MATCH:", sValue)
                 yield { "iNode1": iNode1, "dNode": dGraph[dNode[sValue]] }
+                bTokenFound = True
         elif dToken["sValue"].isupper():
             sValue = dToken["sValue"].lower()
             if sValue in dNode:
                 if bDebug:
                     print(" MATCH:", sValue)
                 yield { "iNode1": iNode1, "dNode": dGraph[dNode[sValue]] }
+                bTokenFound = True
             sValue = dToken["sValue"].capitalize()
             if sValue in dNode:
                 if bDebug:
                     print(" MATCH:", sValue)
                 yield { "iNode1": iNode1, "dNode": dGraph[dNode[sValue]] }
+                bTokenFound = True
         # regex value arcs
         if "" in dNode:
             for sRegex in dNode[""]:
                 if "¬" not in sRegex:
                     # no anti-pattern
                     if re.search(sRegex, dToken["sValue"]):
                         if bDebug:
                             print(" MATCH: ~" + sRegex)
                         yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sRegex]] }
+                        bTokenFound = True
                 else:
                     # there is an anti-pattern
                     sPattern, sNegPattern = sRegex.split("¬", 1)
                     if sNegPattern and re.search(sNegPattern, dToken["sValue"]):
                         continue
                     if not sPattern or re.search(sPattern, dToken["sValue"]):
                         if bDebug:
                             print(" MATCH: ~" + sRegex)
                         yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sRegex]] }
+                        bTokenFound = True
         # analysable tokens
         if dToken["sType"][0:4] == "WORD":
             # token lemmas
             if "" in dNode:
                 for sLemma in _oSpellChecker.getLemma(dToken["sValue"]):
                     if sLemma in dNode[""]:
                         if bDebug:
                             print(" MATCH: >" + sLemma)
                         yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sLemma]] }
+                        bTokenFound = True
             # regex morph arcs
             if "" in dNode:
                 for sRegex in dNode[""]:
                     if "¬" not in sRegex:
                         # no anti-pattern
@@ -664,10 +672,11 @@
                         lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"]))
                         if any(re.search(sRegex, sMorph) for sMorph in lMorph):
                             if bDebug:
                                 print(" MATCH: @" + sRegex)
                             yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sRegex]] }
+                            bTokenFound = True
                     else:
                         # there is an anti-pattern
                         sPattern, sNegPattern = sRegex.split("¬", 1)
                         if sNegPattern == "*":
                             # all morphologies must match with
@@ -675,42 +684,55 @@
                                 lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"]))
                                 if lMorph and all(re.search(sPattern, sMorph) for sMorph in lMorph):
                                     if bDebug:
                                         print(" MATCH: @" + sRegex)
                                     yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sRegex]] }
+                                    bTokenFound = True
                         else:
                             lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"]))
                             if sNegPattern and any(re.search(sNegPattern, sMorph) for sMorph in lMorph):
                                 continue
                             if not sPattern or any(re.search(sPattern, sMorph) for sMorph in lMorph):
                                 if bDebug:
                                     print(" MATCH: @" + sRegex)
                                 yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sRegex]] }
+                                bTokenFound = True
         # token tags
         if "tags" in dToken and "" in dNode:
             for sTag in dToken["tags"]:
                 if sTag in dNode[""]:
                     if bDebug:
                         print(" MATCH: /" + sTag)
                     yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sTag]] }
+                    bTokenFound = True
         # meta arc (for token type)
         if "" in dNode:
             for sMeta in dNode[""]:
                 # no regex here, we just search if exists within
                 if sMeta == "*":
                     if bDebug:
                         print(" MATCH: *" + sMeta)
                     yield { "iNode1": iNode1, "dNode": dGraph[dNode[""]["*"]] }
+                    bTokenFound = True
                 elif "¬" in sMeta:
                     if dToken["sType"] not in sMeta:
                         if bDebug:
                             print(" MATCH: *" + sMeta)
                         yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sMeta]] }
+                        bTokenFound = True
                 elif dToken["sType"] in sMeta:
                     if bDebug:
                         print(" MATCH: *" + sMeta)
                     yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sMeta]] }
+                    bTokenFound = True
+        if bKeep and not bTokenFound:
+            yield { "iNode1": iNode1, "dNode": dNode, "bKeep": True }
+        # JUMP
+        # Warning! Recursion!
+        if "<>" in dNode:
+            yield from self._getNextPointers(dToken, dGraph, iNode1, dGraph[dNode["<>"]], True, bDebug)
+
 
     def parse (self, dGraph, dPriority, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False):
         "parse tokens from the text and execute actions encountered"
         dOpt = _dOptions if not dOptions else dOptions
         lPointer = []
@@ -719,14 +741,14 @@
             if bDebug:
                 print("TOKEN:", dToken["sValue"])
             # check arcs for each existing pointer
             lNextPointer = []
             for dPointer in lPointer:
-                lNextPointer.extend(self._getNextPointers(dToken, dGraph, dPointer["iNode1"], dPointer["dNode"], bDebug))
+                lNextPointer.extend(self._getNextPointers(dToken, dGraph, dPointer["iNode1"], dPointer["dNode"], dPointer.get("bKeep", False), bDebug))
             lPointer = lNextPointer
             # check arcs of first nodes
-            lPointer.extend(self._getNextPointers(dToken, dGraph, iToken, dGraph[0], bDebug))
+            lPointer.extend(self._getNextPointers(dToken, dGraph, iToken, dGraph[0], False, bDebug))
             # check if there is rules to check for each pointer
             for dPointer in lPointer:
                 #if bDebug:
                 #    print("+", dPointer)
                 if "" in dPointer["dNode"]:
Index: gc_lang/fr/rules.grx ================================================================== --- gc_lang/fr/rules.grx +++ gc_lang/fr/rules.grx @@ -11121,10 +11121,11 @@ [>desseller|>desceller] [l’|@:[BD]] 
[>acidité|>activité|>allergie|>anévrisme|>anomalie|>arnaque|>appendicite|>atrophie|>baisse|>bébé|>blessure|>bug|>bogue|>carie|>cancer|>cause|>changement|>complot|>comète|>concentration|>corrélation|>croissance|>défaut|>défaillance|>demande|>dépression|>diabète|>différence|>diminution|>effluve|>épilepsie|>erreur|>essai|>existence|>grossesse|>grosseur|>faille|>faute|>fuite|>fraude|>grippe|>handicap|>hausse|>hémorragie|>hostilité|>hypertrophie|>incompatibilité|>incohérence|>infection|>infraction|>indice|>infidélité|>insuffisance|>intrigue|>irrégularité|>leucémie|>lésion|>lueur|>lumière|>maladie|>malformation|>manœuvre|>manipulation|>molécule|>mensonge|>mutation|>once|>perturbation|>personnalité|>piste|>perte|>planète|>exoplanète|>présence|>qualité|>odeur|>opportunité|>otite|>problème|>surdité|>talent|>tendance|>tentative|>tumeur|>utilisation|hoax|>variation|>vie|virus] <<- /conf/ -1>> =\1.replace("escell", "écel").replace("essell", "écel") # Confusion probable si ce mot se rapporte à “\3”. Desceller signifie briser un sceau, un cachet… Desseller signifie ôter une selle.|http://fr.wiktionary.org/wiki/déceler +TEST: il faut régler les problèmes {{descellés}} dans le code de l’analyseur syntaxique. TEST: il y a une erreur qu’on peut {{desceller}} dans ses analyses. TEST: elle a {{dessellé}} une forte hostilité dans ses propos. TEST: elle a {{dessellé}} deux problèmes.