Index: gc_core/py/lang_core/gc_engine.py ================================================================== --- gc_core/py/lang_core/gc_engine.py +++ gc_core/py/lang_core/gc_engine.py @@ -603,17 +603,10 @@ sValue = dToken["sValue"].capitalize() if sValue in dNode: if bDebug: print(" MATCH:", sValue) yield dGraph[dNode[sValue]] - # token lemmas - if "" in dNode: - for sLemma in _oSpellChecker.getLemma(dToken["sValue"]): - if sLemma in dNode[""]: - if bDebug: - print(" MATCH: >" + sLemma) - yield dGraph[dNode[""][sLemma]] # regex value arcs if "" in dNode: for sRegex in dNode[""]: if "¬" not in sRegex: # no anti-pattern @@ -628,35 +621,44 @@ continue if not sPattern or re.search(sPattern, dToken["sValue"]): if bDebug: print(" MATCH: ~" + sRegex) yield dGraph[dNode[""][sRegex]] - # regex morph arcs - if "" in dNode: - for sRegex in dNode[""]: - if "¬" not in sRegex: - # no anti-pattern - if any(re.search(sRegex, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])): + # analysable tokens + if dToken["sType"][0:4] == "WORD": + # token lemmas + if "" in dNode: + for sLemma in _oSpellChecker.getLemma(dToken["sValue"]): + if sLemma in dNode[""]: if bDebug: - print(" MATCH: @" + sRegex) - yield dGraph[dNode[""][sRegex]] - else: - # there is an anti-pattern - sPattern, sNegPattern = sRegex.split("¬", 1) - if sNegPattern == "*": - # all morphologies must match with - if sPattern and all(re.search(sPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])): + print(" MATCH: >" + sLemma) + yield dGraph[dNode[""][sLemma]] + # regex morph arcs + if "" in dNode: + for sRegex in dNode[""]: + if "¬" not in sRegex: + # no anti-pattern + if any(re.search(sRegex, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])): if bDebug: print(" MATCH: @" + sRegex) yield dGraph[dNode[""][sRegex]] else: - if sNegPattern and any(re.search(sNegPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])): - continue - if not sPattern or any(re.search(sPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])): - if bDebug: - print(" MATCH: @" + sRegex) - yield dGraph[dNode[""][sRegex]] + # there is an anti-pattern + sPattern, sNegPattern = sRegex.split("¬", 1) + if sNegPattern == "*": + # all morphologies must match with + if sPattern and all(re.search(sPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])): + if bDebug: + print(" MATCH: @" + sRegex) + yield dGraph[dNode[""][sRegex]] + else: + if sNegPattern and any(re.search(sNegPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])): + continue + if not sPattern or any(re.search(sPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])): + if bDebug: + print(" MATCH: @" + sRegex) + yield dGraph[dNode[""][sRegex]] # meta arc (for token type) if "" in dNode: for sMeta in dNode[""]: # not regex here, we just search if exists within if sMeta == "*":