Index: gc_core/py/lang_core/gc_engine.py ================================================================== --- gc_core/py/lang_core/gc_engine.py +++ gc_core/py/lang_core/gc_engine.py @@ -629,20 +629,32 @@ if bDebug: print(" MATCH: >" + sLemma) yield dGraph[dNode[""][sLemma]] # universal arc if "*" in dNode: - if bDebug: - print(" MATCH: *") - yield dGraph[dNode["*"]] + if dToken["sType"] != "PUNC": + if bDebug: + print(" MATCH: *") + yield dGraph[dNode["*"]] # regex value arcs if "" in dNode: for sRegex in dNode[""]: - if re.search(sRegex, dToken["sValue"]): - if bDebug: - print(" MATCH: ~" + sRegex) - yield dGraph[dNode[""][sRegex]] + if "¬" not in sRegex: + # no anti-pattern + if re.search(sRegex, dToken["sValue"]): + if bDebug: + print(" MATCH: ~" + sRegex) + yield dGraph[dNode[""][sRegex]] + else: + # there is an anti-pattern + sPattern, sNegPattern = sRegex.split("¬", 1) + if sNegPattern and re.search(sNegPattern, dToken["sValue"]): + continue + if re.search(sPattern, dToken["sValue"]): + if bDebug: + print(" MATCH: ~" + sRegex) + yield dGraph[dNode[""][sRegex]] # regex morph arcs if "" in dNode: for sRegex in dNode[""]: if "¬" not in sRegex: # no anti-pattern @@ -653,11 +665,11 @@ else: # there is an anti-pattern sPattern, sNegPattern = sRegex.split("¬", 1) if sNegPattern == "*": # all morphologies must match with - if all(re.search(sPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])): + if sPattern and all(re.search(sPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])): if bDebug: print(" MATCH: @" + sRegex) yield dGraph[dNode[""][sRegex]] else: if sNegPattern and any(re.search(sNegPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])): @@ -950,19 +962,19 @@ # search sPattern zPattern = re.compile(sPattern) return any(zPattern.search(sMorph) for sMorph in lMorph) -def g_tag_before (dToken, sTag, dTags): +def g_tag_before (dToken, dTags, sTag): if sTag not in dTags: return False if dToken["nStart"] > dTags[sTag][0]: return True return False -def g_tag_after (dToken, sTag, dTags): +def g_tag_after (dToken, dTags, sTag): if sTag not in dTags: return False if dToken["nStart"] < dTags[sTag][1]: return True return False