627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
|
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
|
+
-
-
-
+
+
+
+
+
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
|
for sLemma in _oSpellChecker.getLemma(dToken["sValue"]):
if sLemma in dNode["<lemmas>"]:
if bDebug:
print(" MATCH: >" + sLemma)
yield dGraph[dNode["<lemmas>"][sLemma]]
# universal arc
if "*" in dNode:
if dToken["sType"] != "PUNC":
if bDebug:
print(" MATCH: *")
yield dGraph[dNode["*"]]
if bDebug:
print(" MATCH: *")
yield dGraph[dNode["*"]]
# regex value arcs
if "<re_value>" in dNode:
for sRegex in dNode["<re_value>"]:
if "¬" not in sRegex:
# no anti-pattern
if re.search(sRegex, dToken["sValue"]):
if bDebug:
print(" MATCH: ~" + sRegex)
yield dGraph[dNode["<re_value>"][sRegex]]
if re.search(sRegex, dToken["sValue"]):
if bDebug:
print(" MATCH: ~" + sRegex)
yield dGraph[dNode["<re_value>"][sRegex]]
else:
# there is an anti-pattern
sPattern, sNegPattern = sRegex.split("¬", 1)
if sNegPattern and re.search(sNegPattern, dToken["sValue"]):
continue
if re.search(sPattern, dToken["sValue"]):
if bDebug:
print(" MATCH: ~" + sRegex)
yield dGraph[dNode["<re_value>"][sRegex]]
# regex morph arcs
if "<re_morph>" in dNode:
for sRegex in dNode["<re_morph>"]:
if "¬" not in sRegex:
# no anti-pattern
if any(re.search(sRegex, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
if bDebug:
print(" MATCH: @" + sRegex)
yield dGraph[dNode["<re_morph>"][sRegex]]
else:
# there is an anti-pattern
sPattern, sNegPattern = sRegex.split("¬", 1)
if sNegPattern == "*":
# all morphologies must match with <sPattern>
if all(re.search(sPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
if sPattern and all(re.search(sPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
if bDebug:
print(" MATCH: @" + sRegex)
yield dGraph[dNode["<re_morph>"][sRegex]]
else:
if sNegPattern and any(re.search(sNegPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
continue
if any(re.search(sPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
|
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
|
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
|
-
+
-
+
|
if any(zNegPattern.search(sMorph) for sMorph in lMorph):
return False
# search sPattern
zPattern = re.compile(sPattern)
return any(zPattern.search(sMorph) for sMorph in lMorph)
def g_tag_before (dToken, sTag, dTags):
def g_tag_before (dToken, dTags, sTag):
if sTag not in dTags:
return False
if dToken["nStart"] > dTags[sTag][0]:
return True
return False
def g_tag_after (dToken, sTag, dTags):
def g_tag_after (dToken, dTags, sTag):
if sTag not in dTags:
return False
if dToken["nStart"] < dTags[sTag][1]:
return True
return False
|