Overview
Comment: | [core] gc engine: anti-pattern for value regex + restriction for universal arc |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | core | rg |
Files: | files | file ages | folders |
SHA3-256: | da276e63689e987543e988248fd635c1 |
User & Date: | olr on 2018-06-22 13:26:05 |
Other Links: | branch diff | manifest | tags |
Context
2018-06-22
| ||
13:27 | [build][bugs] fix functions rewriting + [fr] conversion: regex rules -> graph rules check-in: 521ae6bdfb user: olr tags: fr, build, rg | |
13:26 | [core] gc engine: anti-pattern for value regex + restriction for universal arc check-in: da276e6368 user: olr tags: core, rg | |
12:51 | [build][core] look before, look after (fix spaces) check-in: cdcc60d8eb user: olr tags: core, build, rg | |
Changes
Modified gc_core/py/lang_core/gc_engine.py from [16275f0b5d] to [3b5bf65d7f].
︙ | ︙ | |||
627 628 629 630 631 632 633 | for sLemma in _oSpellChecker.getLemma(dToken["sValue"]): if sLemma in dNode["<lemmas>"]: if bDebug: print(" MATCH: >" + sLemma) yield dGraph[dNode["<lemmas>"][sLemma]] # universal arc if "*" in dNode: | > | | | > > | | | | > > > > > > > > > | | 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 | for sLemma in _oSpellChecker.getLemma(dToken["sValue"]): if sLemma in dNode["<lemmas>"]: if bDebug: print(" MATCH: >" + sLemma) yield dGraph[dNode["<lemmas>"][sLemma]] # universal arc if "*" in dNode: if dToken["sType"] != "PUNC": if bDebug: print(" MATCH: *") yield dGraph[dNode["*"]] # regex value arcs if "<re_value>" in dNode: for sRegex in dNode["<re_value>"]: if "¬" not in sRegex: # no anti-pattern if re.search(sRegex, dToken["sValue"]): if bDebug: print(" MATCH: ~" + sRegex) yield dGraph[dNode["<re_value>"][sRegex]] else: # there is an anti-pattern sPattern, sNegPattern = sRegex.split("¬", 1) if sNegPattern and re.search(sNegPattern, dToken["sValue"]): continue if re.search(sPattern, dToken["sValue"]): if bDebug: print(" MATCH: ~" + sRegex) yield dGraph[dNode["<re_value>"][sRegex]] # regex morph arcs if "<re_morph>" in dNode: for sRegex in dNode["<re_morph>"]: if "¬" not in sRegex: # no anti-pattern if any(re.search(sRegex, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])): if bDebug: print(" MATCH: @" + sRegex) yield dGraph[dNode["<re_morph>"][sRegex]] else: # there is an anti-pattern sPattern, sNegPattern = sRegex.split("¬", 1) if sNegPattern == "*": # all morphologies must match with <sPattern> if sPattern and all(re.search(sPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])): if bDebug: print(" MATCH: @" + sRegex) yield dGraph[dNode["<re_morph>"][sRegex]] else: if sNegPattern and any(re.search(sNegPattern, sMorph) for sMorph in 
_oSpellChecker.getMorph(dToken["sValue"])): continue if any(re.search(sPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])): |
︙ | ︙ | |||
948 949 950 951 952 953 954 | if any(zNegPattern.search(sMorph) for sMorph in lMorph): return False # search sPattern zPattern = re.compile(sPattern) return any(zPattern.search(sMorph) for sMorph in lMorph) | | | | 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 | if any(zNegPattern.search(sMorph) for sMorph in lMorph): return False # search sPattern zPattern = re.compile(sPattern) return any(zPattern.search(sMorph) for sMorph in lMorph) def g_tag_before (dToken, dTags, sTag): if sTag not in dTags: return False if dToken["nStart"] > dTags[sTag][0]: return True return False def g_tag_after (dToken, dTags, sTag): if sTag not in dTags: return False if dToken["nStart"] < dTags[sTag][1]: return True return False |
︙ | ︙ |