Overview
Comment: | [core] gc engine: use stored morphologies in tokens if they exist |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | core | rg |
Files: | files | file ages | folders |
SHA3-256: |
eecbc6012a8ab29bd5ccc6039f93db80 |
User & Date: | olr on 2018-07-26 08:31:29 |
Other Links: | branch diff | manifest | tags |
Context
2018-07-26
| ||
11:51 | [fr] conversion: regex rules -> graph rules check-in: 663cc65183 user: olr tags: fr, rg | |
08:31 | [core] gc engine: use stored morphologies in tokens if they exist check-in: eecbc6012a user: olr tags: core, rg | |
08:13 | [fr] conversion: regex rules -> graph rules check-in: 59eec1ef7b user: olr tags: fr, rg | |
Changes
Modified gc_core/py/lang_core/gc_engine.py from [3ecd1c5c57] to [d9f8c3ac90].
︙ | ︙ | |||
657 658 659 660 661 662 663 | print(" MATCH: >" + sLemma) yield dGraph[dNode["<lemmas>"][sLemma]] # regex morph arcs if "<re_morph>" in dNode: for sRegex in dNode["<re_morph>"]: if "¬" not in sRegex: # no anti-pattern | > | | > | | | 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 | print(" MATCH: >" + sLemma) yield dGraph[dNode["<lemmas>"][sLemma]] # regex morph arcs if "<re_morph>" in dNode: for sRegex in dNode["<re_morph>"]: if "¬" not in sRegex: # no anti-pattern lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"])) if any(re.search(sRegex, sMorph) for sMorph in lMorph): if bDebug: print(" MATCH: @" + sRegex) yield dGraph[dNode["<re_morph>"][sRegex]] else: # there is an anti-pattern sPattern, sNegPattern = sRegex.split("¬", 1) if sNegPattern == "*": # all morphologies must match with <sPattern> if sPattern: lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"])) if lMorph and all(re.search(sPattern, sMorph) for sMorph in lMorph): if bDebug: print(" MATCH: @" + sRegex) yield dGraph[dNode["<re_morph>"][sRegex]] else: lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"])) if sNegPattern and any(re.search(sNegPattern, sMorph) for sMorph in lMorph): continue if not sPattern or any(re.search(sPattern, sMorph) for sMorph in lMorph): if bDebug: print(" MATCH: @" + sRegex) yield dGraph[dNode["<re_morph>"][sRegex]] # token tags if "tags" in dToken and "<tags>" in dNode: for sTag in dToken["tags"]: if sTag in dNode["<tags>"]: |
︙ | ︙ | |||
1013 1014 1015 1016 1017 1018 1019 | else: if nLeft is not None: lMorph = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)]) if bMemorizeMorph: dToken["lMorph"] = lMorph else: lMorph = _oSpellChecker.getMorph(dToken["sValue"]) | | | | 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 | else: if nLeft is not None: lMorph = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)]) if bMemorizeMorph: dToken["lMorph"] = lMorph else: lMorph = _oSpellChecker.getMorph(dToken["sValue"]) if not lMorph: return False # check negative condition if sNegPattern: if sNegPattern == "*": # all morph must match sPattern if not lMorph: return False zPattern = re.compile(sPattern) |
︙ | ︙ |