Grammalecte  Check-in [eecbc6012a]

Overview
Comment: [core] gc engine: use stored morphologies in tokens if they exist
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | rg
Files: files | file ages | folders
SHA3-256: eecbc6012a8ab29bd5ccc6039f93db809ffc08c46bf23080104b6603cd9fba42
User & Date: olr on 2018-07-26 08:31:29
Other Links: branch diff | manifest | tags
Context
2018-07-26
11:51
[fr] conversion: regex rules -> graph rules check-in: 663cc65183 user: olr tags: fr, rg
08:31
[core] gc engine: use stored morphologies in tokens if they exist check-in: eecbc6012a user: olr tags: core, rg
08:13
[fr] conversion: regex rules -> graph rules check-in: 59eec1ef7b user: olr tags: fr, rg
Changes

Modified gc_core/py/lang_core/gc_engine.py from [3ecd1c5c57] to [d9f8c3ac90].

657
658
659
660
661
662
663

664

665
666
667
668
669
670
671
672
673
674

675
676
677
678
679

680

681
682

683
684
685
686
687
688
689
657
658
659
660
661
662
663
664

665
666
667
668
669
670
671
672
673
674

675
676
677
678
679
680
681

682
683

684
685
686
687
688
689
690
691







+
-
+









-
+





+
-
+

-
+







                            print("  MATCH: >" + sLemma)
                        yield dGraph[dNode["<lemmas>"][sLemma]]
            # regex morph arcs
            if "<re_morph>" in dNode:
                for sRegex in dNode["<re_morph>"]:
                    if "¬" not in sRegex:
                        # no anti-pattern
                        lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"]))
                        if any(re.search(sRegex, sMorph)  for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                        if any(re.search(sRegex, sMorph)  for sMorph in lMorph):
                            if bDebug:
                                print("  MATCH: @" + sRegex)
                            yield dGraph[dNode["<re_morph>"][sRegex]]
                    else:
                        # there is an anti-pattern
                        sPattern, sNegPattern = sRegex.split("¬", 1)
                        if sNegPattern == "*":
                            # all morphologies must match with <sPattern>
                            if sPattern:
                                lMorph = _oSpellChecker.getMorph(dToken["sValue"])
                                lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"]))
                                if lMorph and all(re.search(sPattern, sMorph)  for sMorph in lMorph):
                                    if bDebug:
                                        print("  MATCH: @" + sRegex)
                                    yield dGraph[dNode["<re_morph>"][sRegex]]
                        else:
                            lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"]))
                            if sNegPattern and any(re.search(sNegPattern, sMorph)  for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                            if sNegPattern and any(re.search(sNegPattern, sMorph)  for sMorph in lMorph):
                                continue
                            if not sPattern or any(re.search(sPattern, sMorph)  for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                            if not sPattern or any(re.search(sPattern, sMorph)  for sMorph in lMorph):
                                if bDebug:
                                    print("  MATCH: @" + sRegex)
                                yield dGraph[dNode["<re_morph>"][sRegex]]
        # token tags
        if "tags" in dToken and "<tags>" in dNode:
            for sTag in dToken["tags"]:
                if sTag in dNode["<tags>"]:
1013
1014
1015
1016
1017
1018
1019
1020
1021


1022
1023
1024
1025
1026
1027
1028
1015
1016
1017
1018
1019
1020
1021


1022
1023
1024
1025
1026
1027
1028
1029
1030







-
-
+
+







    else:
        if nLeft is not None:
            lMorph = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)])
            if bMemorizeMorph:
                dToken["lMorph"] = lMorph
        else:
            lMorph = _oSpellChecker.getMorph(dToken["sValue"])
        if not lMorph:
            return False
    if not lMorph:
        return False
    # check negative condition
    if sNegPattern:
        if sNegPattern == "*":
            # all morph must match sPattern
            if not lMorph:
                return False
            zPattern = re.compile(sPattern)