Grammalecte  Check-in [209474b757]

Overview
Comment:[core] gc engine: don’t check regexes on some META tokens
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | rg
Files: files | file ages | folders
SHA3-256: 209474b757fb0f0ee3d36754eed04e5712468c6144a869a135da660e7a3444ca
User & Date: olr on 2018-08-18 13:29:43
Other Links: branch diff | manifest | tags
Context
2018-08-18
20:27
[fr] conversion: regex rules -> graph rules check-in: 722c557610 user: olr tags: fr, rg
13:29
[core] gc engine: don’t check regexes on some META tokens check-in: 209474b757 user: olr tags: core, rg
13:07
[build] graph data display check-in: a7352c0d8f user: olr tags: build, rg
Changes

Modified gc_core/py/lang_core/gc_engine.py from [6415b59c9d] to [6ed05b1551].

635
636
637
638
639
640
641

642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
            sValue = dToken["sValue"].capitalize()
            if sValue in dNode:
                if bDebug:
                    print("  MATCH:", sValue)
                yield { "iNode1": iNode1, "dNode": dGraph[dNode[sValue]] }
                bTokenFound = True
        # regex value arcs

        if "<re_value>" in dNode:
            for sRegex in dNode["<re_value>"]:
                if "¬" not in sRegex:
                    # no anti-pattern
                    if re.search(sRegex, dToken["sValue"]):
                        if bDebug:
                            print("  MATCH: ~" + sRegex)
                        yield { "iNode1": iNode1, "dNode": dGraph[dNode["<re_value>"][sRegex]] }
                        bTokenFound = True
                else:
                    # there is an anti-pattern
                    sPattern, sNegPattern = sRegex.split("¬", 1)
                    if sNegPattern and re.search(sNegPattern, dToken["sValue"]):
                        continue
                    if not sPattern or re.search(sPattern, dToken["sValue"]):
                        if bDebug:
                            print("  MATCH: ~" + sRegex)
                        yield { "iNode1": iNode1, "dNode": dGraph[dNode["<re_value>"][sRegex]] }
                        bTokenFound = True
        # analysable tokens
        if dToken["sType"][0:4] == "WORD":
            # token lemmas
            if "<lemmas>" in dNode:
                for sLemma in _oSpellChecker.getLemma(dToken["sValue"]):
                    if sLemma in dNode["<lemmas>"]:
                        if bDebug:







>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|







635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
            sValue = dToken["sValue"].capitalize()
            if sValue in dNode:
                if bDebug:
                    print("  MATCH:", sValue)
                yield { "iNode1": iNode1, "dNode": dGraph[dNode[sValue]] }
                bTokenFound = True
        # regex value arcs
        if dToken["sType"] not in frozenset(["INFO", "PUNC", "SIGN"]):
            if "<re_value>" in dNode:
                for sRegex in dNode["<re_value>"]:
                    if "¬" not in sRegex:
                        # no anti-pattern
                        if re.search(sRegex, dToken["sValue"]):
                            if bDebug:
                                print("  MATCH: ~" + sRegex)
                            yield { "iNode1": iNode1, "dNode": dGraph[dNode["<re_value>"][sRegex]] }
                            bTokenFound = True
                    else:
                        # there is an anti-pattern
                        sPattern, sNegPattern = sRegex.split("¬", 1)
                        if sNegPattern and re.search(sNegPattern, dToken["sValue"]):
                            continue
                        if not sPattern or re.search(sPattern, dToken["sValue"]):
                            if bDebug:
                                print("  MATCH: ~" + sRegex)
                            yield { "iNode1": iNode1, "dNode": dGraph[dNode["<re_value>"][sRegex]] }
                            bTokenFound = True
        # analysable tokens
        if dToken["sType"][0:4] == "WORD":
            # token lemmas
            if "<lemmas>" in dNode:
                for sLemma in _oSpellChecker.getLemma(dToken["sValue"]):
                    if sLemma in dNode["<lemmas>"]:
                        if bDebug: