Grammalecte  Check-in [88118a8b73]

Overview
Comment:[core] better debugging mode
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | rg
Files: files | file ages | folders
SHA3-256: 88118a8b73ef2058b19a50b42620a332374d54e2909044cb591192102231d119
User & Date: olr on 2018-06-14 12:09:52
Other Links: branch diff | manifest | tags
Context
2018-06-14
12:31
[core] better debugging mode check-in: 51affbc3d4 user: olr tags: core, rg
12:09
[core] better debugging mode check-in: 88118a8b73 user: olr tags: core, rg
12:09
[fr] conversion: regex rules -> graph rules check-in: 1d33e72e1b user: olr tags: fr, rg
Changes

Modified gc_core/py/lang_core/gc_engine.py from [8eb4241441] to [9a771ad130].

162
163
164
165
166
167
168
169

170
171
172
173
174
175
176
162
163
164
165
166
167
168

169
170
171
172
173
174
175
176







-
+







        if sOption == "@@@@":
            # graph rules
            if not bParagraph and bSentenceChange:
                oSentence.update(s)
                bSentenceChange = False
            for sGraphName, sLineId in lRuleGroup:
                if bDebug:
                    print(sGraphName, sLineId)
                    print("\n>>>> GRAPH:", sGraphName, sLineId)
                bParagraphChange, errs = oSentence.parse(dAllGraph[sGraphName], dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext)
                dErrs.update(errs)
                if bParagraphChange:
                    s = oSentence.rewrite()
                    if bDebug:
                        print("~", oSentence.sSentence)
        elif not sOption or dOptions.get(sOption, False):
596
597
598
599
600
601
602
603

604
605
606

607

608
609
610
611
612
613
614
615
616

617

618
619
620
621
622

623

624
625
626
627
628
629
630


631
632
633
634
635
636
637


638
639
640
641
642


643
644
645
646
647
648
649
650
651


652
653
654
655

656
657
658
659

660
661
662


663
664
665
666
667
668
669
596
597
598
599
600
601
602

603
604
605
606
607

608
609
610
611
612
613
614
615
616
617
618

619
620
621
622
623
624
625

626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665

666
667
668
669

670
671
672
673
674
675
676
677
678
679
680
681
682







-
+



+
-
+









+
-
+





+
-
+







+
+







+
+





+
+









+
+



-
+



-
+



+
+







        self.dTokenPos = { dToken["nStart"]: dToken  for dToken in self.lToken }
        self.createError = self._createWriterError  if _bWriterError  else self._createDictError

    def update (self, sSentence):
        self.sSentence = sSentence
        self.lToken = list(_oTokenizer.genTokens(sSentence, True))

    def _getNextMatchingNodes (self, dToken, dGraph, dNode):
    def _getNextMatchingNodes (self, dToken, dGraph, dNode, bDebug=False):
        "generator: return nodes where <dToken> “values” match <dNode> arcs"
        # token value
        if dToken["sValue"] in dNode:
            if bDebug:
            #print("value found: ", dToken["sValue"])
                print("value found: ", dToken["sValue"])
            yield dGraph[dNode[dToken["sValue"]]]
        # token lemmas
        if "<lemmas>" in dNode:
            for sLemma in _oSpellChecker.getLemma(dToken["sValue"]):
                if sLemma in dNode["<lemmas>"]:
                    #print("lemma found: ", sLemma)
                    yield dGraph[dNode["<lemmas>"][sLemma]]
        # universal arc
        if "*" in dNode:
            if bDebug:
            #print("generic arc")
                print("generic arc")
            yield dGraph[dNode["*"]]
        # regex value arcs
        if "<re_value>" in dNode:
            for sRegex in dNode["<re_value>"]:
                if re.search(sRegex, dToken["sValue"]):
                    if bDebug:
                    #print("value regex matching: ", sRegex)
                        print("value regex matching: ", sRegex)
                    yield dGraph[dNode["<re_value>"][sRegex]]
        # regex morph arcs
        if "<re_morph>" in dNode:
            for sRegex in dNode["<re_morph>"]:
                if "¬" not in sRegex:
                    # no anti-pattern
                    if any(re.search(sRegex, sMorph)  for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                        if bDebug:
                            print("morph regex matching: ", sRegex)
                        yield dGraph[dNode["<re_morph>"][sRegex]]
                else:
                    # there is an anti-pattern
                    sPattern, sNegPattern = sRegex.split("¬", 1)
                    if sNegPattern == "*":
                        # all morphologies must match with <sPattern>
                        if all(re.search(sPattern, sMorph)  for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                            if bDebug:
                                print("morph regex matching: ", sRegex)
                            yield dGraph[dNode["<re_morph>"][sRegex]]
                    else:
                        if sNegPattern and any(re.search(sNegPattern, sMorph)  for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                            continue
                        if any(re.search(sPattern, sMorph)  for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                            if bDebug:
                                print("morph regex matching: ", sRegex)
                            yield dGraph[dNode["<re_morph>"][sRegex]]

    def parse (self, dGraph, dPriority, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False):
        dErr = {}
        dPriority = {}  # Key = position; value = priority
        dOpt = _dOptions  if not dOptions  else dOptions
        lPointer = []
        bChange = False
        for dToken in self.lToken:
            if bDebug:
                print("=", dToken["sValue"])
            # check arcs for each existing pointer
            lNextPointer = []
            for dPointer in lPointer:
                for dNode in self._getNextMatchingNodes(dToken, dGraph, dPointer["dNode"]):
                for dNode in self._getNextMatchingNodes(dToken, dGraph, dPointer["dNode"], bDebug):
                    lNextPointer.append({"iToken": dPointer["iToken"], "dNode": dNode})
            lPointer = lNextPointer
            # check arcs of first nodes
            for dNode in self._getNextMatchingNodes(dToken, dGraph, dGraph[0]):
            for dNode in self._getNextMatchingNodes(dToken, dGraph, dGraph[0], bDebug):
                lPointer.append({"iToken": dToken["i"], "dNode": dNode})
            # check if there is rules to check for each pointer
            for dPointer in lPointer:
                if bDebug:
                    print("+", dPointer)
                if "<rules>" in dPointer["dNode"]:
                    bHasChanged, errs = self._executeActions(dGraph, dPointer["dNode"]["<rules>"], dPointer["iToken"]-1, dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext)
                    dErr.update(errs)
                    if bHasChanged:
                        bChange = True
        return (bChange, dErr)