Grammalecte Check-in [28025b7ef5]

Overview
Comment: [core][bug] gc engine: use ad hoc token index
SHA3-256: 28025b7ef5aea0cf70ab7cf9e3f492e4afc67cefff541948fd5b81f61fb2a0bc
User & Date: olr on 2018-07-24 22:14:18
Context
2018-07-25
15:04  [build] get token outside secure scope (check-in: 245941cc07, user: olr, tags: build, rg)
2018-07-24
22:14  [core][bug] gc engine: use ad hoc token index (check-in: 28025b7ef5, user: olr, tags: core, rg)
22:13  [fr] conversion: regex rules -> graph rules (check-in: 2b52a5e3d7, user: olr, tags: fr, rg)
Changes

Modified gc_core/py/lang_core/gc_engine.py from [903e91d939] to [a408aa043b].

@@ -709,32 +709,32 @@
                    yield dGraph[dNode["<meta>"][sMeta]]

    def parse (self, dGraph, dPriority, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False):
        "parse tokens from the text and execute actions encountered"
        dOpt = _dOptions  if not dOptions  else dOptions
        lPointer = []
        bTagAndRewrite = False
-        for dToken in self.lToken:
+        for i, dToken in enumerate(self.lToken):
            if bDebug:
                print("TOKEN:", dToken["sValue"])
            # check arcs for each existing pointer
            lNextPointer = []
            for dPointer in lPointer:
                for dNode in self._getNextMatchingNodes(dToken, dGraph, dPointer["dNode"], bDebug):
                    lNextPointer.append({"iToken": dPointer["iToken"], "dNode": dNode})
            lPointer = lNextPointer
            # check arcs of first nodes
            for dNode in self._getNextMatchingNodes(dToken, dGraph, dGraph[0], bDebug):
                lPointer.append({"iToken": dToken["i"], "dNode": dNode})
                lPointer.append({"iToken": i, "dNode": dNode})
            # check if there are rules to check for each pointer
            for dPointer in lPointer:
                #if bDebug:
                #    print("+", dPointer)
                if "<rules>" in dPointer["dNode"]:
-                    bChange = self._executeActions(dGraph, dPointer["dNode"]["<rules>"], dPointer["iToken"]-1, dToken["i"], dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext)
+                    bChange = self._executeActions(dGraph, dPointer["dNode"]["<rules>"], dPointer["iToken"]-1, i, dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext)
                    if bChange:
                        bTagAndRewrite = True
        if bTagAndRewrite:
            self.rewrite(bDebug)
        if bDebug:
            print(self)
        return (bTagAndRewrite, self.sSentence)
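
As far as the diff shows, parse() previously passed dToken["i"], an index stored inside each token dict, to _executeActions(); it now passes the enumeration index i, the token's live position in self.lToken, which is presumably what the commit comment's "ad hoc token index" means. A toy sketch (not engine code; the token dicts are simplified stand-ins) of how a stored index can diverge from the live position once the list is modified:

    lToken = [{"i": 0, "sValue": "a"}, {"i": 1, "sValue": "b"}, {"i": 2, "sValue": "c"}]
    del lToken[1]   # e.g. a rewrite pass drops a token without renumbering "i"
    for i, dToken in enumerate(lToken):
        print(i, dToken["i"], dToken["sValue"])
    # 0 0 a
    # 1 2 c   <- dToken["i"] no longer matches the token's position in the list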
@@ -930,31 +930,31 @@
    def rewrite (self, bDebug=False):
        "rewrite the sentence, modify tokens, purge the token list"
        if bDebug:
            print("REWRITE")
        lNewToken = []
        nMergeUntil = 0
        dTokenMerger = None
-        for dToken in self.lToken:
+        for iToken, dToken in enumerate(self.lToken):
            bKeepToken = True
            if dToken["sType"] != "INFO":
                if "bImmune" in dToken:
                    nErrorStart = self.nOffsetWithinParagraph + dToken["nStart"]
                    if nErrorStart in self.dError:
                        if bDebug:
                            print("immunity -> error removed:", self.dError[nErrorStart])
                        del self.dError[nErrorStart]
-                if nMergeUntil and dToken["i"] <= nMergeUntil:
+                if nMergeUntil and iToken <= nMergeUntil:
                    dTokenMerger["sValue"] += " " * (dToken["nStart"] - dTokenMerger["nEnd"]) + dToken["sValue"]
                    dTokenMerger["nEnd"] = dToken["nEnd"]
                    if bDebug:
                        print("  MERGED TOKEN:", dTokenMerger["sValue"])
                    bKeepToken = False
                if "nMergeUntil" in dToken:
-                    if dToken["i"] > nMergeUntil: # this token is not already merged with a previous token
+                    if iToken > nMergeUntil: # this token is not already merged with a previous token
                        dTokenMerger = dToken
                    if dToken["nMergeUntil"] > nMergeUntil:
                        nMergeUntil = dToken["nMergeUntil"]
                    del dToken["nMergeUntil"]
                elif "bToRemove" in dToken:
                    if bDebug:
                        print("  REMOVED:", dToken["sValue"])
@@ -977,15 +977,15 @@
                try:
                    del self.dTokenPos[dToken["nStart"]]
                except:
                    print(self)
                    print(dToken)
                    exit()
        if bDebug:
            print("  REWRITED:", self.sSentence)
            print("  TEXT REWRITED:", self.sSentence)
        self.lToken.clear()
        self.lToken = lNewToken



#### Analyse tokens
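
A hypothetical, self-contained run of the merge logic from the rewrite() hunk above, with made-up tokens. In the engine, nMergeUntil is set on a token elsewhere (by a rule action); the INFO start token here mirrors the sType guard in the real loop, and the bImmune/bToRemove branches are left out:

    lToken = [
        {"sType": "INFO", "sValue": "<start>", "nStart": 0, "nEnd": 0},
        {"sType": "WORD", "sValue": "New", "nStart": 0, "nEnd": 3, "nMergeUntil": 2},
        {"sType": "WORD", "sValue": "York", "nStart": 4, "nEnd": 8},
    ]
    lNewToken = []
    nMergeUntil = 0
    dTokenMerger = None
    for iToken, dToken in enumerate(lToken):
        bKeepToken = True
        if dToken["sType"] != "INFO":
            if nMergeUntil and iToken <= nMergeUntil:
                # absorb this token into the merger, preserving the original spacing
                dTokenMerger["sValue"] += " " * (dToken["nStart"] - dTokenMerger["nEnd"]) + dToken["sValue"]
                dTokenMerger["nEnd"] = dToken["nEnd"]
                bKeepToken = False
            if "nMergeUntil" in dToken:
                if iToken > nMergeUntil:    # not yet merged into a previous token
                    dTokenMerger = dToken
                if dToken["nMergeUntil"] > nMergeUntil:
                    nMergeUntil = dToken["nMergeUntil"]
                del dToken["nMergeUntil"]
        if bKeepToken:
            lNewToken.append(dToken)
    print([d["sValue"] for d in lNewToken])   # ['<start>', 'New York']

Because dTokenMerger refers to the same dict that was already appended, the merged value shows up in lNewToken; with the old stored index, a token list that had already been renumbered or shortened could make the iToken <= nMergeUntil comparison miss tokens that should have been merged.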