Grammalecte  Check-in [c051cc6ca9]

Overview
Comment:[core] merge tokens
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | rg
Files: files | file ages | folders
SHA3-256: c051cc6ca91c412eac591e533667687faae63a1ec1daea2fcfda56feeff2c721
User & Date: olr on 2018-06-23 16:28:37
Other Links: branch diff | manifest | tags
Context
2018-06-24
06:28
[core][bug] fix tokens merging check-in: 59d8df1fa2 user: olr tags: core, rg
2018-06-23
16:28
[core] merge tokens check-in: c051cc6ca9 user: olr tags: core, rg
13:05
[core][fr] immunity rules to prevent false positives check-in: 2cc4bc018d user: olr tags: fr, core, rg
Changes

Modified gc_core/py/lang_core/gc_engine.py from [51362c8a05] to [23dabd0b59].

879
880
881
882
883
884
885



886
887
888
889
890
891
892
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895







+
+
+







        if sWhat == "*":
            # purge text
            if nTokenRewriteEnd - nTokenRewriteStart == 0:
                self.lToken[nTokenRewriteStart]["bToRemove"] = True
            else:
                for i in range(nTokenRewriteStart, nTokenRewriteEnd+1):
                    self.lToken[i]["bToRemove"] = True
        elif sWhat == "_":
            # merge tokens
            self.lToken[nTokenRewriteStart]["nMergeUntil"] = nTokenRewriteEnd
        elif sWhat == "!":
            # immunity
            if nTokenRewriteEnd - nTokenRewriteStart == 0:
                self.lToken[nTokenRewriteStart]["bImmune"] = True
            else:
                for i in range(nTokenRewriteStart, nTokenRewriteEnd+1):
                    self.lToken[i]["bImmune"] = True
908
909
910
911
912
913
914


915


916
917
918


919












920

921
922
923
924
925



926
927
928
929
930
931
932
911
912
913
914
915
916
917
918
919

920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939

940
941
942
943
944

945
946
947
948
949
950
951
952
953
954







+
+
-
+
+



+
+

+
+
+
+
+
+
+
+
+
+
+
+
-
+




-
+
+
+







                    if bUppercase:
                        sValue = sValue[0:1].upper() + sValue[1:]
                    self.lToken[i]["sNewValue"] = sValue

    def rewrite (self, bDebug=False):
        "rewrite the sentence, modify tokens, purge the token list"
        lNewToken = []
        nMergeUntil = -1
        dTokenMerger = None
        for i, dToken in enumerate(self.lToken):
        for dToken in self.lToken:
            bKeepToken = True
            if "bImmune" in dToken:
                nErrorStart = self.nOffsetWithinParagraph + dToken["nStart"]
                if nErrorStart in self.dError:
                    if bDebug:
                        print("immunity -> error removed:", self.dError[nErrorStart])
                    del self.dError[nErrorStart]
            if dToken["i"] <= nMergeUntil:
                dTokenMerger["sValue"] += " " * (dToken["i"]["nStart"] - dTokenMerger["nEnd"]) + dToken["i"]["sValue"]
                dTokenMerger["nEnd"] = dToken["i"]["nEnd"]
                if bDebug:
                    print("Merged token:", dTokenMerger["sValue"])
                bKeepToken = False
            if "nMergeUntil" in dToken:
                if not nMergeUntil: # this token should already have been merged with a previous token
                    dTokenMerger = dToken
                if dToken["nMergeUntil"] > nMergeUntil:
                    nMergeUntil = dToken["nMergeUntil"]  
                del dToken["nMergeUntil"]
            if "bToRemove" in dToken:
            elif "bToRemove" in dToken:
                # remove useless token
                self.sSentence = self.sSentence[:dToken["nStart"]] + " " * (dToken["nEnd"] - dToken["nStart"]) + self.sSentence[dToken["nEnd"]:]
                if bDebug:
                    print("removed:", dToken["sValue"])
            else:
                bKeepToken = False
            #
            if bKeepToken:
                lNewToken.append(dToken)
                if "sNewValue" in dToken:
                    # rewrite token and sentence
                    if bDebug:
                        print(dToken["sValue"], "->", dToken["sNewValue"])
                    dToken["sRealValue"] = dToken["sValue"]
                    dToken["sValue"] = dToken["sNewValue"]