Grammalecte Check-in [c051cc6ca9]

Overview
Comment: [core] merge tokens
SHA3-256: c051cc6ca91c412eac591e533667687faae63a1ec1daea2fcfda56feeff2c721
User & Date: olr on 2018-06-23 16:28:37
Context
2018-06-24 06:28 [core][bug] fix tokens merging check-in: 59d8df1fa2 user: olr tags: core, rg
2018-06-23 16:28 [core] merge tokens check-in: c051cc6ca9 user: olr tags: core, rg (this check-in)
2018-06-23 13:05 [core][fr] immunity rules to prevent false positives check-in: 2cc4bc018d user: olr tags: fr, core, rg
Changes

Modified gc_core/py/lang_core/gc_engine.py from [51362c8a05] to [23dabd0b59].

Before (lines 879-892):
        if sWhat == "*":
            # purge text
            if nTokenRewriteEnd - nTokenRewriteStart == 0:
                self.lToken[nTokenRewriteStart]["bToRemove"] = True
            else:
                for i in range(nTokenRewriteStart, nTokenRewriteEnd+1):
                    self.lToken[i]["bToRemove"] = True
        elif sWhat == "!":
            # immunity
            if nTokenRewriteEnd - nTokenRewriteStart == 0:
                self.lToken[nTokenRewriteStart]["bImmune"] = True
            else:
                for i in range(nTokenRewriteStart, nTokenRewriteEnd+1):
                    self.lToken[i]["bImmune"] = True

After (lines 879-895):
        if sWhat == "*":
            # purge text
            if nTokenRewriteEnd - nTokenRewriteStart == 0:
                self.lToken[nTokenRewriteStart]["bToRemove"] = True
            else:
                for i in range(nTokenRewriteStart, nTokenRewriteEnd+1):
                    self.lToken[i]["bToRemove"] = True
        elif sWhat == "_":
            # merge tokens
            self.lToken[nTokenRewriteStart]["nMergeUntil"] = nTokenRewriteEnd
        elif sWhat == "!":
            # immunity
            if nTokenRewriteEnd - nTokenRewriteStart == 0:
                self.lToken[nTokenRewriteStart]["bImmune"] = True
            else:
                for i in range(nTokenRewriteStart, nTokenRewriteEnd+1):
                    self.lToken[i]["bImmune"] = True
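
For illustration, here is a minimal stand-alone sketch of what the new "_" branch does: the first token of the matched span records in nMergeUntil the index of the last token it must absorb, and the actual merging is deferred to rewrite(). The helper markTagOnTokens and the hand-built token dictionaries below are hypothetical, not part of the engine; only the keys (sValue, nStart, nEnd, bToRemove, bImmune, nMergeUntil) mirror the code above.

def markTagOnTokens (lToken, nTokenRewriteStart, nTokenRewriteEnd, sWhat):
    "hypothetical stand-alone version of the dispatch above (sketch only)"
    if sWhat == "*":
        # purge: flag every token of the span for removal
        for i in range(nTokenRewriteStart, nTokenRewriteEnd+1):
            lToken[i]["bToRemove"] = True
    elif sWhat == "_":
        # merge: only the first token is tagged; it remembers up to which index it must absorb the following tokens
        lToken[nTokenRewriteStart]["nMergeUntil"] = nTokenRewriteEnd
    elif sWhat == "!":
        # immunity: flag every token of the span as immune to error reporting
        for i in range(nTokenRewriteStart, nTokenRewriteEnd+1):
            lToken[i]["bImmune"] = True

lToken = [ {"i": 0, "sValue": "New", "nStart": 0, "nEnd": 3},
           {"i": 1, "sValue": "York", "nStart": 4, "nEnd": 8} ]
markTagOnTokens(lToken, 0, 1, "_")
print(lToken[0])   # {'i': 0, 'sValue': 'New', 'nStart': 0, 'nEnd': 3, 'nMergeUntil': 1}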

Before (lines 908-932):
                    if bUppercase:
                        sValue = sValue[0:1].upper() + sValue[1:]
                    self.lToken[i]["sNewValue"] = sValue

    def rewrite (self, bDebug=False):
        "rewrite the sentence, modify tokens, purge the token list"
        lNewToken = []
        for i, dToken in enumerate(self.lToken):
            if "bImmune" in dToken:
                nErrorStart = self.nOffsetWithinParagraph + dToken["nStart"]
                if nErrorStart in self.dError:
                    del self.dError[nErrorStart]
            if "bToRemove" in dToken:
                # remove useless token
                self.sSentence = self.sSentence[:dToken["nStart"]] + " " * (dToken["nEnd"] - dToken["nStart"]) + self.sSentence[dToken["nEnd"]:]
                if bDebug:
                    print("removed:", dToken["sValue"])
            else:
                lNewToken.append(dToken)
                if "sNewValue" in dToken:
                    # rewrite token and sentence
                    if bDebug:
                        print(dToken["sValue"], "->", dToken["sNewValue"])
                    dToken["sRealValue"] = dToken["sValue"]
                    dToken["sValue"] = dToken["sNewValue"]

After (lines 911-954):
                    if bUppercase:
                        sValue = sValue[0:1].upper() + sValue[1:]
                    self.lToken[i]["sNewValue"] = sValue

    def rewrite (self, bDebug=False):
        "rewrite the sentence, modify tokens, purge the token list"
        lNewToken = []
        nMergeUntil = -1
        dTokenMerger = None
        for dToken in self.lToken:
            bKeepToken = True
            if "bImmune" in dToken:
                nErrorStart = self.nOffsetWithinParagraph + dToken["nStart"]
                if nErrorStart in self.dError:
                    if bDebug:
                        print("immunity -> error removed:", self.dError[nErrorStart])
                    del self.dError[nErrorStart]
            if dToken["i"] <= nMergeUntil:
                dTokenMerger["sValue"] += " " * (dToken["nStart"] - dTokenMerger["nEnd"]) + dToken["sValue"]
                dTokenMerger["nEnd"] = dToken["nEnd"]
                if bDebug:
                    print("Merged token:", dTokenMerger["sValue"])
                bKeepToken = False
            if "nMergeUntil" in dToken:
                if dToken["i"] > nMergeUntil:  # this token is not already being merged with a previous token
                    dTokenMerger = dToken
                if dToken["nMergeUntil"] > nMergeUntil:
                    nMergeUntil = dToken["nMergeUntil"]
                del dToken["nMergeUntil"]
            elif "bToRemove" in dToken:
                # remove useless token
                self.sSentence = self.sSentence[:dToken["nStart"]] + " " * (dToken["nEnd"] - dToken["nStart"]) + self.sSentence[dToken["nEnd"]:]
                if bDebug:
                    print("removed:", dToken["sValue"])
                bKeepToken = False
            #
            if bKeepToken:
                lNewToken.append(dToken)
                if "sNewValue" in dToken:
                    # rewrite token and sentence
                    if bDebug:
                        print(dToken["sValue"], "->", dToken["sNewValue"])
                    dToken["sRealValue"] = dToken["sValue"]
                    dToken["sValue"] = dToken["sNewValue"]
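
As a rough usage sketch, the merging part of the loop above can be exercised on plain dictionaries: a token carrying nMergeUntil becomes the merger, the following tokens up to that index are concatenated into it (padded with spaces to preserve offsets) and dropped from the resulting list. The stand-alone helper mergeTokens and the sample tokens are hypothetical, not part of the engine; they only mimic the nMergeUntil handling shown above.

def mergeTokens (lToken):
    "hypothetical stand-alone version of the merging loop above (sketch only)"
    lNewToken = []
    nMergeUntil = -1
    dTokenMerger = None
    for dToken in lToken:
        bKeepToken = True
        if dToken["i"] <= nMergeUntil:
            # absorb this token into the merger, padding with spaces to keep offsets consistent
            dTokenMerger["sValue"] += " " * (dToken["nStart"] - dTokenMerger["nEnd"]) + dToken["sValue"]
            dTokenMerger["nEnd"] = dToken["nEnd"]
            bKeepToken = False
        if "nMergeUntil" in dToken:
            if dToken["i"] > nMergeUntil:  # not already absorbed by a previous merger
                dTokenMerger = dToken
            nMergeUntil = max(nMergeUntil, dToken["nMergeUntil"])
            del dToken["nMergeUntil"]
        if bKeepToken:
            lNewToken.append(dToken)
    return lNewToken

lToken = [ {"i": 0, "sValue": "New",  "nStart": 0, "nEnd": 3, "nMergeUntil": 1},
           {"i": 1, "sValue": "York", "nStart": 4, "nEnd": 8},
           {"i": 2, "sValue": "City", "nStart": 9, "nEnd": 13} ]
print([dToken["sValue"] for dToken in mergeTokens(lToken)])   # ['New York', 'City']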