Grammalecte  Check-in [59d8df1fa2]

Overview
Comment:[core][bug] fix tokens merging
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | rg
Files: files | file ages | folders
SHA3-256: 59d8df1fa295536378d51f74f0e5bb0864f86b798c8412ad860cfc0168f2bac3
User & Date: olr on 2018-06-24 06:28:17
Other Links: branch diff | manifest | tags
Context
2018-06-24
11:39
[graphspell] code cleaning (pylint) check-in: 814d73b60e user: olr tags: graphspell, rg
06:28
[core][bug] fix tokens merging check-in: 59d8df1fa2 user: olr tags: core, rg
2018-06-23
16:28
[core] merge tokens check-in: c051cc6ca9 user: olr tags: core, rg
Changes

Modified gc_core/py/lang_core/gc_engine.py from [23dabd0b59] to [a4200b43e6].

911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
                    if bUppercase:
                        sValue = sValue[0:1].upper() + sValue[1:]
                    self.lToken[i]["sNewValue"] = sValue

    def rewrite (self, bDebug=False):
        "rewrite the sentence, modify tokens, purge the token list"
        lNewToken = []
        nMergeUntil = -1
        dTokenMerger = None
        for dToken in self.lToken:
            bKeepToken = True
            if "bImmune" in dToken:
                nErrorStart = self.nOffsetWithinParagraph + dToken["nStart"]
                if nErrorStart in self.dError:
                    if bDebug:
                        print("immunity -> error removed:", self.dError[nErrorStart])
                    del self.dError[nErrorStart]
            if dToken["i"] <= nMergeUntil:
                dTokenMerger["sValue"] += " " * (dToken["i"]["nStart"] - dTokenMerger["nEnd"]) + dToken["i"]["sValue"]
                dTokenMerger["nEnd"] = dToken["i"]["nEnd"]
                if bDebug:
                    print("Merged token:", dTokenMerger["sValue"])
                bKeepToken = False
            if "nMergeUntil" in dToken:
                if not nMergeUntil: # this token should alerady been merged with a previous token
                    dTokenMerger = dToken
                if dToken["nMergeUntil"] > nMergeUntil:
                    nMergeUntil = dToken["nMergeUntil"]  
                del dToken["nMergeUntil"]
            elif "bToRemove" in dToken:
                # remove useless token
                self.sSentence = self.sSentence[:dToken["nStart"]] + " " * (dToken["nEnd"] - dToken["nStart"]) + self.sSentence[dToken["nEnd"]:]







|









|
|
|




|







911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
                    if bUppercase:
                        sValue = sValue[0:1].upper() + sValue[1:]
                    self.lToken[i]["sNewValue"] = sValue

    def rewrite (self, bDebug=False):
        "rewrite the sentence, modify tokens, purge the token list"
        lNewToken = []
        nMergeUntil = 0
        dTokenMerger = None
        for dToken in self.lToken:
            bKeepToken = True
            if "bImmune" in dToken:
                nErrorStart = self.nOffsetWithinParagraph + dToken["nStart"]
                if nErrorStart in self.dError:
                    if bDebug:
                        print("immunity -> error removed:", self.dError[nErrorStart])
                    del self.dError[nErrorStart]
            if nMergeUntil and dToken["i"] <= nMergeUntil:
                dTokenMerger["sValue"] += " " * (dToken["nStart"] - dTokenMerger["nEnd"]) + dToken["sValue"]
                dTokenMerger["nEnd"] = dToken["nEnd"]
                if bDebug:
                    print("Merged token:", dTokenMerger["sValue"])
                bKeepToken = False
            if "nMergeUntil" in dToken:
                if dToken["i"] > nMergeUntil: # this token is not already merged with a previous token
                    dTokenMerger = dToken
                if dToken["nMergeUntil"] > nMergeUntil:
                    nMergeUntil = dToken["nMergeUntil"]  
                del dToken["nMergeUntil"]
            elif "bToRemove" in dToken:
                # remove useless token
                self.sSentence = self.sSentence[:dToken["nStart"]] + " " * (dToken["nEnd"] - dToken["nStart"]) + self.sSentence[dToken["nEnd"]:]

Modified gc_lang/fr/rules.grx from [7874208805] to [0f58bfdc3c].

4600
4601
4602
4603
4604
4605
4606
4607
4608
4609
4610
4611
4612
4613
4614
@@@@
@@@@
@@@@

__p_notre_père_qui_es_au_cieux__
    notre père qui [es|est] aux cieux
        <<- ~4>> !
        <<- ~3:0>> *


!!
!!
!!!! Formes verbales sans sujet                                                                     
!!
!!







|







4600
4601
4602
4603
4604
4605
4606
4607
4608
4609
4610
4611
4612
4613
4614
@@@@
@@@@
@@@@

__p_notre_père_qui_es_au_cieux__
    notre père qui [es|est] aux cieux
        <<- ~4>> !
        <<- ~3:0>> _


!!
!!
!!!! Formes verbales sans sujet                                                                     
!!
!!