Grammalecte  Diff

Differences From Artifact [28162284b3]:

To Artifact [0afa6afeb1]:


136
137
138
139
140
141
142
143
144
145
146


147
148
149
150
151
152
153
            dDA.clear()
            try:
                # regex parser
                _, errs = _proofread(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dDA, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
                aErrors.update(errs)
                # token parser
                oSentence = TokenSentence(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart)
                bChange, errs = oSentence.parse(dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
                aErrors.update(errs)
                if bChange:
                    oSentence.rewrite()


            except:
                raise
    return aErrors.values() # this is a view (iterable)


def _getSentenceBoundaries (sText):
    iStart = _zBeginOfParagraph.match(sText).end()







|



>
>







136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
            dDA.clear()
            try:
                # regex parser
                _, errs = _proofread(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dDA, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
                aErrors.update(errs)
                # token parser
                oSentence = TokenSentence(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart)
                bChange, errs = oSentence.parse(dPriority, sCountry, dOpt, bShowRuleId, True, bContext)
                aErrors.update(errs)
                if bChange:
                    oSentence.rewrite()
                    if True:
                        print("~", oSentence.sSentence)
            except:
                raise
    return aErrors.values() # this is a view (iterable)


def _getSentenceBoundaries (sText):
    iStart = _zBeginOfParagraph.match(sText).end()
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677


678
679
680
681


682
683
684
685
686


687
688

689
690
691
692
693


694
695
696
697
698
699
700
            lPointer = lNextPointer
            # check arcs of first nodes
            for dNode in self._getNextMatchingNodes(dToken, dGraph[0]):
                lPointer.append({"iToken": dToken["i"], "dNode": dNode})
            # check if there is rules to check for each pointer
            for dPointer in lPointer:
                if "<rules>" in dPointer["dNode"]:
                    bHasChanged, errs = self._executeActions(dPointer["dNode"]["<rules>"], dPointer["iToken"]-1, dPriority, dOpt, sCountry, bShowRuleId, bContext)
                    dErr.update(errs)
                    if bHasChanged:
                        bChange = True
        if dErr:
            print(dErr)
        return (bChange, dErr)

    def _executeActions (self, dNode, nTokenOffset, dPriority, dOpt, sCountry, bShowRuleId, bContext):
        """Execute the actions found in the DARG for one matched node.

        <dNode> maps a line id to a key of the module-level <dGraph>; every rule id
        listed under that key is looked up in the module-level <dRule> and, when its
        condition holds, its action is applied.

        Returns a tuple (bChange, dErrs):
        - bChange is True if at least one text processor action ("~") was applied;
        - dErrs maps the start position of an error to the value returned by self.createError().

        <dPriority> is modified in place: the priority of each recorded error is stored
        at the error start position.
        Any exception raised while a rule is evaluated or applied is re-raised as
        Exception(<message>, <sLineId>).
        NOTE(review): <dOpt> is not used in this method.
        """
        dErrs = {}
        bChange = False
        for sLineId, nextNodeKey in dNode.items():
            for sRuleId in dGraph[nextNodeKey]:
                # debug output (unconditional)
                print(sRuleId)
                # reset for every rule: the condition function below always receives None as last argument
                bCondMemo = None
                sFuncCond, cActionType, sWhat, *eAct = dRule[sRuleId]
                # action in lActions: [ condition, action type, replacement/suggestion/action[, iTokenStart, iTokenEnd[, nPriority, message, URL]] ]
                try:
                    # an empty condition name means the rule always applies;
                    # otherwise the condition function is fetched by name from the module globals
                    bCondMemo = not sFuncCond or globals()[sFuncCond](self.lToken, nTokenOffset, sCountry, bCondMemo)
                    if bCondMemo:
                        if cActionType == "-":
                            # grammar error
                            print("-")
                            # eAct[0] and eAct[1] are token positions relative to nTokenOffset
                            nTokenErrorStart = nTokenOffset + eAct[0]
                            nTokenErrorEnd = nTokenOffset + eAct[1]
                            nErrorStart = self.nOffset + self.lToken[nTokenErrorStart]["nStart"]
                            nErrorEnd = self.nOffset + self.lToken[nTokenErrorEnd]["nEnd"]
                            # record the error if none was recorded at this position during this call,
                            # or if this rule priority (eAct[2]) is greater than the one stored in dPriority
                            if nErrorStart not in dErrs or eAct[2] > dPriority[nErrorStart]:
                                dErrs[nErrorStart] = self.createError(sWhat, nTokenOffset, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, True, eAct[3], eAct[4], bShowRuleId, "notype", bContext)
                                dPriority[nErrorStart] = eAct[2]


                        elif cActionType == "~":
                            # text processor
                            print("~")
                            self._tagAndPrepareTokenForRewriting(sWhat, nTokenOffset + eAct[0], nTokenOffset + eAct[1])


                            bChange = True
                        elif cActionType == "=":
                            # disambiguation
                            # <sWhat> is the name of a function fetched from the module globals
                            print("=")
                            globals()[sWhat](self.lToken, nTokenOffset)


                        elif cActionType == ">":
                            # we do nothing, this test is just a condition to apply all following actions

                            print(">")
                            pass
                        else:
                            print("# error: unknown action at " + sLineId)
                    elif cActionType == ">":
                        # the condition of a ">" rule failed: skip the remaining rules listed under this graph key


                        break
                except Exception as e:
                    # re-raise with the line id attached; the original exception type is not preserved
                    raise Exception(str(e), sLineId)
        return bChange, dErrs

    def _createWriterError (self, sSugg, nTokenOffset, iFirstToken, nStart, nEnd, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
        "error for Writer (LO/OO)"







|



<
<


|





<








<







>
>


<

>
>



<

>
>


>
|




>
>







642
643
644
645
646
647
648
649
650
651
652


653
654
655
656
657
658
659
660

661
662
663
664
665
666
667
668

669
670
671
672
673
674
675
676
677
678
679

680
681
682
683
684
685

686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
            lPointer = lNextPointer
            # check arcs of first nodes
            for dNode in self._getNextMatchingNodes(dToken, dGraph[0]):
                lPointer.append({"iToken": dToken["i"], "dNode": dNode})
            # check if there is rules to check for each pointer
            for dPointer in lPointer:
                if "<rules>" in dPointer["dNode"]:
                    bHasChanged, errs = self._executeActions(dPointer["dNode"]["<rules>"], dPointer["iToken"]-1, dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext)
                    dErr.update(errs)
                    if bHasChanged:
                        bChange = True


        return (bChange, dErr)

    def _executeActions (self, dNode, nTokenOffset, dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext):
        """Execute the actions found in the DARG for one matched node.

        <dNode> maps a line id to a key of the module-level <dGraph>; every rule id
        listed under that key is looked up in the module-level <dRule> and, when its
        condition holds, its action is applied.

        Returns a tuple (bChange, dErrs):
        - bChange is True if at least one text processor action ("~") was applied;
        - dErrs maps the start position of an error to the value returned by self.createError().

        <dPriority> is modified in place: the priority of each recorded error is stored
        at the error start position.
        When <bDebug> is true, a line is printed for each action applied
        (and for each ">" rule whose condition failed).
        Any exception raised while a rule is evaluated or applied is re-raised as
        Exception(<message>, <sLineId>).
        NOTE(review): <dOpt> is not used in this method.
        """
        dErrs = {}
        bChange = False
        for sLineId, nextNodeKey in dNode.items():
            for sRuleId in dGraph[nextNodeKey]:

                # reset for every rule: the condition function below always receives None as last argument
                bCondMemo = None
                sFuncCond, cActionType, sWhat, *eAct = dRule[sRuleId]
                # action in lActions: [ condition, action type, replacement/suggestion/action[, iTokenStart, iTokenEnd[, nPriority, message, URL]] ]
                try:
                    # an empty condition name means the rule always applies;
                    # otherwise the condition function is fetched by name from the module globals
                    bCondMemo = not sFuncCond or globals()[sFuncCond](self.lToken, nTokenOffset, sCountry, bCondMemo)
                    if bCondMemo:
                        if cActionType == "-":
                            # grammar error

                            # eAct[0] and eAct[1] are token positions relative to nTokenOffset
                            nTokenErrorStart = nTokenOffset + eAct[0]
                            nTokenErrorEnd = nTokenOffset + eAct[1]
                            nErrorStart = self.nOffset + self.lToken[nTokenErrorStart]["nStart"]
                            nErrorEnd = self.nOffset + self.lToken[nTokenErrorEnd]["nEnd"]
                            # record the error if none was recorded at this position during this call,
                            # or if this rule priority (eAct[2]) is greater than the one stored in dPriority
                            if nErrorStart not in dErrs or eAct[2] > dPriority[nErrorStart]:
                                dErrs[nErrorStart] = self.createError(sWhat, nTokenOffset, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, True, eAct[3], eAct[4], bShowRuleId, "notype", bContext)
                                dPriority[nErrorStart] = eAct[2]
                                if bDebug:
                                    print("-", sRuleId, dErrs[nErrorStart])
                        elif cActionType == "~":
                            # text processor

                            self._tagAndPrepareTokenForRewriting(sWhat, nTokenOffset + eAct[0], nTokenOffset + eAct[1])
                            if bDebug:
                                print("~", sRuleId)
                            bChange = True
                        elif cActionType == "=":
                            # disambiguation
                            # <sWhat> is the name of a function fetched from the module globals

                            globals()[sWhat](self.lToken, nTokenOffset)
                            if bDebug:
                                print("=", sRuleId)
                        elif cActionType == ">":
                            # we do nothing, this test is just a condition to apply all following actions
                            if bDebug:
                                print(">", sRuleId)
                            pass
                        else:
                            print("# error: unknown action at " + sLineId)
                    elif cActionType == ">":
                        # the condition of a ">" rule failed: skip the remaining rules listed under this graph key
                        if bDebug:
                            print(">!", sRuleId)
                        break
                except Exception as e:
                    # re-raise with the line id attached; the original exception type is not preserved
                    raise Exception(str(e), sLineId)
        return bChange, dErrs

    def _createWriterError (self, sSugg, nTokenOffset, iFirstToken, nStart, nEnd, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
        "error for Writer (LO/OO)"
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
            p = PropertyValue()
            p.Name = "FullCommentURL"
            p.Value = sURL
            xErr.aProperties = (p,)
        else:
            xErr.aProperties = ()
        return xErr
                                                             
    def _createDictError (self, sSugg, nTokenOffset, iFirstToken, nStart, nEnd, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
        "error as a dictionary"
        dErr = {}
        dErr["nStart"] = nStart
        dErr["nEnd"] = nEnd
        dErr["sLineId"] = sLineId
        dErr["sRuleId"] = sRuleId







|







737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
            p = PropertyValue()
            p.Name = "FullCommentURL"
            p.Value = sURL
            xErr.aProperties = (p,)
        else:
            xErr.aProperties = ()
        return xErr

    def _createDictError (self, sSugg, nTokenOffset, iFirstToken, nStart, nEnd, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
        "error as a dictionary"
        dErr = {}
        dErr["nStart"] = nStart
        dErr["nEnd"] = nEnd
        dErr["sLineId"] = sLineId
        dErr["sRuleId"] = sRuleId
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
        if bContext:
            dErr['sUnderlined'] = self.sSentence0[dErr["nStart"]:dErr["nEnd"]]
            dErr['sBefore'] = self.sSentence0[max(0,dErr["nStart"]-80):dErr["nStart"]]
            dErr['sAfter'] = self.sSentence0[dErr["nEnd"]:dErr["nEnd"]+80]
        return dErr

    def _expand (self, sMsg, nTokenOffset):
        print(sMsg)
        for m in re.finditer(r"\\([0-9]+)", sMsg):
            sMsg = sMsg.replace(m.group(0), self.lToken[int(m.group(1))+nTokenOffset]["sValue"])
        print(sMsg)
        return sMsg

    def _tagAndPrepareTokenForRewriting (self, sWhat, nTokenRewriteStart, nTokenRewriteEnd, bUppercase=True):
        "text processor: rewrite tokens between <nTokenRewriteStart> and <nTokenRewriteEnd> position"
        if sWhat == "*":
            # purge text
            if nTokenRewriteEnd - nTokenRewriteStart == 0:







|


|







777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
        if bContext:
            dErr['sUnderlined'] = self.sSentence0[dErr["nStart"]:dErr["nEnd"]]
            dErr['sBefore'] = self.sSentence0[max(0,dErr["nStart"]-80):dErr["nStart"]]
            dErr['sAfter'] = self.sSentence0[dErr["nEnd"]:dErr["nEnd"]+80]
        return dErr

    def _expand (self, sMsg, nTokenOffset):
        #print("*", sMsg)
        for m in re.finditer(r"\\([0-9]+)", sMsg):
            sMsg = sMsg.replace(m.group(0), self.lToken[int(m.group(1))+nTokenOffset]["sValue"])
        #print(">", sMsg)
        return sMsg

    def _tagAndPrepareTokenForRewriting (self, sWhat, nTokenRewriteStart, nTokenRewriteEnd, bUppercase=True):
        "text processor: rewrite tokens between <nTokenRewriteStart> and <nTokenRewriteEnd> position"
        if sWhat == "*":
            # purge text
            if nTokenRewriteEnd - nTokenRewriteStart == 0:
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
                # remove useless token
                self.sSentence = self.sSentence[:self.nOffset+dToken["nStart"]] + " " * (dToken["nEnd"] - dToken["nStart"]) + self.sSentence[self.nOffset+dToken["nEnd"]:]
                #print("removed:", dToken["sValue"])
            else:
                lNewToken.append(dToken)
                if "sNewValue" in dToken:
                    # rewrite token and sentence
                    print(dToken["sValue"], "->", dToken["sNewValue"])
                    dToken["sRealValue"] = dToken["sValue"]
                    dToken["sValue"] = dToken["sNewValue"]
                    nDiffLen = len(dToken["sRealValue"]) - len(dToken["sNewValue"])
                    sNewRepl = (dToken["sNewValue"] + " " * nDiffLen)  if nDiffLen >= 0  else dToken["sNewValue"][:len(dToken["sRealValue"])]
                    self.sSentence = self.sSentence[:self.nOffset+dToken["nStart"]] + sNewRepl + self.sSentence[self.nOffset+dToken["nEnd"]:]
                    del dToken["sNewValue"]
        print(self.sSentence)
        self.lToken.clear()
        self.lToken = lNewToken



#### Analyse tokens








|






<







823
824
825
826
827
828
829
830
831
832
833
834
835
836

837
838
839
840
841
842
843
                # remove useless token
                self.sSentence = self.sSentence[:self.nOffset+dToken["nStart"]] + " " * (dToken["nEnd"] - dToken["nStart"]) + self.sSentence[self.nOffset+dToken["nEnd"]:]
                #print("removed:", dToken["sValue"])
            else:
                lNewToken.append(dToken)
                if "sNewValue" in dToken:
                    # rewrite token and sentence
                    #print(dToken["sValue"], "->", dToken["sNewValue"])
                    dToken["sRealValue"] = dToken["sValue"]
                    dToken["sValue"] = dToken["sNewValue"]
                    nDiffLen = len(dToken["sRealValue"]) - len(dToken["sNewValue"])
                    sNewRepl = (dToken["sNewValue"] + " " * nDiffLen)  if nDiffLen >= 0  else dToken["sNewValue"][:len(dToken["sRealValue"])]
                    self.sSentence = self.sSentence[:self.nOffset+dToken["nStart"]] + sNewRepl + self.sSentence[self.nOffset+dToken["nEnd"]:]
                    del dToken["sNewValue"]

        self.lToken.clear()
        self.lToken = lNewToken



#### Analyse tokens