Grammalecte  Check-in [c55bec5997]

Overview
Comment:[core] better communication between graph rules and regex rules (still a mess for the transition)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | rg
Files: files | file ages | folders
SHA3-256: c55bec5997b5f628feda053fcc611fecafd498b4ac9b87934d575421e4c6c06d
User & Date: olr on 2018-07-09 13:49:47
Other Links: branch diff | manifest | tags
Context
2018-07-09
15:01
[fr] conversion: regex rules -> graph rules check-in: d05dab5b5a user: olr tags: fr, rg
13:49
[core] better communication between graph rules and regex rules (still a mess for the transition) check-in: c55bec5997 user: olr tags: core, rg
13:48
[fr] micro-corrections check-in: 2c4096dba5 user: olr tags: fr, rg
Changes

Modified gc_core/py/lang_core/gc_engine.py from [e53fccaf02] to [8ce717d080].

111
112
113
114
115
116
117
118

119
120
121
122
123
124
125
126

127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147

148
149
150
151

152
153
154

155
156
157
158
159
160
161

162
163
164
165
166
167
168
169

170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188


189
190
191
192
193
194
195
111
112
113
114
115
116
117

118
119
120
121
122
123
124
125

126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146

147

148
149

150
151
152

153

154
155
156
157
158
159
160
161
162
163
164
165
166
167

168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185


186
187
188
189
190
191
192
193
194







-
+







-
+




















-
+
-


-
+


-
+
-






+







-
+

















-
-
+
+







        yield (iStart, m.end())
        iStart = m.end()


def parse (sText, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
    "analyses the paragraph sText and returns list of errors"
    #sText = unicodedata.normalize("NFC", sText)
    aErrors = None
    dErrors = {}
    sRealText = sText
    dPriority = {}  # Key = position; value = priority
    dOpt = _dOptions  if not dOptions  else dOptions
    bShowRuleId = option('idrule')

    # parse paragraph
    try:
        sNew, aErrors = _proofread(None, sText, sRealText, 0, True, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
        sNew, dErrors = _proofread(None, sText, sRealText, 0, True, dErrors, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
        if sNew:
            sText = sNew
    except:
        raise

    # cleanup
    if " " in sText:
        sText = sText.replace(" ", ' ') # nbsp
    if " " in sText:
        sText = sText.replace(" ", ' ') # nnbsp
    if "'" in sText:
        sText = sText.replace("'", "’")
    if "‑" in sText:
        sText = sText.replace("‑", "-") # nobreakdash

    # parse sentences
    for iStart, iEnd in _getSentenceBoundaries(sText):
        if 4 < (iEnd - iStart) < 2000:
            try:
                oSentence = TokenSentence(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart)
                _, errs = _proofread(oSentence, sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
                _, dErrors = _proofread(oSentence, sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dErrors, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
                aErrors.update(errs)
            except:
                raise
    return aErrors.values() # this is a view (iterable)
    return dErrors.values() # this is a view (iterable)


def _proofread (oSentence, s, sx, nOffset, bParagraph, dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext):
def _proofread (oSentence, s, sx, nOffset, bParagraph, dErrors, dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext):
    dErrs = {}
    bParagraphChange = False
    bSentenceChange = False
    dTokenPos = oSentence.dTokenPos if oSentence else {}
    for sOption, lRuleGroup in _getRules(bParagraph):
        if sOption == "@@@@":
            # graph rules
            oSentence.dError = dErrors
            if not bParagraph and bSentenceChange:
                oSentence.update(s, bDebug)
                bSentenceChange = False
            for sGraphName, sLineId in lRuleGroup:
                if bDebug:
                    print("\n>>>> GRAPH:", sGraphName, sLineId)
                bParagraphChange, s = oSentence.parse(dAllGraph[sGraphName], dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext)
                dErrs.update(oSentence.dError)
                dErrors.update(oSentence.dError)
                dTokenPos = oSentence.dTokenPos
        elif not sOption or dOptions.get(sOption, False):
            # regex rules
            for zRegex, bUppercase, sLineId, sRuleId, nPriority, lActions in lRuleGroup:
                if sRuleId not in _aIgnoredRules:
                    for m in zRegex.finditer(s):
                        bCondMemo = None
                        for sFuncCond, cActionType, sWhat, *eAct in lActions:
                            # action in lActions: [ condition, action type, replacement/suggestion/action[, iGroup[, message, URL]] ]
                            try:
                                bCondMemo = not sFuncCond or globals()[sFuncCond](s, sx, m, dTokenPos, sCountry, bCondMemo)
                                if bCondMemo:
                                    if bDebug:
                                        print("RULE:", sLineId)
                                    if cActionType == "-":
                                        # grammar error
                                        nErrorStart = nOffset + m.start(eAct[0])
                                        if nErrorStart not in dErrs or nPriority > dPriority.get(nErrorStart, -1):
                                            dErrs[nErrorStart] = _createError(s, sx, sWhat, nOffset, m, eAct[0], sLineId, sRuleId, bUppercase, eAct[1], eAct[2], bShowRuleId, sOption, bContext)
                                        if nErrorStart not in dErrors or nPriority > dPriority.get(nErrorStart, -1):
                                            dErrors[nErrorStart] = _createError(s, sx, sWhat, nOffset, m, eAct[0], sLineId, sRuleId, bUppercase, eAct[1], eAct[2], bShowRuleId, sOption, bContext)
                                            dPriority[nErrorStart] = nPriority
                                    elif cActionType == "~":
                                        # text processor
                                        s = _rewrite(s, sWhat, eAct[0], m, bUppercase)
                                        bParagraphChange = True
                                        bSentenceChange = True
                                        if bDebug:
206
207
208
209
210
211
212
213
214


215
216
217
218
219
220
221
205
206
207
208
209
210
211


212
213
214
215
216
217
218
219
220







-
-
+
+







                                    else:
                                        echo("# error: unknown action at " + sLineId)
                                elif cActionType == ">":
                                    break
                            except Exception as e:
                                raise Exception(str(e), "# " + sLineId + " # " + sRuleId)
    if bParagraphChange:
        return (s, dErrs)
    return (False, dErrs)
        return (s, dErrors)
    return (False, dErrors)


def _createError (s, sx, sRepl, nOffset, m, iGroup, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
    nStart = nOffset + m.start(iGroup)
    nEnd = nOffset + m.end(iGroup)
    # suggestions
    if sRepl[0:1] == "=":
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728

729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
698
699
700
701
702
703
704


705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724

725

726
727
728
729
730
731
732
733
734
735

736
737
738
739
740
741
742







-
-




















-
+
-










-







                elif dToken["sType"] in sMeta:
                    if bDebug:
                        print("  MATCH: *" + sMeta)
                    yield dGraph[dNode["<meta>"][sMeta]]

    def parse (self, dGraph, dPriority, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False):
        "parse tokens from the text and execute actions encountered"
        self.dError = {}
        dPriority = {}  # Key = position; value = priority
        dOpt = _dOptions  if not dOptions  else dOptions
        lPointer = []
        bTagAndRewrite = False
        for dToken in self.lToken:
            if bDebug:
                print("TOKEN:", dToken["sValue"])
            # check arcs for each existing pointer
            lNextPointer = []
            for dPointer in lPointer:
                for dNode in self._getNextMatchingNodes(dToken, dGraph, dPointer["dNode"], bDebug):
                    lNextPointer.append({"iToken": dPointer["iToken"], "dNode": dNode})
            lPointer = lNextPointer
            # check arcs of first nodes
            for dNode in self._getNextMatchingNodes(dToken, dGraph, dGraph[0], bDebug):
                lPointer.append({"iToken": dToken["i"], "dNode": dNode})
            # check if there is rules to check for each pointer
            for dPointer in lPointer:
                #if bDebug:
                #    print("+", dPointer)
                if "<rules>" in dPointer["dNode"]:
                    bChange, dErr = self._executeActions(dGraph, dPointer["dNode"]["<rules>"], dPointer["iToken"]-1, dToken["i"], dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext)
                    bChange = self._executeActions(dGraph, dPointer["dNode"]["<rules>"], dPointer["iToken"]-1, dToken["i"], dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext)
                    self.dError.update(dErr)
                    if bChange:
                        bTagAndRewrite = True
        if bTagAndRewrite:
            self.rewrite(bDebug)
        if bDebug:
            print(self)
        return (bTagAndRewrite, self.sSentence)

    def _executeActions (self, dGraph, dNode, nTokenOffset, nLastToken, dPriority, dOptions, sCountry, bShowRuleId, bDebug, bContext):
        "execute actions found in the DARG"
        dError = {}
        bChange = False
        for sLineId, nextNodeKey in dNode.items():
            bCondMemo = None
            for sRuleId in dGraph[nextNodeKey]:
                try:
                    if bDebug:
                        print("  TRY:", sRuleId)
757
758
759
760
761
762
763
764
765


766
767
768

769
770
771
772
773
774
775
752
753
754
755
756
757
758


759
760
761
762

763
764
765
766
767
768
769
770







-
-
+
+


-
+







                            if cActionType == "-":
                                # grammar error
                                nTokenErrorStart = nTokenOffset + eAct[0]
                                if "bImmune" not in self.lToken[nTokenErrorStart]:
                                    nTokenErrorEnd = (nTokenOffset + eAct[1])  if eAct[1]  else nLastToken
                                    nErrorStart = self.nOffsetWithinParagraph + self.lToken[nTokenErrorStart]["nStart"]
                                    nErrorEnd = self.nOffsetWithinParagraph + self.lToken[nTokenErrorEnd]["nEnd"]
                                    if nErrorStart not in dError or eAct[2] > dPriority.get(nErrorStart, -1):
                                        dError[nErrorStart] = self._createError(sWhat, nTokenOffset, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, True, eAct[3], eAct[4], bShowRuleId, "notype", bContext)
                                    if nErrorStart not in self.dError or eAct[2] > dPriority.get(nErrorStart, -1):
                                        self.dError[nErrorStart] = self._createError(sWhat, nTokenOffset, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, True, eAct[3], eAct[4], bShowRuleId, "notype", bContext)
                                        dPriority[nErrorStart] = eAct[2]
                                        if bDebug:
                                            print("  NEW_ERROR:", dError[nErrorStart], "\n  ", dRule[sRuleId])
                                            print("  NEW_ERROR:", self.dError[nErrorStart], "\n  ", dRule[sRuleId])
                            elif cActionType == "~":
                                # text processor
                                if bDebug:
                                    print("  TAG_PREPARE:\n  ", dRule[sRuleId])
                                nEndToken = (nTokenOffset + eAct[1])  if eAct[1]  else nLastToken
                                self._tagAndPrepareTokenForRewriting(sWhat, nTokenOffset + eAct[0], nEndToken, nTokenOffset, True, bDebug)
                                bChange = True
796
797
798
799
800
801
802
803

804
805
806
807
808
809
810
791
792
793
794
795
796
797

798
799
800
801
802
803
804
805







-
+







                                print("# error: unknown action at " + sLineId)
                        elif cActionType == ">":
                            if bDebug:
                                print("  COND_BREAK")
                            break
                except Exception as e:
                    raise Exception(str(e), sLineId, sRuleId, self.sSentence)
        return bChange, dError
        return bChange

    def _createError (self, sSugg, nTokenOffset, iFirstToken, nStart, nEnd, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
        # suggestions
        if sSugg[0:1] == "=":
            sSugg = globals()[sSugg[1:]](self.lToken, nTokenOffset)
            lSugg = sSugg.split("|")  if sSugg  else []
        elif sSugg == "_":