136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
|
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
|
-
+
+
+
|
dDA.clear()
try:
# regex parser
_, errs = _proofread(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dDA, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
aErrors.update(errs)
# token parser
oSentence = TokenSentence(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart)
bChange, errs = oSentence.parse(dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
bChange, errs = oSentence.parse(dPriority, sCountry, dOpt, bShowRuleId, True, bContext)
aErrors.update(errs)
if bChange:
oSentence.rewrite()
if True:
print("~", oSentence.sSentence)
except:
raise
return aErrors.values() # this is a view (iterable)
def _getSentenceBoundaries (sText):
iStart = _zBeginOfParagraph.match(sText).end()
|
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
|
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
|
-
+
-
-
-
+
-
-
+
+
-
+
+
-
+
+
+
-
+
+
+
|
lPointer = lNextPointer
# check arcs of first nodes
for dNode in self._getNextMatchingNodes(dToken, dGraph[0]):
lPointer.append({"iToken": dToken["i"], "dNode": dNode})
# check if there is rules to check for each pointer
for dPointer in lPointer:
if "<rules>" in dPointer["dNode"]:
bHasChanged, errs = self._executeActions(dPointer["dNode"]["<rules>"], dPointer["iToken"]-1, dPriority, dOpt, sCountry, bShowRuleId, bContext)
bHasChanged, errs = self._executeActions(dPointer["dNode"]["<rules>"], dPointer["iToken"]-1, dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext)
dErr.update(errs)
if bHasChanged:
bChange = True
if dErr:
print(dErr)
return (bChange, dErr)
def _executeActions (self, dNode, nTokenOffset, dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext):
    """Execute the actions found in the DARG for one matched node.

    dNode maps a line identifier to a key of the module-level dGraph; each
    dGraph entry lists rule identifiers that are looked up in the module-level
    dRule table. A rule is laid out as:
        [ condition, action type, replacement/suggestion/action
          [, iTokenStart, iTokenEnd [, nPriority, message, URL]] ]

    Action types handled here:
        "-"  grammar error: an error is stored in the returned dictionary
        "~"  text processor: tokens are tagged for rewriting
        "="  disambiguation: the function named by the rule is called
        ">"  condition only: when it fails, the remaining rules of the
             current graph entry are skipped

    Parameters:
        dNode         -- {line id: dGraph key} of the rules to run
        nTokenOffset  -- offset added to the rule-relative token indexes
        dPriority     -- {error start position: priority}; updated in place
                         whenever an error is stored
        dOpt          -- not read by this method
        sCountry      -- passed to the condition functions
        bShowRuleId   -- passed to self.createError
        bDebug        -- when true, print a trace line for each action run
        bContext      -- passed to self.createError

    Returns a tuple (bChange, dErrs): bChange is True if at least one text
    processor action ran, dErrs maps error start positions to error objects.

    Raises Exception(message, sLineId) when anything fails while a rule is
    evaluated; the original exception is chained as the cause.
    """
    dErrs = {}
    bChange = False
    for sLineId, nextNodeKey in dNode.items():
        for sRuleId in dGraph[nextNodeKey]:
            bCondMemo = None
            sFuncCond, cActionType, sWhat, *eAct = dRule[sRuleId]
            try:
                # an empty condition name means the rule always applies
                bCondMemo = not sFuncCond or globals()[sFuncCond](self.lToken, nTokenOffset, sCountry, bCondMemo)
                if bCondMemo:
                    if cActionType == "-":
                        # grammar error
                        nTokenErrorStart = nTokenOffset + eAct[0]
                        nTokenErrorEnd = nTokenOffset + eAct[1]
                        nErrorStart = self.nOffset + self.lToken[nTokenErrorStart]["nStart"]
                        nErrorEnd = self.nOffset + self.lToken[nTokenErrorEnd]["nEnd"]
                        # keep only the highest priority error for a given start position
                        if nErrorStart not in dErrs or eAct[2] > dPriority[nErrorStart]:
                            dErrs[nErrorStart] = self.createError(sWhat, nTokenOffset, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, True, eAct[3], eAct[4], bShowRuleId, "notype", bContext)
                            dPriority[nErrorStart] = eAct[2]
                            if bDebug:
                                print("-", sRuleId, dErrs[nErrorStart])
                    elif cActionType == "~":
                        # text processor
                        self._tagAndPrepareTokenForRewriting(sWhat, nTokenOffset + eAct[0], nTokenOffset + eAct[1])
                        if bDebug:
                            print("~", sRuleId)
                        bChange = True
                    elif cActionType == "=":
                        # disambiguation
                        globals()[sWhat](self.lToken, nTokenOffset)
                        if bDebug:
                            print("=", sRuleId)
                    elif cActionType == ">":
                        # nothing to do: this rule is only a condition for the following actions
                        if bDebug:
                            print(">", sRuleId)
                    else:
                        print("# error: unknown action at " + sLineId)
                elif cActionType == ">":
                    # failed condition: skip the remaining rules of this graph entry
                    if bDebug:
                        print(">!", sRuleId)
                    break
            except Exception as e:
                raise Exception(str(e), sLineId) from e
    return bChange, dErrs
def _createWriterError (self, sSugg, nTokenOffset, iFirstToken, nStart, nEnd, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
"error for Writer (LO/OO)"
|
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
|
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
|
-
+
|
p = PropertyValue()
p.Name = "FullCommentURL"
p.Value = sURL
xErr.aProperties = (p,)
else:
xErr.aProperties = ()
return xErr
def _createDictError (self, sSugg, nTokenOffset, iFirstToken, nStart, nEnd, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
"error as a dictionary"
dErr = {}
dErr["nStart"] = nStart
dErr["nEnd"] = nEnd
dErr["sLineId"] = sLineId
dErr["sRuleId"] = sRuleId
|
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
|
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
|
-
+
-
+
|
if bContext:
dErr['sUnderlined'] = self.sSentence0[dErr["nStart"]:dErr["nEnd"]]
dErr['sBefore'] = self.sSentence0[max(0,dErr["nStart"]-80):dErr["nStart"]]
dErr['sAfter'] = self.sSentence0[dErr["nEnd"]:dErr["nEnd"]+80]
return dErr
def _expand (self, sMsg, nTokenOffset):
    """Return sMsg with every token reference replaced by the token text.

    A reference is a backslash followed by a decimal number, e.g. \\1 or
    \\12. The number plus nTokenOffset is used as an index into self.lToken,
    and the "sValue" entry of that token is substituted for the reference.

    The substitution is done in a single pass, so a short reference such as
    \\1 never rewrites the beginning of a longer one such as \\10, and text
    inserted from a token is not scanned again for references.
    """
    def _tokenValue (m):
        # used as a replacement function so the value is inserted literally
        return self.lToken[int(m.group(1)) + nTokenOffset]["sValue"]

    return re.sub(r"\\([0-9]+)", _tokenValue, sMsg)
def _tagAndPrepareTokenForRewriting (self, sWhat, nTokenRewriteStart, nTokenRewriteEnd, bUppercase=True):
"text processor: rewrite tokens between <nTokenRewriteStart> and <nTokenRewriteEnd> position"
if sWhat == "*":
# purge text
if nTokenRewriteEnd - nTokenRewriteStart == 0:
|