743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
|
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
|
-
+
-
-
+
+
+
-
+
-
-
-
-
+
+
-
+
+
+
-
-
-
+
|
dError = {}
bChange = False
for sLineId, nextNodeKey in dNode.items():
bCondMemo = None
for sRuleId in dGraph[nextNodeKey]:
try:
if bDebug:
print("ACTION:", sRuleId)
print(" TRY:", sRuleId)
print(dRule[sRuleId])
sOption, sFuncCond, cActionType, sWhat, *eAct = dRule[sRuleId]
# Suggestion [ option, condition, "-", replacement/suggestion/action, iTokenStart, iTokenEnd, nPriority, message, URL ]
# TextProcessor [ option, condition, "~", replacement/suggestion/action, iTokenStart, iTokenEnd ]
# Disambiguator [ option, condition, "=", replacement/suggestion/action ]
# Sentence Tag [ option, condition, "/", replacement/suggestion/action, iTokenStart, iTokenEnd ]
# Test [ option, condition, ">", "" ]
if not sOption or dOptions.get(sOption, False):
bCondMemo = not sFuncCond or globals()[sFuncCond](self.lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, self.dTags, self.sSentence, self.sSentence0)
if bCondMemo:
if cActionType == "-":
# grammar error
nTokenErrorStart = nTokenOffset + eAct[0]
if "bImmune" not in self.lToken[nTokenErrorStart]:
nTokenErrorEnd = (nTokenOffset + eAct[1]) if eAct[1] else nLastToken
nErrorStart = self.nOffsetWithinParagraph + self.lToken[nTokenErrorStart]["nStart"]
nErrorEnd = self.nOffsetWithinParagraph + self.lToken[nTokenErrorEnd]["nEnd"]
if nErrorStart not in dError or eAct[2] > dPriority.get(nErrorStart, -1):
dError[nErrorStart] = self.createError(sWhat, nTokenOffset, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, True, eAct[3], eAct[4], bShowRuleId, "notype", bContext)
dPriority[nErrorStart] = eAct[2]
if bDebug:
print("ERROR:", sRuleId, dError[nErrorStart])
print(" NEW_ERROR:", dError[nErrorStart], "\n ", dRule[sRuleId])
elif cActionType == "~":
# text processor
if bDebug:
print(" TAG_PREPARE:\n ", dRule[sRuleId])
nEndToken = (nTokenOffset + eAct[1]) if eAct[1] else nLastToken
self._tagAndPrepareTokenForRewriting(sWhat, nTokenOffset + eAct[0], nEndToken, nTokenOffset, bDebug)
self._tagAndPrepareTokenForRewriting(sWhat, nTokenOffset + eAct[0], nEndToken, nTokenOffset, True, bDebug)
if bDebug:
print("RW:", sRuleId)
bChange = True
elif cActionType == "=":
# disambiguation
globals()[sWhat](self.lToken, nTokenOffset)
if bDebug:
print("DA:", sRuleId)
print(" DISAMBIGUATOR:\n ", dRule[sRuleId])
globals()[sWhat](self.lToken, nTokenOffset)
elif cActionType == ">":
# we do nothing, this test is just a condition to apply all following actions
if bDebug:
print(">>>", sRuleId)
print(" COND_OK")
pass
elif cActionType == "/":
# sentence tags
if bDebug:
print(" SENTENCE_TAG:\n ", dRule[sRuleId])
nTokenTag = nTokenOffset + eAct[0]
if sWhat not in self.dTags:
self.dTags[sWhat] = (nTokenTag, nTokenTag)
elif nTokenTag > self.dTags[sWhat][1]:
self.dTags[sWhat] = (self.dTags[sWhat][0], nTokenTag)
if bDebug:
print("/", sRuleId)
else:
print("# error: unknown action at " + sLineId)
elif cActionType == ">":
if bDebug:
print(">!", sRuleId)
print(" COND_BREAK")
break
except Exception as e:
raise Exception(str(e), sLineId, sRuleId, self.sSentence)
return bChange, dError
def _createWriterError (self, sSugg, nTokenOffset, iFirstToken, nStart, nEnd, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
"error for Writer (LO/OO)"
|
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
|
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
|
-
+
|
sText = sText.replace(m.group(0), self.lToken[int(m.group(1))+nTokenOffset]["sValue"])
#print(">", sText)
return sText
def _tagAndPrepareTokenForRewriting (self, sWhat, nTokenRewriteStart, nTokenRewriteEnd, nTokenOffset, bUppercase=True, bDebug=False):
"text processor: rewrite tokens between <nTokenRewriteStart> and <nTokenRewriteEnd> position"
if bDebug:
print("REWRITING:", nTokenRewriteStart, nTokenRewriteEnd)
print(" REWRITING:", nTokenRewriteStart, nTokenRewriteEnd)
if sWhat == "*":
# purge text
if nTokenRewriteEnd - nTokenRewriteStart == 0:
self.lToken[nTokenRewriteStart]["bToRemove"] = True
else:
for i in range(nTokenRewriteStart, nTokenRewriteEnd+1):
self.lToken[i]["bToRemove"] = True
|
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
|
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
|
+
+
-
+
-
+
-
+
|
else:
if bUppercase:
sValue = sValue[0:1].upper() + sValue[1:]
self.lToken[i]["sNewValue"] = sValue
def rewrite (self, bDebug=False):
    """Rewrite the sentence, modify tokens, purge the token list.

    Walks self.lToken once and applies the markers found on each token:

    * "bImmune":     the error stored in self.dError at the token's
                     paragraph-relative start offset (if any) is deleted.
    * "nMergeUntil": the token absorbs every following token whose "i" is
                     lower than or equal to that value; absorbed tokens are
                     dropped from the token list.
    * "bToRemove":   the token is dropped and its span in self.sSentence is
                     blanked with spaces.
    * "sNewValue":   the token value is replaced (the previous value is kept
                     in "sRealValue") and written into self.sSentence.

    self.sSentence always keeps its length: a shorter replacement is padded
    with spaces, a longer one is truncated in the sentence (the token itself
    keeps the full new value).

    When <bDebug> is true, a trace of every operation is printed.
    """
    if bDebug:
        print("REWRITE")
    lNewToken = []
    nMergeUntil = 0         # index ("i") of the last token to absorb; 0 means no merge requested so far
    dTokenMerger = None     # token currently absorbing the following ones
    # NOTE(review): if a token with "i" == 0 carried "nMergeUntil", dTokenMerger
    # would still be None when the next token is merged -- presumably token 0
    # never carries that marker; confirm against the code that sets it.
    for dToken in self.lToken:
        bKeepToken = True
        if "bImmune" in dToken:
            # an immune token cancels the error anchored at its position
            nErrorStart = self.nOffsetWithinParagraph + dToken["nStart"]
            if nErrorStart in self.dError:
                if bDebug:
                    print("immunity -> error removed:", self.dError[nErrorStart])
                del self.dError[nErrorStart]
        if nMergeUntil and dToken["i"] <= nMergeUntil:
            # absorb this token into the merger, restoring the gap between them as spaces
            dTokenMerger["sValue"] += " " * (dToken["nStart"] - dTokenMerger["nEnd"]) + dToken["sValue"]
            dTokenMerger["nEnd"] = dToken["nEnd"]
            if bDebug:
                print(" MERGED TOKEN:", dTokenMerger["sValue"])
            bKeepToken = False
        if "nMergeUntil" in dToken:
            if dToken["i"] > nMergeUntil:
                # this token is not already merged with a previous token
                dTokenMerger = dToken
            if dToken["nMergeUntil"] > nMergeUntil:
                nMergeUntil = dToken["nMergeUntil"]
            del dToken["nMergeUntil"]
        elif "bToRemove" in dToken:
            # remove useless token: blank its span so that offsets stay valid
            self.sSentence = self.sSentence[:dToken["nStart"]] + " " * (dToken["nEnd"] - dToken["nStart"]) + self.sSentence[dToken["nEnd"]:]
            if bDebug:
                print(" REMOVED:", dToken["sValue"])
            bKeepToken = False
        #
        if bKeepToken:
            lNewToken.append(dToken)
            if "sNewValue" in dToken:
                # rewrite token and sentence
                if bDebug:
                    print(dToken["sValue"], "->", dToken["sNewValue"])
                dToken["sRealValue"] = dToken["sValue"]
                dToken["sValue"] = dToken["sNewValue"]
                nDiffLen = len(dToken["sRealValue"]) - len(dToken["sNewValue"])
                # pad or truncate so that the sentence length never changes
                sNewRepl = (dToken["sNewValue"] + " " * nDiffLen) if nDiffLen >= 0 else dToken["sNewValue"][:len(dToken["sRealValue"])]
                self.sSentence = self.sSentence[:dToken["nStart"]] + sNewRepl + self.sSentence[dToken["nEnd"]:]
                del dToken["sNewValue"]
    if bDebug:
        print(" REWRITED:", self.sSentence)
    # the previous list object is emptied in place before being replaced
    self.lToken.clear()
    self.lToken = lNewToken
#### Analyse tokens
|