949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
|
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
|
-
+
-
+
+
-
+
-
-
+
-
+
-
-
|
"rewrite the sentence, modify tokens, purge the token list"
if bDebug:
print("REWRITE")
lNewToken = []
nMergeUntil = 0
dTokenMerger = None
for dToken in self.lToken:
bKeepToken = True
bRemoveToken = False
if "bImmune" in dToken:
nErrorStart = self.nOffsetWithinParagraph + dToken["nStart"]
if nErrorStart in self.dError:
if bDebug:
print("immunity -> error removed:", self.dError[nErrorStart])
del self.dError[nErrorStart]
if nMergeUntil and dToken["i"] <= nMergeUntil:
dTokenMerger["sValue"] += " " * (dToken["nStart"] - dTokenMerger["nEnd"]) + dToken["sValue"]
dTokenMerger["nEnd"] = dToken["nEnd"]
if bDebug:
print(" MERGED TOKEN:", dTokenMerger["sValue"])
bKeepToken = False
bRemoveToken = True
if "nMergeUntil" in dToken:
if dToken["i"] > nMergeUntil: # this token is not already merged with a previous token
dTokenMerger = dToken
if dToken["nMergeUntil"] > nMergeUntil:
nMergeUntil = dToken["nMergeUntil"]
del dToken["nMergeUntil"]
#
elif "bToRemove" in dToken:
if bRemoveToken or "bToRemove" in dToken:
# remove useless token
self.sSentence = self.sSentence[:dToken["nStart"]] + " " * (dToken["nEnd"] - dToken["nStart"]) + self.sSentence[dToken["nEnd"]:]
if bDebug:
print(" REMOVED:", dToken["sValue"])
self.sSentence = self.sSentence[:dToken["nStart"]] + " " * (dToken["nEnd"] - dToken["nStart"]) + self.sSentence[dToken["nEnd"]:]
bKeepToken = False
else:
#
if bKeepToken:
lNewToken.append(dToken)
if "sNewValue" in dToken:
# rewrite token and sentence
if bDebug:
print(dToken["sValue"], "->", dToken["sNewValue"])
dToken["sRealValue"] = dToken["sValue"]
dToken["sValue"] = dToken["sNewValue"]
|