280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
|
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
|
-
+
|
try:
self.sSentence = sText[iStart:iEnd]
self.sSentence0 = self.sText0[iStart:iEnd]
self.nOffsetWithinParagraph = iStart
self.lTokens = list(_oTokenizer.genTokens(self.sSentence, True))
self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lTokens if dToken["sType"] != "INFO" }
if bFullInfo:
self.lTokens0 = list(self.lTokens) # the list of tokens is duplicated, to keep all tokens from being deleted when analysis
self.lTokens0 = list(self.lTokens) # the list of tokens is duplicated, to keep tokens from being deleted when analysis
self.parseText(self.sSentence, self.sSentence0, False, iStart, sCountry, dOpt, bShowRuleId, bDebug, bContext)
if bFullInfo:
for dToken in self.lTokens0:
if dToken["sType"] == "WORD":
dToken["bValidToken"] = _oSpellChecker.isValidToken(dToken["sValue"])
if "lMorph" not in dToken:
dToken["lMorph"] = _oSpellChecker.getMorph(dToken["sValue"])
|
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
|
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
|
-
-
-
+
+
|
echo("REWRITE")
lNewTokens = []
lNewTokens0 = []
nMergeUntil = 0
dTokenMerger = {}
for iToken, dToken in enumerate(self.lTokens):
bKeepToken = True
bKeepToken0 = True
if dToken["sType"] != "INFO":
if nMergeUntil and iToken <= nMergeUntil:
# token to merge
dTokenMerger["sValue"] += " " * (dToken["nStart"] - dTokenMerger["nEnd"]) + dToken["sValue"]
dTokenMerger["nEnd"] = dToken["nEnd"]
if bDebug:
echo(" MERGED TOKEN: " + dTokenMerger["sValue"])
bKeepToken = False
bKeepToken0 = False
dToken["bMerged"] = True
bKeepToken = False
if "nMergeUntil" in dToken:
# first token to be merge with
if iToken > nMergeUntil: # this token is not to be merged with a previous token
dTokenMerger = dToken
if dToken["nMergeUntil"] > nMergeUntil:
nMergeUntil = dToken["nMergeUntil"]
del dToken["nMergeUntil"]
|
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
|
879
880
881
882
883
884
885
886
887
888
889
|
-
-
-
-
-
|
del dToken["sNewValue"]
else:
try:
del self.dTokenPos[dToken["nStart"]]
except KeyError:
echo(self)
echo(dToken)
if self.lTokens0 is not None and bKeepToken0:
lNewTokens0.append(dToken)
if bDebug:
echo(" TEXT REWRITED: " + self.sSentence)
self.lTokens.clear()
self.lTokens = lNewTokens
if self.lTokens0 is not None:
self.lTokens0.clear()
self.lTokens0 = lNewTokens0
|