274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
|
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
|
-
+
|
if bFullInfo:
lParagraphErrors = list(self.dError.values())
lSentences = []
self.dSentenceError.clear()
# parse sentences
sText = self._getCleanText()
for iStart, iEnd in text.getSentenceBoundaries(sText):
if 4 < (iEnd - iStart) < 2000:
if 2 < (iEnd - iStart) < 3000:
try:
self.sSentence = sText[iStart:iEnd]
self.sSentence0 = self.sText0[iStart:iEnd]
self.nOffsetWithinParagraph = iStart
self.lTokens = list(_oTokenizer.genTokens(self.sSentence, True))
self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lTokens if dToken["sType"] != "INFO" }
if bFullInfo:
|