Index: gc_core/py/lang_core/gc_engine.py ================================================================== --- gc_core/py/lang_core/gc_engine.py +++ gc_core/py/lang_core/gc_engine.py @@ -141,11 +141,11 @@ # parse sentences for iStart, iEnd in _getSentenceBoundaries(sText): if 4 < (iEnd - iStart) < 2000: try: - oSentence = TokenSentence(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart) + oSentence = TextParser(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart) _, dErrors = _proofread(oSentence, sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dErrors, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext) except: raise return dErrors.values() # this is a view (iterable) @@ -563,13 +563,13 @@ return True -#### TOKEN SENTENCE CHECKER +#### TEXT PARSER -class TokenSentence: +class TextParser: "Text parser" def __init__ (self, sSentence, sSentence0, nOffset): self.sSentence = sSentence self.sSentence0 = sSentence0 @@ -578,11 +578,12 @@ self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken if dToken["sType"] != "INFO" } self.dTags = {} self.dError = {} def __str__ (self): - s = "sentence: " + self.sSentence0 + "\n" + s = "TEXT ==========\n" + s += "sentence: " + self.sSentence0 + "\n" s += "now: " + self.sSentence + "\n" for dToken in self.lToken: s += f'{dToken["nStart"]}\t{dToken["nEnd"]}\t{dToken["sValue"]}' if "lMorph" in dToken: s += "\t" + str(dToken["lMorph"]) @@ -682,12 +683,12 @@ if bDebug: print(" MATCH: @" + sRegex) yield dGraph[dNode[""][sRegex]] # token tags if "tags" in dToken and "" in dNode: - for sTag in dNode[""]: - if sTag in dToken["tags"]: + for sTag in dToken["tags"]: + if sTag in dNode[""]: if bDebug: print(" MATCH: /" + sTag) yield dGraph[dNode[""][sTag]] # meta arc (for token type) if "" in dNode: @@ -787,11 +788,11 @@ pass elif cActionType == "/": if bDebug: print(" SEMANTIC_TAG:\n ", dRule[sRuleId]) nTokenStart = nTokenOffset + eAct[0] - nTokenEnd = nTokenOffset + eAct[1] + nTokenEnd = nTokenOffset + (eAct[1] if eAct[1] else eAct[0]) for i in range(nTokenStart, nTokenEnd+1): if "tags" in self.lToken[i]: self.lToken[i]["tags"].update(sWhat.split("|")) else: self.lToken[i]["tags"] = set(sWhat.split("|"))