Index: gc_core/py/lang_core/gc_engine.py
==================================================================
--- gc_core/py/lang_core/gc_engine.py
+++ gc_core/py/lang_core/gc_engine.py
@@ -155,21 +155,25 @@
         iStart = m.end()
 
 
 def _proofread (oSentence, s, sx, nOffset, bParagraph, dDA, dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext):
     dErrs = {}
-    bChange = False
+    bParagraphChange = False
+    bSentenceChange = False
     for sOption, lRuleGroup in _getRules(bParagraph):
         if sOption == "@@@@":
             # graph rules
+            if not bParagraph and bSentenceChange:
+                oSentence.update(s)
+                bSentenceChange = False
             for sGraphName, sLineId in lRuleGroup:
                 if bDebug:
                     print(sGraphName, sLineId)
-                bChange, errs = oSentence.parse(dAllGraph[sGraphName], dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext)
+                bParagraphChange, errs = oSentence.parse(dAllGraph[sGraphName], dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext)
                 dErrs.update(errs)
-                if bChange:
-                    oSentence.rewrite()
+                if bParagraphChange:
+                    s = oSentence.rewrite()
                 if bDebug:
                     print("~", oSentence.sSentence)
         elif not sOption or dOptions.get(sOption, False):
             # regex rules
             for zRegex, bUppercase, sLineId, sRuleId, nPriority, lActions in lRuleGroup:
@@ -188,11 +192,12 @@
                                             dErrs[nErrorStart] = _createRegexError(s, sx, sWhat, nOffset, m, eAct[0], sLineId, sRuleId, bUppercase, eAct[1], eAct[2], bShowRuleId, sOption, bContext)
                                             dPriority[nErrorStart] = nPriority
                                     elif cActionType == "~":
                                         # text processor
                                         s = _rewrite(s, sWhat, eAct[0], m, bUppercase)
-                                        bChange = True
+                                        bParagraphChange = True
+                                        bSentenceChange = True
                                         if bDebug:
                                             echo("~ " + s + " -- " + m.group(eAct[0]) + " # " + sLineId)
                                     elif cActionType == "=":
                                         # disambiguation
                                         globals()[sWhat](s, m, dDA)
@@ -205,11 +210,11 @@
                                         echo("# error: unknown action at " + sLineId)
                                 elif cActionType == ">":
                                     break
                             except Exception as e:
                                 raise Exception(str(e), "# " + sLineId + " # " + sRuleId)
-    if bChange:
+    if bParagraphChange:
         return (s, dErrs)
     return (False, dErrs)
 
 
 def _createRegexWriterError (s, sx, sRepl, nOffset, m, iGroup, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
@@ -583,10 +588,14 @@
         self.sSentence0 = sSentence0
         self.nOffset = nOffset
         self.lToken = list(_oTokenizer.genTokens(sSentence, True))
         self.createError = self._createWriterError if _bWriterError else self._createDictError
 
+    def update (self, sSentence):
+        self.sSentence = sSentence
+        self.lToken = list(_oTokenizer.genTokens(sSentence, True))
+
     def _getNextMatchingNodes (self, dToken, dGraph, dNode):
         "generator: return nodes where “values” match arcs"
         # token value
         if dToken["sValue"] in dNode:
             #print("value found: ", dToken["sValue"])
@@ -835,10 +844,11 @@
             sNewRepl = (dToken["sNewValue"] + " " * nDiffLen) if nDiffLen >= 0 else dToken["sNewValue"][:len(dToken["sRealValue"])]
             self.sSentence = self.sSentence[:self.nOffset+dToken["nStart"]] + sNewRepl + self.sSentence[self.nOffset+dToken["nEnd"]:]
             del dToken["sNewValue"]
         self.lToken.clear()
         self.lToken = lNewToken
+        return self.sSentence
 
 
     #### Analyse tokens
 
 

Index: graphspell/tokenizer.py
==================================================================
--- graphspell/tokenizer.py
+++ graphspell/tokenizer.py
@@ -43,12 +43,13 @@
         if sLang not in _PATTERNS:
             self.sLang = "default"
         self.zToken = re.compile( "(?i)" + '|'.join(sRegex for sRegex in _PATTERNS[sLang]) )
 
     def genTokens (self, sText, bStartEndToken=False):
+        i = 0
         if bStartEndToken:
             yield { "i": 0, "sType": "INFO", "sValue": "<start>", "nStart": 0, "nEnd": 0 }
         for i, m in enumerate(self.zToken.finditer(sText), 1):
             yield { "i": i, "sType": m.lastgroup, "sValue": m.group(), "nStart": m.start(), "nEnd": m.end() }
         if bStartEndToken:
             iEnd = len(sText)
             yield { "i": i+1, "sType": "INFO", "sValue": "<end>", "nStart": iEnd, "nEnd": iEnd }