Index: gc_core/py/lang_core/gc_engine.py ================================================================== --- gc_core/py/lang_core/gc_engine.py +++ gc_core/py/lang_core/gc_engine.py @@ -158,17 +158,18 @@ dTokenPos = oSentence.dTokenPos if oSentence else {} for sOption, lRuleGroup in _getRules(bParagraph): if sOption == "@@@@": # graph rules if not bParagraph and bSentenceChange: - oSentence.update(s) + oSentence.update(s, bDebug) bSentenceChange = False for sGraphName, sLineId in lRuleGroup: if bDebug: print("\n>>>> GRAPH:", sGraphName, sLineId) bParagraphChange, s = oSentence.parse(dAllGraph[sGraphName], dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext) dErrs.update(oSentence.dError) + dTokenPos = oSentence.dTokenPos elif not sOption or dOptions.get(sOption, False): # regex rules for zRegex, bUppercase, sLineId, sRuleId, nPriority, lActions in lRuleGroup: if sRuleId not in _aIgnoredRules: for m in zRegex.finditer(s): @@ -176,10 +177,12 @@ for sFuncCond, cActionType, sWhat, *eAct in lActions: # action in lActions: [ condition, action type, replacement/suggestion/action[, iGroup[, message, URL]] ] try: bCondMemo = not sFuncCond or globals()[sFuncCond](s, sx, m, dTokenPos, sCountry, bCondMemo) if bCondMemo: + if bDebug: + print("RULE:", sLineId) if cActionType == "-": # grammar error nErrorStart = nOffset + m.start(eAct[0]) if nErrorStart not in dErrs or nPriority > dPriority.get(nErrorStart, -1): dErrs[nErrorStart] = _createError(s, sx, sWhat, nOffset, m, eAct[0], sLineId, sRuleId, bUppercase, eAct[1], eAct[2], bShowRuleId, sOption, bContext) @@ -409,18 +412,13 @@ def morphex (dTokenPos, tWord, sPattern, sNegPattern, bNoWord=False): "analyse a tuple (position, word), returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on)" if not tWord: return bNoWord - lMorph = dTokenPos[tWord[0]]["lMorph"] if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]] else _oSpellChecker.getMorph(tWord[1]) if not lMorph: return False - if (tWord[1].startswith("noir")): - print(tWord) - print(dTokenPos) - print(lMorph) # check negative condition zNegPattern = re.compile(sNegPattern) if any(zNegPattern.search(s) for s in lMorph): return False # search sPattern @@ -576,11 +574,11 @@ def __init__ (self, sSentence, sSentence0, nOffset): self.sSentence = sSentence self.sSentence0 = sSentence0 self.nOffsetWithinParagraph = nOffset self.lToken = list(_oTokenizer.genTokens(sSentence, True)) - self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken } + self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken if dToken["sType"] != "INFO" } self.dTags = {} self.dError = {} def __str__ (self): s = "sentence: " + self.sSentence0 + "\n" @@ -592,14 +590,22 @@ s += "\n" for nPos, dToken in self.dTokenPos.items(): s += f"{nPos}\t{dToken}\n" return s - def update (self, sSentence): + def update (self, sSentence, bDebug=False): "update and retokenize" self.sSentence = sSentence - self.lToken = list(_oTokenizer.genTokens(sSentence, True)) + lNewToken = list(_oTokenizer.genTokens(sSentence, True)) + for dToken in lNewToken: + if "lMorph" in self.dTokenPos.get(dToken["nStart"], {}): + dToken["lMorph"] = self.dTokenPos[dToken["nStart"]]["lMorph"] + self.lToken = lNewToken + self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken if dToken["sType"] != "INFO" } + if bDebug: + print("UPDATE:") + print(self) def _getNextMatchingNodes (self, dToken, dGraph, dNode, bDebug=False): "generator: return nodes where “values” match arcs" # token value if dToken["sValue"] in dNode: @@ -913,33 +919,34 @@ lNewToken = [] nMergeUntil = 0 dTokenMerger = None for dToken in self.lToken: bKeepToken = True - if "bImmune" in dToken: - nErrorStart = self.nOffsetWithinParagraph + dToken["nStart"] - if nErrorStart in self.dError: - if bDebug: - print("immunity -> error removed:", self.dError[nErrorStart]) - del self.dError[nErrorStart] - if nMergeUntil and dToken["i"] <= nMergeUntil: - dTokenMerger["sValue"] += " " * (dToken["nStart"] - dTokenMerger["nEnd"]) + dToken["sValue"] - dTokenMerger["nEnd"] = dToken["nEnd"] - if bDebug: - print(" MERGED TOKEN:", dTokenMerger["sValue"]) - bKeepToken = False - if "nMergeUntil" in dToken: - if dToken["i"] > nMergeUntil: # this token is not already merged with a previous token - dTokenMerger = dToken - if dToken["nMergeUntil"] > nMergeUntil: - nMergeUntil = dToken["nMergeUntil"] - del dToken["nMergeUntil"] - elif "bToRemove" in dToken: - if bDebug: - print(" REMOVED:", dToken["sValue"]) - self.sSentence = self.sSentence[:dToken["nStart"]] + " " * (dToken["nEnd"] - dToken["nStart"]) + self.sSentence[dToken["nEnd"]:] - bKeepToken = False + if dToken["sType"] != "INFO": + if "bImmune" in dToken: + nErrorStart = self.nOffsetWithinParagraph + dToken["nStart"] + if nErrorStart in self.dError: + if bDebug: + print("immunity -> error removed:", self.dError[nErrorStart]) + del self.dError[nErrorStart] + if nMergeUntil and dToken["i"] <= nMergeUntil: + dTokenMerger["sValue"] += " " * (dToken["nStart"] - dTokenMerger["nEnd"]) + dToken["sValue"] + dTokenMerger["nEnd"] = dToken["nEnd"] + if bDebug: + print(" MERGED TOKEN:", dTokenMerger["sValue"]) + bKeepToken = False + if "nMergeUntil" in dToken: + if dToken["i"] > nMergeUntil: # this token is not already merged with a previous token + dTokenMerger = dToken + if dToken["nMergeUntil"] > nMergeUntil: + nMergeUntil = dToken["nMergeUntil"] + del dToken["nMergeUntil"] + elif "bToRemove" in dToken: + if bDebug: + print(" REMOVED:", dToken["sValue"]) + self.sSentence = self.sSentence[:dToken["nStart"]] + " " * (dToken["nEnd"] - dToken["nStart"]) + self.sSentence[dToken["nEnd"]:] + bKeepToken = False # if bKeepToken: lNewToken.append(dToken) if "sNewValue" in dToken: # rewrite token and sentence @@ -950,11 +957,16 @@ nDiffLen = len(dToken["sRealValue"]) - len(dToken["sNewValue"]) sNewRepl = (dToken["sNewValue"] + " " * nDiffLen) if nDiffLen >= 0 else dToken["sNewValue"][:len(dToken["sRealValue"])] self.sSentence = self.sSentence[:dToken["nStart"]] + sNewRepl + self.sSentence[dToken["nEnd"]:] del dToken["sNewValue"] else: - del self.dTokenPos[dToken["nStart"]] + try: + del self.dTokenPos[dToken["nStart"]] + except: + print(self) + print(dToken) + exit() if bDebug: print(" REWRITED:", self.sSentence) self.lToken.clear() self.lToken = lNewToken