Index: gc_core/py/lang_core/gc_engine.py
==================================================================
--- gc_core/py/lang_core/gc_engine.py
+++ gc_core/py/lang_core/gc_engine.py
@@ -283,11 +283,11 @@
                     self.update(sText, bDebug)
                     bChange = False
                 for sGraphName, sLineId in lRuleGroup:
                     if sGraphName not in dOptions or dOptions[sGraphName]:
                         if bDebug:
-                            print("\n>>>> GRAPH:", sGraphName, sLineId)
+                            echo("\n>>>> GRAPH:", sGraphName, sLineId)
                         sText = self.parseGraph(_rules_graph.dAllGraph[sGraphName], sCountry, dOptions, bShowRuleId, bDebug, bContext)
             elif not sOption or dOptions.get(sOption, False):
                 # regex rules
                 for zRegex, bUppercase, sLineId, sRuleId, nPriority, lActions in lRuleGroup:
                     if sRuleId not in _aIgnoredRules:
@@ -297,11 +297,11 @@
                                 # action in lActions: [ condition, action type, replacement/suggestion/action[, iGroup[, message, URL]] ]
                                 try:
                                     bCondMemo = not sFuncCond or globals()[sFuncCond](sText, sText0, m, self.dTokenPos, sCountry, bCondMemo)
                                     if bCondMemo:
                                         if bDebug:
-                                            print("RULE:", sLineId)
+                                            echo("RULE:", sLineId)
                                         if cActionType == "-":
                                             # grammar error
                                             nErrorStart = nOffset + m.start(eAct[0])
                                             if nErrorStart not in self.dError or nPriority > self.dErrorPriority.get(nErrorStart, -1):
                                                 self.dError[nErrorStart] = self._createErrorFromRegex(sText, sText0, sWhat, nOffset, m, eAct[0], sLineId, sRuleId, bUppercase, eAct[1], eAct[2], bShowRuleId, sOption, bContext)
@@ -341,42 +341,42 @@
             if "lMorph" in self.dTokenPos.get(dToken["nStart"], {}):
                 dToken["lMorph"] = self.dTokenPos[dToken["nStart"]]["lMorph"]
         self.lToken = lNewToken
         self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken if dToken["sType"] != "INFO" }
         if bDebug:
-            print("UPDATE:")
-            print(self)
+            echo("UPDATE:")
+            echo(self)
 
     def _getNextPointers (self, dToken, dGraph, dPointer, bDebug=False):
         "generator: return nodes where “values” match arcs"
         dNode = dPointer["dNode"]
         iNode1 = dPointer["iNode1"]
         bTokenFound = False
         # token value
         if dToken["sValue"] in dNode:
             if bDebug:
-                print(" MATCH:", dToken["sValue"])
+                echo(" MATCH:", dToken["sValue"])
             yield { "iNode1": iNode1, "dNode": dGraph[dNode[dToken["sValue"]]] }
             bTokenFound = True
         if dToken["sValue"][0:2].istitle(): # we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout".
sValue = dToken["sValue"].lower() if sValue in dNode: if bDebug: - print(" MATCH:", sValue) + echo(" MATCH:", sValue) yield { "iNode1": iNode1, "dNode": dGraph[dNode[sValue]] } bTokenFound = True elif dToken["sValue"].isupper(): sValue = dToken["sValue"].lower() if sValue in dNode: if bDebug: - print(" MATCH:", sValue) + echo(" MATCH:", sValue) yield { "iNode1": iNode1, "dNode": dGraph[dNode[sValue]] } bTokenFound = True sValue = dToken["sValue"].capitalize() if sValue in dNode: if bDebug: - print(" MATCH:", sValue) + echo(" MATCH:", sValue) yield { "iNode1": iNode1, "dNode": dGraph[dNode[sValue]] } bTokenFound = True # regex value arcs if dToken["sType"] not in frozenset(["INFO", "PUNC", "SIGN"]): if "" in dNode: @@ -383,31 +383,31 @@ for sRegex in dNode[""]: if "¬" not in sRegex: # no anti-pattern if re.search(sRegex, dToken["sValue"]): if bDebug: - print(" MATCH: ~" + sRegex) + echo(" MATCH: ~" + sRegex) yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sRegex]] } bTokenFound = True else: # there is an anti-pattern sPattern, sNegPattern = sRegex.split("¬", 1) if sNegPattern and re.search(sNegPattern, dToken["sValue"]): continue if not sPattern or re.search(sPattern, dToken["sValue"]): if bDebug: - print(" MATCH: ~" + sRegex) + echo(" MATCH: ~" + sRegex) yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sRegex]] } bTokenFound = True # analysable tokens if dToken["sType"][0:4] == "WORD": # token lemmas if "" in dNode: for sLemma in _oSpellChecker.getLemma(dToken["sValue"]): if sLemma in dNode[""]: if bDebug: - print(" MATCH: >" + sLemma) + echo(" MATCH: >" + sLemma) yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sLemma]] } bTokenFound = True # regex morph arcs if "" in dNode: for sRegex in dNode[""]: @@ -414,11 +414,11 @@ if "¬" not in sRegex: # no anti-pattern lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"])) if any(re.search(sRegex, sMorph) for sMorph in lMorph): if bDebug: - print(" MATCH: @" + sRegex) + echo(" MATCH: @" + sRegex) yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sRegex]] } bTokenFound = True else: # there is an anti-pattern sPattern, sNegPattern = sRegex.split("¬", 1) @@ -426,43 +426,43 @@ # all morphologies must match with if sPattern: lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"])) if lMorph and all(re.search(sPattern, sMorph) for sMorph in lMorph): if bDebug: - print(" MATCH: @" + sRegex) + echo(" MATCH: @" + sRegex) yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sRegex]] } bTokenFound = True else: lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"])) if sNegPattern and any(re.search(sNegPattern, sMorph) for sMorph in lMorph): continue if not sPattern or any(re.search(sPattern, sMorph) for sMorph in lMorph): if bDebug: - print(" MATCH: @" + sRegex) + echo(" MATCH: @" + sRegex) yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sRegex]] } bTokenFound = True # token tags if "tags" in dToken and "" in dNode: for sTag in dToken["tags"]: if sTag in dNode[""]: if bDebug: - print(" MATCH: /" + sTag) + echo(" MATCH: /" + sTag) yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sTag]] } bTokenFound = True # meta arc (for token type) if "" in dNode: for sMeta in dNode[""]: # no regex here, we just search if exists within if sMeta == "*" or dToken["sType"] == sMeta: if bDebug: - print(" MATCH: *" + sMeta) + echo(" MATCH: *" + sMeta) yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sMeta]] } bTokenFound = True elif "¬" in sMeta: if dToken["sType"] not in sMeta: if bDebug: - print(" MATCH: 
*" + sMeta) + echo(" MATCH: *" + sMeta) yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sMeta]] } bTokenFound = True if "bKeep" in dPointer and not bTokenFound: yield dPointer # JUMP @@ -476,11 +476,11 @@ dOpt = _dOptions if not dOptions else dOptions lPointer = [] bTagAndRewrite = False for iToken, dToken in enumerate(self.lToken): if bDebug: - print("TOKEN:", dToken["sValue"]) + echo("TOKEN:", dToken["sValue"]) # check arcs for each existing pointer lNextPointer = [] for dPointer in lPointer: lNextPointer.extend(self._getNextPointers(dToken, dGraph, dPointer, bDebug)) lPointer = lNextPointer @@ -487,19 +487,19 @@ # check arcs of first nodes lPointer.extend(self._getNextPointers(dToken, dGraph, { "iNode1": iToken, "dNode": dGraph[0] }, bDebug)) # check if there is rules to check for each pointer for dPointer in lPointer: #if bDebug: - # print("+", dPointer) + # echo("+", dPointer) if "" in dPointer["dNode"]: bChange = self._executeActions(dGraph, dPointer["dNode"][""], dPointer["iNode1"]-1, iToken, dOpt, sCountry, bShowRuleId, bDebug, bContext) if bChange: bTagAndRewrite = True if bTagAndRewrite: self.rewriteFromTags(bDebug) if bDebug: - print(self) + echo(self) return self.sSentence def _executeActions (self, dGraph, dNode, nTokenOffset, nLastToken, dOptions, sCountry, bShowRuleId, bDebug, bContext): "execute actions found in the DARG" bChange = False @@ -506,11 +506,11 @@ for sLineId, nextNodeKey in dNode.items(): bCondMemo = None for sRuleId in dGraph[nextNodeKey]: try: if bDebug: - print(" >TRY:", sRuleId) + echo(" >TRY:", sRuleId) sOption, sFuncCond, cActionType, sWhat, *eAct = _rules_graph.dRule[sRuleId] # Suggestion [ option, condition, "-", replacement/suggestion/action, iTokenStart, iTokenEnd, cStartLimit, cEndLimit, bCaseSvty, nPriority, sMessage, sURL ] # TextProcessor [ option, condition, "~", replacement/suggestion/action, iTokenStart, iTokenEnd, bCaseSvty ] # Disambiguator [ option, condition, "=", replacement/suggestion/action ] # Tag [ option, condition, "/", replacement/suggestion/action, iTokenStart, iTokenEnd ] @@ -529,29 +529,29 @@ nErrorEnd = self.nOffsetWithinParagraph + (self.lToken[nTokenErrorEnd]["nEnd"] if cEndLimit == ">" else self.lToken[nTokenErrorEnd]["nStart"]) if nErrorStart not in self.dError or nPriority > self.dErrorPriority.get(nErrorStart, -1): self.dError[nErrorStart] = self._createErrorFromTokens(sWhat, nTokenOffset, nLastToken, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, bCaseSvty, sMessage, sURL, bShowRuleId, sOption, bContext) self.dErrorPriority[nErrorStart] = nPriority if bDebug: - print(" NEW_ERROR: ", sRuleId, sLineId, ": ", self.dError[nErrorStart]) + echo(" NEW_ERROR: ", sRuleId, sLineId, ": ", self.dError[nErrorStart]) elif cActionType == "~": # text processor nTokenStart = nTokenOffset + eAct[0] if eAct[0] > 0 else nLastToken + eAct[0] nTokenEnd = nTokenOffset + eAct[1] if eAct[1] > 0 else nLastToken + eAct[1] self._tagAndPrepareTokenForRewriting(sWhat, nTokenStart, nTokenEnd, nTokenOffset, nLastToken, eAct[2], bDebug) bChange = True if bDebug: - print(" TEXT_PROCESSOR: ", sRuleId, sLineId) - print(" ", self.lToken[nTokenStart]["sValue"], ":", self.lToken[nTokenEnd]["sValue"], " >", sWhat) + echo(" TEXT_PROCESSOR: ", sRuleId, sLineId) + echo(" ", self.lToken[nTokenStart]["sValue"], ":", self.lToken[nTokenEnd]["sValue"], " >", sWhat) elif cActionType == "=": # disambiguation globals()[sWhat](self.lToken, nTokenOffset, nLastToken) if bDebug: - print(" DISAMBIGUATOR: ", sRuleId, sLineId, "("+sWhat+")", 
self.lToken[nTokenOffset+1]["sValue"], ":", self.lToken[nLastToken]["sValue"]) + echo(" DISAMBIGUATOR: ", sRuleId, sLineId, "("+sWhat+")", self.lToken[nTokenOffset+1]["sValue"], ":", self.lToken[nLastToken]["sValue"]) elif cActionType == ">": # we do nothing, this test is just a condition to apply all following actions if bDebug: - print(" COND_OK: ", sRuleId, sLineId) + echo(" COND_OK: ", sRuleId, sLineId) pass elif cActionType == "/": # Tag nTokenStart = nTokenOffset + eAct[0] if eAct[0] > 0 else nLastToken + eAct[0] nTokenEnd = nTokenOffset + eAct[1] if eAct[1] > 0 else nLastToken + eAct[1] @@ -559,21 +559,21 @@ if "tags" in self.lToken[i]: self.lToken[i]["tags"].update(sWhat.split("|")) else: self.lToken[i]["tags"] = set(sWhat.split("|")) if bDebug: - print(" TAG: ", sRuleId, sLineId) - print(" ", sWhat, " >", self.lToken[nTokenStart]["sValue"], ":", self.lToken[nTokenEnd]["sValue"]) + echo(" TAG: ", sRuleId, sLineId) + echo(" ", sWhat, " >", self.lToken[nTokenStart]["sValue"], ":", self.lToken[nTokenEnd]["sValue"]) if sWhat not in self.dTags: self.dTags[sWhat] = [nTokenStart, nTokenStart] else: self.dTags[sWhat][0] = min(nTokenStart, self.dTags[sWhat][0]) self.dTags[sWhat][1] = max(nTokenEnd, self.dTags[sWhat][1]) elif cActionType == "%": # immunity if bDebug: - print(" IMMUNITY:\n ", _rules_graph.dRule[sRuleId]) + echo(" IMMUNITY:\n ", _rules_graph.dRule[sRuleId]) nTokenStart = nTokenOffset + eAct[0] if eAct[0] > 0 else nLastToken + eAct[0] nTokenEnd = nTokenOffset + eAct[1] if eAct[1] > 0 else nLastToken + eAct[1] if nTokenEnd - nTokenStart == 0: self.lToken[nTokenStart]["bImmune"] = True nErrorStart = self.nOffsetWithinParagraph + self.lToken[nTokenStart]["nStart"] @@ -584,14 +584,14 @@ self.lToken[i]["bImmune"] = True nErrorStart = self.nOffsetWithinParagraph + self.lToken[i]["nStart"] if nErrorStart in self.dError: del self.dError[nErrorStart] else: - print("# error: unknown action at " + sLineId) + echo("# error: unknown action at " + sLineId) elif cActionType == ">": if bDebug: - print(" COND_BREAK: ", sRuleId, sLineId) + echo(" COND_BREAK: ", sRuleId, sLineId) break except Exception as e: raise Exception(str(e), sLineId, sRuleId, self.sSentence) return bChange @@ -673,17 +673,17 @@ dErr['sBefore'] = self.sSentence0[max(0,nStart-80):nStart] dErr['sAfter'] = self.sSentence0[nEnd:nEnd+80] return dErr def _expand (self, sText, nTokenOffset, nLastToken): - #print("*", sText) + #echo("*", sText) for m in re.finditer(r"\\(-?[0-9]+)", sText): if m.group(1)[0:1] == "-": sText = sText.replace(m.group(0), self.lToken[nLastToken+int(m.group(1))+1]["sValue"]) else: sText = sText.replace(m.group(0), self.lToken[nTokenOffset+int(m.group(1))]["sValue"]) - #print(">", sText) + #echo(">", sText) return sText def rewriteText (self, sText, sRepl, iGroup, m, bUppercase): "text processor: write in at position" nLen = m.end(iGroup) - m.start(iGroup) @@ -702,11 +702,11 @@ return sText[0:m.start(iGroup)] + sNew + sText[m.end(iGroup):] def _tagAndPrepareTokenForRewriting (self, sWhat, nTokenRewriteStart, nTokenRewriteEnd, nTokenOffset, nLastToken, bCaseSvty, bDebug): "text processor: rewrite tokens between and position" if bDebug: - print(" START:", nTokenRewriteStart, "END:", nTokenRewriteEnd) + echo(" START:", nTokenRewriteStart, "END:", nTokenRewriteEnd) if sWhat == "*": # purge text if nTokenRewriteEnd - nTokenRewriteStart == 0: self.lToken[nTokenRewriteStart]["bToRemove"] = True else: @@ -735,11 +735,11 @@ self.lToken[nTokenRewriteStart]["sNewValue"] = sWhat else: # several tokens lTokenValue = 
sWhat.split("|") if len(lTokenValue) != (nTokenRewriteEnd - nTokenRewriteStart + 1): - print("Error. Text processor: number of replacements != number of tokens.") + echo("Error. Text processor: number of replacements != number of tokens.") return for i, sValue in zip(range(nTokenRewriteStart, nTokenRewriteEnd+1), lTokenValue): if not sValue or sValue == "*": self.lToken[i]["bToRemove"] = True else: @@ -748,11 +748,11 @@ self.lToken[i]["sNewValue"] = sValue def rewriteFromTags (self, bDebug=False): "rewrite the sentence, modify tokens, purge the token list" if bDebug: - print("REWRITE") + echo("REWRITE") lNewToken = [] nMergeUntil = 0 dTokenMerger = None for iToken, dToken in enumerate(self.lToken): bKeepToken = True @@ -759,30 +759,30 @@ if dToken["sType"] != "INFO": if nMergeUntil and iToken <= nMergeUntil: dTokenMerger["sValue"] += " " * (dToken["nStart"] - dTokenMerger["nEnd"]) + dToken["sValue"] dTokenMerger["nEnd"] = dToken["nEnd"] if bDebug: - print(" MERGED TOKEN:", dTokenMerger["sValue"]) + echo(" MERGED TOKEN:", dTokenMerger["sValue"]) bKeepToken = False if "nMergeUntil" in dToken: if iToken > nMergeUntil: # this token is not already merged with a previous token dTokenMerger = dToken if dToken["nMergeUntil"] > nMergeUntil: nMergeUntil = dToken["nMergeUntil"] del dToken["nMergeUntil"] elif "bToRemove" in dToken: if bDebug: - print(" REMOVED:", dToken["sValue"]) + echo(" REMOVED:", dToken["sValue"]) self.sSentence = self.sSentence[:dToken["nStart"]] + " " * (dToken["nEnd"] - dToken["nStart"]) + self.sSentence[dToken["nEnd"]:] bKeepToken = False # if bKeepToken: lNewToken.append(dToken) if "sNewValue" in dToken: # rewrite token and sentence if bDebug: - print(dToken["sValue"], "->", dToken["sNewValue"]) + echo(dToken["sValue"], "->", dToken["sNewValue"]) dToken["sRealValue"] = dToken["sValue"] dToken["sValue"] = dToken["sNewValue"] nDiffLen = len(dToken["sRealValue"]) - len(dToken["sNewValue"]) sNewRepl = (dToken["sNewValue"] + " " * nDiffLen) if nDiffLen >= 0 else dToken["sNewValue"][:len(dToken["sRealValue"])] self.sSentence = self.sSentence[:dToken["nStart"]] + sNewRepl + self.sSentence[dToken["nEnd"]:] @@ -789,15 +789,15 @@ del dToken["sNewValue"] else: try: del self.dTokenPos[dToken["nStart"]] except: - print(self) - print(dToken) + echo(self) + echo(dToken) exit() if bDebug: - print(" TEXT REWRITED:", self.sSentence) + echo(" TEXT REWRITED:", self.sSentence) self.lToken.clear() self.lToken = lNewToken #### common functions @@ -879,11 +879,11 @@ return True lMorph = _oSpellChecker.getMorph(tWord[1]) if not lMorph: echo("> not in dictionary") return True - print("TOKENS:", dTokenPos) + echo("TOKENS:", dTokenPos) if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]]: echo("DA: " + str(dTokenPos[tWord[0]]["lMorph"])) echo("FSA: " + str(lMorph)) return True @@ -1070,11 +1070,11 @@ def select (dTokenPos, nPos, sWord, sPattern, lDefault=None): "Disambiguation: select morphologies of matching " if not sWord: return True if nPos not in dTokenPos: - print("Error. There should be a token at this position: ", nPos) + echo("Error. There should be a token at this position: ", nPos) return True lMorph = _oSpellChecker.getMorph(sWord) if not lMorph or len(lMorph) == 1: return True lSelect = [ sMorph for sMorph in lMorph if re.search(sPattern, sMorph) ] @@ -1089,11 +1089,11 @@ def exclude (dTokenPos, nPos, sWord, sPattern, lDefault=None): "Disambiguation: exclude morphologies of matching " if not sWord: return True if nPos not in dTokenPos: - print("Error. 
There should be a token at this position: ", nPos) + echo("Error. There should be a token at this position: ", nPos) return True lMorph = _oSpellChecker.getMorph(sWord) if not lMorph or len(lMorph) == 1: return True lSelect = [ sMorph for sMorph in lMorph if not re.search(sPattern, sMorph) ] @@ -1106,11 +1106,11 @@ def define (dTokenPos, nPos, lMorph): "Disambiguation: set morphologies of token at with " if nPos not in dTokenPos: - print("Error. There should be a token at this position: ", nPos) + echo("Error. There should be a token at this position: ", nPos) return True dTokenPos[nPos]["lMorph"] = lMorph return True @@ -1120,44 +1120,44 @@ "select morphologies for according to , always return True" lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"]) if not lMorph or len(lMorph) == 1: if lDefault: dToken["lMorph"] = lDefault - #print("DA:", dToken["sValue"], dToken["lMorph"]) + #echo("DA:", dToken["sValue"], dToken["lMorph"]) return True lSelect = [ sMorph for sMorph in lMorph if re.search(sPattern, sMorph) ] if lSelect: if len(lSelect) != len(lMorph): dToken["lMorph"] = lSelect elif lDefault: dToken["lMorph"] = lDefault - #print("DA:", dToken["sValue"], dToken["lMorph"]) + #echo("DA:", dToken["sValue"], dToken["lMorph"]) return True def g_exclude (dToken, sPattern, lDefault=None): "select morphologies for according to , always return True" lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"]) if not lMorph or len(lMorph) == 1: if lDefault: dToken["lMorph"] = lDefault - #print("DA:", dToken["sValue"], dToken["lMorph"]) + #echo("DA:", dToken["sValue"], dToken["lMorph"]) return True lSelect = [ sMorph for sMorph in lMorph if not re.search(sPattern, sMorph) ] if lSelect: if len(lSelect) != len(lMorph): dToken["lMorph"] = lSelect elif lDefault: dToken["lMorph"] = lDefault - #print("DA:", dToken["sValue"], dToken["lMorph"]) + #echo("DA:", dToken["sValue"], dToken["lMorph"]) return True def g_define (dToken, lMorph): "set morphologies of , always return True" dToken["lMorph"] = lMorph - #print("DA:", dToken["sValue"], lMorph) + #echo("DA:", dToken["sValue"], lMorph) return True def g_define_from (dToken, nLeft=None, nRight=None): if nLeft is not None:
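Note for reviewers: this change is a mechanical substitution of print() with echo() in every debug and error path of gc_engine.py; no call site's arguments change. The untouched context line `echo("> not in dictionary")` in the displayInfo hunk shows that echo is already imported in this module (in Grammalecte this helper lives in the graphspell package). The point of the wrapper is that print() can raise UnicodeEncodeError on consoles whose codepage cannot represent some characters (typically Windows cp850/cp437 with letters such as "œ"), which kills the checker whenever bDebug is on. Below is a minimal sketch of such a wrapper; the exact signature and fallback strategy are assumptions for illustration, not the project's verbatim code.

import sys

def echo (*args, sep=" ", end="\n", file=sys.stdout, flush=False):
    "print(*args), falling back to a lossy re-encoding if the console rejects a character"
    try:
        print(*args, sep=sep, end=end, file=file, flush=flush)
    except UnicodeEncodeError:
        # Sketch only: Grammalecte's real helper may differ. Re-encode with
        # replacement characters so a bDebug trace can never crash the checker
        # on a limited console codepage.
        sEncoding = getattr(file, "encoding", None) or "ascii"
        sText = sep.join(str(arg) for arg in args)
        print(sText.encode(sEncoding, "replace").decode(sEncoding), end=end, file=file, flush=flush)
    return True

Because the wrapper keeps print()'s behavior for positional arguments, a call like echo("TOKEN:", dToken["sValue"]) prints exactly what the print() it replaces did, which is why the diff can be applied line for line.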