Index: gc_core/py/lang_core/gc_engine.py ================================================================== --- gc_core/py/lang_core/gc_engine.py +++ gc_core/py/lang_core/gc_engine.py @@ -79,12 +79,14 @@ # regex parser _, errs = _proofread(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dDA, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext) aErrors.update(errs) # token parser oSentence = TokenSentence(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart) - _, errs = oSentence.parse(dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext) + bChange, errs = oSentence.parse(dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext) aErrors.update(errs) + if bChange: + oSentence.rewrite() except: raise return aErrors.values() # this is a view (iterable) @@ -664,14 +666,14 @@ #### TOKEN SENTENCE CHECKER class TokenSentence: - def __init__ (self, sSentence, sSentence0, iStart): + def __init__ (self, sSentence, sSentence0, nOffset): self.sSentence = sSentence self.sSentence0 = sSentence0 - self.iStart = iStart + self.nOffset = nOffset self.lToken = list(_oTokenizer.genTokens(sSentence, True)) def _getNextMatchingNodes (self, dToken, dNode): "generator: return nodes where “values” match arcs" # token value @@ -750,11 +752,11 @@ if dErr: print(dErr) return (bChange, dErr) def _executeActions (self, dNode, nTokenOffset, dPriority, dOpt, sCountry, bShowRuleId, bContext): - #print(locals()) + "execute actions found in the DARG" dErrs = {} bChange = False for sLineId, nextNodeKey in dNode.items(): for sRuleId in dGraph[nextNodeKey]: print(sRuleId) @@ -767,23 +769,20 @@ if cActionType == "-": # grammar error print("-") nTokenErrorStart = nTokenOffset + eAct[0] nTokenErrorEnd = nTokenOffset + eAct[1] - nErrorStart = self.iStart + self.lToken[nTokenErrorStart]["nStart"] - nErrorEnd = self.iStart + self.lToken[nTokenErrorEnd]["nEnd"] + nErrorStart = self.nOffset + self.lToken[nTokenErrorStart]["nStart"] + nErrorEnd = self.nOffset + self.lToken[nTokenErrorEnd]["nEnd"] if nErrorStart not in dErrs or eAct[2] > dPriority[nErrorStart]: dErrs[nErrorStart] = _createTokenError(self.lToken, self.sSentence, self.sSentence0, sWhat, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, True, eAct[3], eAct[4], bShowRuleId, "notype", bContext) dPriority[nErrorStart] = eAct[2] elif cActionType == "~": # text processor print("~") - self._rewrite(sWhat, nErrorStart, nErrorEnd) - elif cActionType == "@": - # jump - print("@") - self._jump(sWhat) + self._tagAndPrepareTokenForRewriting(sWhat, nTokenOffset + eAct[0], nTokenOffset + eAct[1]) + bChange = True elif cActionType == "=": # disambiguation print("=") globals()[sWhat](self.lToken) elif cActionType == ">": @@ -796,27 +795,60 @@ break except Exception as e: raise Exception(str(e), sLineId) return bChange, dErrs - def _rewrite (self, sWhat, nErrorStart, nErrorEnd): - "text processor: rewrite tokens between and position" - lTokenValue = sWhat.split("|") - if len(lTokenValue) != (nErrorEnd - nErrorStart + 1): - print("Error. Text processor: number of replacements != number of tokens.") - return - for i, sValue in zip(range(nErrorStart, nErrorEnd+1), lTokenValue): - self.lToken[i]["sValue"] = sValue - - def _jump (self, sWhat): - try: - nFrom, nTo = sWhat.split(">") - self.lToken[int(nFrom)]["iJump"] = int(nTo) - except: - print("# Error. Jump failed: ", sWhat) - traceback.print_exc() - return + def _tagAndPrepareTokenForRewriting (self, sWhat, nTokenRewriteStart, nTokenRewriteEnd, bUppercase=True): + "text processor: rewrite tokens between and position" + if sWhat == "*": + # purge text + if nTokenRewriteEnd - nTokenRewriteStart == 0: + self.lToken[nTokenRewriteStart]["bToRemove"] = True + else: + for i in range(nTokenRewriteStart, nTokenRewriteEnd+1): + self.lToken[i]["bToRemove"] = True + else: + if sWhat.startswith("="): + sWhat = globals()[sWhat[1:]](self.lToken) + bUppercase = bUppercase and self.lToken[nTokenRewriteStart]["sValue"][0:1].isupper() + if nTokenRewriteEnd - nTokenRewriteStart == 0: + sWhat = sWhat + " " * (len(self.lToken[nTokenRewriteStart]["sValue"])-len(sWhat)) + if bUppercase: + sWhat = sWhat[0:1].upper() + sWhat[1:] + self.lToken[nTokenRewriteStart]["sNewValue"] = sWhat + else: + lTokenValue = sWhat.split("|") + if len(lTokenValue) != (nTokenRewriteEnd - nTokenRewriteStart + 1): + print("Error. Text processor: number of replacements != number of tokens.") + return + for i, sValue in zip(range(nTokenRewriteStart, nTokenRewriteEnd+1), lTokenValue): + if bUppercase: + sValue = sValue[0:1].upper() + sValue[1:] + self.lToken[i]["sNewValue"] = sValue + + def rewrite (self): + "rewrite the sentence, modify tokens, purge the token list" + lNewToken = [] + for i, dToken in enumerate(self.lToken): + if "bToRemove" in dToken: + # remove useless token + self.sSentence = self.sSentence[:self.nOffset+dToken["nStart"]] + " " * (dToken["nEnd"] - dToken["nStart"]) + self.sSentence[self.nOffset+dToken["nEnd"]:] + #print("removed:", dToken["sValue"]) + else: + lNewToken.append(dToken) + if "sNewValue" in dToken: + # rewrite token and sentence + print(dToken["sValue"], "->", dToken["sNewValue"]) + dToken["sRealValue"] = dToken["sValue"] + dToken["sValue"] = dToken["sNewValue"] + nDiffLen = len(dToken["sRealValue"]) - len(dToken["sNewValue"]) + sNewRepl = (dToken["sNewValue"] + " " * nDiffLen) if nDiffLen >= 0 else dToken["sNewValue"][:len(dToken["sRealValue"])] + self.sSentence = self.sSentence[:self.nOffset+dToken["nStart"]] + sNewRepl + self.sSentence[self.nOffset+dToken["nEnd"]:] + del dToken["sNewValue"] + print(self.sSentence) + self.lToken.clear() + self.lToken = lNewToken #### Analyse tokens def g_morph (dToken, sPattern, sNegPattern=""): Index: gc_lang/fr/rules_graph.grx ================================================================== --- gc_lang/fr/rules_graph.grx +++ gc_lang/fr/rules_graph.grx @@ -36,10 +36,24 @@ # Fin d’interprétation du fichier avec une ligne commençant par #END # ERREURS COURANTES # http://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Fautes_d%27orthographe/Courantes + +__pp__ + >avoir marre [d’|des|du|de] + <<- ~1:3>> * + +TEST: J’en ai marre de ces gens-là. + + +__pp2__ + il ne pense qu’ à sa gueule + <<- ~4:7>> que|Z|a|perdu + +TEST: il ne pense qu’à sa gueule. + __avoir_confiance_en__ >avoir confiance (dans) [moi|toi|soi|lui|elle|nous|vous|eux|elles] <<- -1>> en # Avoir confiance en quelqu’un ou quelque chose.|http://grammalecte.net @@ -63,14 +77,15 @@ __faire_plaisir__ >faire plaisirs <<- -2>> plaisir # Faire plaisir : dans cette locution, “plaisir” doit être au singulier. + <<- ~2>> * TEST: Ça me fait {{plaisirs}}. __test__ je ~préf[éè]r [que|qu’] @(?::Os|:M)¬:X @:I <<- morph(\1, ":V") and morph(\4, ":Os|:M", ":X") -5>> SUBJONCTIF # SUBJONCTIF. TEST: je préférerais qu’Isabelle {{est}} partie.