Index: gc_core/py/lang_core/gc_engine.py ================================================================== --- gc_core/py/lang_core/gc_engine.py +++ gc_core/py/lang_core/gc_engine.py @@ -11,11 +11,11 @@ from ..graphspell.spellchecker import SpellChecker from ..graphspell.echo import echo from . import gc_options from ..graphspell.tokenizer import Tokenizer -from .gc_rules_graph import dGraph +from .gc_rules_graph import dGraph, dRule __all__ = [ "lang", "locales", "pkg", "name", "version", "author", \ "load", "parse", "getSpellChecker", \ "setOption", "setOptions", "getOptions", "getDefaultOptions", "getOptionsLabels", "resetOptions", "displayOptions", \ @@ -586,51 +586,68 @@ def parse (self): dErr = {} lPointer = [] for dToken in self.lToken: + # check arcs for each existing pointer + lNewPointer = [] for i, dPointer in enumerate(lPointer): bValid = False + bFirst = True for dNode in self._getNextMatchingNodes(dToken, dPointer["dNode"]): - dPointer["nOffset"] = dToken["i"] - dPointer["dNode"] = dNode + if bFirst: + dPointer["nOffset"] = dToken["i"] + dPointer["dNode"] = dNode + else: + lNewPointer.append({"nOffset": dPointer["nOffset"], "dNode": dNode}) + bFirst = False bValid = True if not bValid: del lPointer[i] - for dNode in self._getNextMatchingNodes(dToken, dGraph): + lPointer.extend(lNewPointer) + # check arcs of first nodes + for dNode in self._getNextMatchingNodes(dToken, dGraph[0]): lPointer.append({"nOffset": 0, "dNode": dNode}) + # check if there is rules to check for each pointer for dPointer in lPointer: if "" in dPointer["dNode"]: - for dNode in dGraph[dPointer["dNode"][""]]: - dErr = self._executeActions(dNode, nOffset) + dErr = self._executeActions(dPointer["dNode"][""], dPointer["nOffset"]) + if dErr: + print(dErr) return dErr def _getNextMatchingNodes (self, dToken, dNode): # token value if dToken["sValue"] in dNode: + print("value found: ", dToken["sValue"]) yield dGraph[dNode[dToken["sValue"]]] # token lemmas if "" in dNode: for sLemma in _oSpellChecker.getLemma(dToken["sValue"]): if sLemma in dNode[""]: + print("lemma found: ", sLemma) yield dGraph[dNode[""][sLemma]] # universal arc if "*" in dNode: + print("generic arc") yield dGraph[dNode["*"]] # regex value arcs if "" in dNode: for sRegex in dNode[""]: if re.search(sRegex, dToken["sValue"]): + print("value regex matching: ", sRegex) yield dGraph[dNode[""][sRegex]] # regex morph arcs if "" in dNode: for sRegex in dNode[""]: for sMorph in _oSpellChecker.getMorph(dToken["sValue"]): if re.search(sRegex, sMorph): + print("morph regex matching: ", sRegex) yield dGraph[dNode[""][sRegex]] def _executeActions (self, dNode, nOffset): + dErrs = {} for sLineId, nextNodeKey in dNode.items(): for sArc in dGraph[nextNodeKey]: print(sArc) bCondMemo = None sFuncCond, cActionType, sWhat, *eAct = dRule[sArc] @@ -639,12 +656,12 @@ bCondMemo = not sFuncCond or globals()[sFuncCond](self, sCountry, bCondMemo) if bCondMemo: if cActionType == "-": # grammar error print("-") - nErrorStart = nSentenceOffset + m.start(eAct[0]) - nErrorEnd = nSentenceOffset + m.start(eAct[1]) + nErrorStart = self.iStart + self.lToken[eAct[0]]["nStart"] + nErrorEnd = self.iStart + self.lToken[eAct[1]]["nEnd"] if nErrorStart not in dErrs or nPriority > dPriority[nErrorStart]: dErrs[nErrorStart] = _createError(self, sWhat, nErrorStart, nErrorEnd, sLineId, bUppercase, eAct[2], eAct[3], bIdRule, sOption, bContext) dPriority[nErrorStart] = nPriority elif cActionType == "~": # text processor @@ -665,19 +682,12 @@ else: print("# error: unknown action at " + sLineId) elif cActionType == ">": break except Exception as e: - raise Exception(str(e), "# " + sLineId + " # " + sRuleId) - - def _createWriterError (self): - d = {} - return d - - def _createDictError (self): - d = {} - return d + raise Exception(str(e), sLineId) + return dErrs def _rewrite (self, sWhat, nErrorStart, nErrorEnd): "text processor: rewrite tokens between and position" lTokenValue = sWhat.split("|") if len(lTokenValue) != (nErrorEnd - nErrorStart + 1): Index: gc_lang/fr/rules_graph.grx ================================================================== --- gc_lang/fr/rules_graph.grx +++ gc_lang/fr/rules_graph.grx @@ -37,22 +37,26 @@ # ERREURS COURANTES # http://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Fautes_d%27orthographe/Courantes -__rule1__ - les ~:N:.:s - des ~:N:.:s - ces ~:N:.:s - <<- -1>> acquit # Message0|http://test.grammalecte.net - -__rule2__ - ci important que soi - ci vraiment il y a - ci pour ça - <<- morph(\2, ":[WAR]", False) -1>> si # Message1|http://test.grammalecte.net - -__rule3__ - contre ([nature|pétrie|action]) par ([ennui|sélection]) - <<- morph(\1, "xxxx") -1:2>> =\1+\2 # Message2|http://test.grammalecte.org - <<- ~1>> hyper|fonction - +__avoir_confiance_en__ + >avoir confiance (dans) [moi|toi|soi|lui|elle|nous|vous|eux|elles] + <<- -1>> en # Avoir confiance en quelqu’un ou quelque chose.|http://grammalecte.net + +TEST: Elle avait confiance {{dans}} lui. + + +__code_legacy__ + legacy code + code legacy + <<- -1:2>> code hérité|code reliquat # Anglicisme superflu. + +TEST: c’est du {{legacy code}}. +TEST: ce {{code legacy}} est un cauchemar + + +__être_en_xxxx__ + [>être|>rester|>demeurer] an [désaccord|accord] + <<- -2>> en # Confusion. Un an = une année. + +TEST: Je suis {{an}} désaccord avec lui. Index: make.py ================================================================== --- make.py +++ make.py @@ -231,12 +231,15 @@ print() # TEST FILES with open("grammalecte/"+sLang+"/gc_test.txt", "w", encoding="utf-8", newline="\n") as hDstPy: hDstPy.write("# TESTS FOR LANG [" + sLang + "]\n\n") + hDstPy.write("# REGEX RULES\n\n") hDstPy.write(dVars['regex_gctests']) + hDstPy.write("\n\n\n# GRAPH RULES\n\n") hDstPy.write(dVars['graph_gctests']) + hDstPy.write("\n") createOXT(spLang, dVars, xConfig._sections['oxt'], spLangPack, bInstallOXT) createServerOptions(sLang, dVars) createPackageZip(sLang, dVars, spLangPack)