@@ -11,11 +11,11 @@
 from ..graphspell.spellchecker import SpellChecker
 from ..graphspell.echo import echo
 from . import gc_options
 from ..graphspell.tokenizer import Tokenizer
-from .gc_rules_graph import dGraph
+from .gc_rules_graph import dGraph, dRule
 
 __all__ = [ "lang", "locales", "pkg", "name", "version", "author", \
             "load", "parse", "getSpellChecker", \
             "setOption", "setOptions", "getOptions", "getDefaultOptions", "getOptionsLabels", "resetOptions", "displayOptions", \
@@ -586,51 +586,68 @@
     def parse (self):
         dErr = {}
         lPointer = []
         for dToken in self.lToken:
+            # check arcs for each existing pointer
+            lNewPointer = []
             for i, dPointer in enumerate(lPointer):
                 bValid = False
+                bFirst = True
                 for dNode in self._getNextMatchingNodes(dToken, dPointer["dNode"]):
-                    dPointer["nOffset"] = dToken["i"]
-                    dPointer["dNode"] = dNode
+                    if bFirst:
+                        dPointer["nOffset"] = dToken["i"]
+                        dPointer["dNode"] = dNode
+                    else:
+                        lNewPointer.append({"nOffset": dPointer["nOffset"], "dNode": dNode})
+                    bFirst = False
                     bValid = True
                 if not bValid:
                     del lPointer[i]
-            for dNode in self._getNextMatchingNodes(dToken, dGraph):
+            lPointer.extend(lNewPointer)
+            # check arcs of first nodes
+            for dNode in self._getNextMatchingNodes(dToken, dGraph[0]):
                 lPointer.append({"nOffset": 0, "dNode": dNode})
+            # check if there are rules to check for each pointer
             for dPointer in lPointer:
                 if "<rules>" in dPointer["dNode"]:
-                    for dNode in dGraph[dPointer["dNode"]["<rules>"]]:
-                        dErr = self._executeActions(dNode, nOffset)
+                    dErr = self._executeActions(dPointer["dNode"]["<rules>"], dPointer["nOffset"])
+        if dErr:
+            print(dErr)
         return dErr
 
     def _getNextMatchingNodes (self, dToken, dNode):
         # token value
         if dToken["sValue"] in dNode:
+            print("value found: ", dToken["sValue"])
             yield dGraph[dNode[dToken["sValue"]]]
         # token lemmas
         if "<lemmas>" in dNode:
             for sLemma in _oSpellChecker.getLemma(dToken["sValue"]):
                 if sLemma in dNode["<lemmas>"]:
+                    print("lemma found: ", sLemma)
                     yield dGraph[dNode["<lemmas>"][sLemma]]
         # universal arc
         if "*" in dNode:
+            print("generic arc")
             yield dGraph[dNode["*"]]
         # regex value arcs
         if "<re_value>" in dNode:
             for sRegex in dNode["<re_value>"]:
                 if re.search(sRegex, dToken["sValue"]):
+                    print("value regex matching: ", sRegex)
                     yield dGraph[dNode["<re_value>"][sRegex]]
         # regex morph arcs
         if "<re_morph>" in dNode:
             for sRegex in dNode["<re_morph>"]:
                 for sMorph in _oSpellChecker.getMorph(dToken["sValue"]):
                     if re.search(sRegex, sMorph):
+                        print("morph regex matching: ", sRegex)
                         yield dGraph[dNode["<re_morph>"][sRegex]]
 
     def _executeActions (self, dNode, nOffset):
+        dErrs = {}
         for sLineId, nextNodeKey in dNode.items():
             for sArc in dGraph[nextNodeKey]:
                 print(sArc)
                 bCondMemo = None
                 sFuncCond, cActionType, sWhat, *eAct = dRule[sArc]
@@ -639,12 +656,12 @@
                     bCondMemo = not sFuncCond or globals()[sFuncCond](self, sCountry, bCondMemo)
                     if bCondMemo:
                         if cActionType == "-":
                             # grammar error
                             print("-")
-                            nErrorStart = nSentenceOffset + m.start(eAct[0])
-                            nErrorEnd = nSentenceOffset + m.start(eAct[1])
+                            nErrorStart = self.iStart + self.lToken[eAct[0]]["nStart"]
+                            nErrorEnd = self.iStart + self.lToken[eAct[1]]["nEnd"]
                             if nErrorStart not in dErrs or nPriority > dPriority[nErrorStart]:
                                 dErrs[nErrorStart] = _createError(self, sWhat, nErrorStart, nErrorEnd, sLineId, bUppercase, eAct[2], eAct[3], bIdRule, sOption, bContext)
                                 dPriority[nErrorStart] = nPriority
                         elif cActionType == "~":
                             # text processor
@@ -665,19 +682,12 @@
                         else:
                             print("# error: unknown action at " + sLineId)
                     elif cActionType == ">":
                         break
                 except Exception as e:
-                    raise Exception(str(e), "# " + sLineId + " # " + sRuleId)
-
-    def _createWriterError (self):
-        d = {}
-        return d
-
-    def _createDictError (self):
-        d = {}
-        return d
+                    raise Exception(str(e), sLineId)
+        return dErrs
 
     def _rewrite (self, sWhat, nErrorStart, nErrorEnd):
         "text processor: rewrite tokens between <nErrorStart> and <nErrorEnd> position"
         lTokenValue = sWhat.split("|")
         if len(lTokenValue) != (nErrorEnd - nErrorStart + 1):
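
Note on the data shapes the new code relies on: parse() and _executeActions() index dGraph by integer node keys and dRule by rule identifiers, each rule unpacking as sFuncCond, cActionType, sWhat, *eAct. The snippet below is only a hand-written sketch of what such structures could look like, to make the traversal easier to follow; the node keys, arc labels and rule tuple are invented for illustration and are not taken from the compiled gc_rules_graph.

# Illustrative sketch only: hypothetical nodes and rule, mirroring the
# lookups done above (dGraph[int] -> node dict, dRule[rule id] -> action tuple).
dGraph = {
    0: { "le": 1 },                      # root node: token value "le" -> node 1
    1: { "<lemmas>": { "chat": 2 } },    # lemma arc: lemma "chat" -> node 2
    2: { "<rules>": { "#42": 3 } },      # rules reached: line id -> next node key
    3: { "rule_le_chat": 0 },            # arcs here are rule ids looked up in dRule
}
dRule = {
    #  sFuncCond, cActionType, sWhat, then token indexes and extra data (eAct)
    "rule_le_chat": ("", "-", "Suggestion.", 0, 1, "Message.", ""),
}

With these toy shapes, a pointer sitting on dGraph[1] advances to dGraph[2] when a token whose lemma is "chat" is read; the "<rules>" entry of node 2 is then what parse() hands to _executeActions().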
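
The other change worth calling out is the pointer branching in parse(): the first node returned by _getNextMatchingNodes() advances the existing pointer in place, and every further node forks a copy collected in lNewPointer. Below is a standalone sketch of that idea, not the engine code; the function name and the fGetMatchingNodes parameter are made up.

# Standalone sketch of the branching logic added to parse(); pointers are
# dicts {"nOffset": int, "dNode": node dict}, as in the patch.
def advance_pointers (lPointer, fGetMatchingNodes, dToken):
    "return the pointers still alive after reading <dToken>"
    lNextPointer = []
    for dPointer in lPointer:
        bFirst = True
        for dNode in fGetMatchingNodes(dToken, dPointer["dNode"]):
            if bFirst:
                # first match: move the existing pointer forward
                dPointer["nOffset"] = dToken["i"]
                dPointer["dNode"] = dNode
                lNextPointer.append(dPointer)
            else:
                # further matches: fork a new pointer for the alternative path
                lNextPointer.append({ "nOffset": dPointer["nOffset"], "dNode": dNode })
            bFirst = False
        # pointers with no matching arc are simply not carried over
    return lNextPointer

Rebuilding the list this way also sidesteps the del lPointer[i] inside the enumerate() loop that the patch still relies on, which skips the element that shifts into position i after each deletion.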