Index: gc_core/py/lang_core/gc_engine.py
==================================================================
--- gc_core/py/lang_core/gc_engine.py
+++ gc_core/py/lang_core/gc_engine.py
@@ -78,11 +78,12 @@
                 # regex parser
                 _, errs = _proofread(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dDA, dPriority, sCountry, dOpt, bDebug, bContext)
                 aErrors.update(errs)
                 # token parser
                 oSentence = TokenSentence(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart)
-                oSentence.parse(dPriority, sCountry, dOpt, bDebug, bContext)
+                _, errs = oSentence.parse(dPriority, sCountry, dOpt, bDebug, bContext)
+                aErrors.update(errs)
             except:
                 raise
     return aErrors.values()  # this is a view (iterable)
@@ -673,45 +674,12 @@
         self.sSentence = sSentence
         self.sSentence0 = sSentence0
         self.iStart = iStart
         self.lToken = list(_oTokenizer.genTokens(sSentence))
 
-    def parse (self, dPriority, sCountry="${country_default}", dOptions=None, bDebug=False, bContext=False):
-        dErr = {}
-        dPriority = {}  # Key = position; value = priority
-        dOpt = _dOptions  if not dOptions  else dOptions
-        lPointer = []
-        bIdRule = option('idrule')
-        for dToken in self.lToken:
-            # check arcs for each existing pointer
-            lNewPointer = []
-            for i, dPointer in enumerate(lPointer):
-                bValid = False
-                bFirst = True
-                for dNode in self._getNextMatchingNodes(dToken, dPointer["dNode"]):
-                    if bFirst:
-                        dPointer["nOffset"] = dToken["i"]
-                        dPointer["dNode"] = dNode
-                    else:
-                        lNewPointer.append({"nOffset": dPointer["nOffset"], "dNode": dNode})
-                    bFirst = False
-                    bValid = True
-                if not bValid:
-                    del lPointer[i]
-            lPointer.extend(lNewPointer)
-            # check arcs of first nodes
-            for dNode in self._getNextMatchingNodes(dToken, dGraph[0]):
-                lPointer.append({"nOffset": 0, "dNode": dNode})
-            # check if there are rules to check for each pointer
-            for dPointer in lPointer:
-                if "<rules>" in dPointer["dNode"]:
-                    dErr = self._executeActions(dPointer["dNode"]["<rules>"], dPointer["nOffset"], dPriority, dOpt, bIdRule, bContext)
-        if dErr:
-            print(dErr)
-        return dErr
-
     def _getNextMatchingNodes (self, dToken, dNode):
+        "generator: return nodes where “values” match arcs"
         # token value
         if dToken["sValue"] in dNode:
             #print("value found: ", dToken["sValue"])
             yield dGraph[dNode[dToken["sValue"]]]
         # token lemmas
@@ -735,13 +703,52 @@
         for sRegex in dNode["<re_morph>"]:
             for sMorph in _oSpellChecker.getMorph(dToken["sValue"]):
                 if re.search(sRegex, sMorph):
                     #print("morph regex matching: ", sRegex)
                     yield dGraph[dNode["<re_morph>"][sRegex]]
+
+    def parse (self, dPriority, sCountry="${country_default}", dOptions=None, bDebug=False, bContext=False):
+        dErr = {}
+        dPriority = {}  # Key = position; value = priority
+        dOpt = _dOptions  if not dOptions  else dOptions
+        lPointer = []
+        bIdRule = option('idrule')
+        bChange = False
+        for dToken in self.lToken:
+            # check arcs for each existing pointer
+            lNewPointer = []
+            for i, dPointer in enumerate(lPointer):
+                bValid = False
+                bFirst = True
+                for dNode in self._getNextMatchingNodes(dToken, dPointer["dNode"]):
+                    if bFirst:
+                        dPointer["dNode"] = dNode
+                    else:
+                        lNewPointer.append({"nOffset": dPointer["nOffset"], "dNode": dNode})
+                    bFirst = False
+                    bValid = True
+                if not bValid:
+                    del lPointer[i]
+            lPointer.extend(lNewPointer)
+            # check arcs of first nodes
+            for dNode in self._getNextMatchingNodes(dToken, dGraph[0]):
+                lPointer.append({"nOffset": 0, "dNode": dNode})
+            # check if there are rules to check for each pointer
+            for dPointer in lPointer:
+                if "<rules>" in dPointer["dNode"]:
+                    bHasChanged, errs = self._executeActions(dPointer["dNode"]["<rules>"], dPointer["nOffset"], dPriority, dOpt, bIdRule, bContext)
+                    dErr.update(errs)
+                    if bHasChanged:
+                        bChange = True
+        if dErr:
+            print(dErr)
+        return (bChange, dErr)
 
     def _executeActions (self, dNode, nTokenOffset, dPriority, dOpt, bIdRule, bContext):
+        print(locals())
         dErrs = {}
+        bChange = False
         for sLineId, nextNodeKey in dNode.items():
             for sRuleId in dGraph[nextNodeKey]:
                 print(sRuleId)
                 bCondMemo = None
                 sFuncCond, cActionType, sWhat, *eAct = dRule[sRuleId]
@@ -777,11 +784,11 @@
                            print("# error: unknown action at " + sLineId)
                    elif cActionType == ">":
                        break
                except Exception as e:
                    raise Exception(str(e), sLineId)
-        return dErrs
+        return bChange, dErrs
 
     def _rewrite (self, sWhat, nErrorStart, nErrorEnd):
         "text processor: rewrite tokens between <nErrorStart> and <nErrorEnd> position"
         lTokenValue = sWhat.split("|")
         if len(lTokenValue) != (nErrorEnd - nErrorStart + 1):
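
Note (editor's sketch, not part of the patch): after this change, `TokenSentence.parse()` returns a `(bChange, dErr)` pair, mirroring `_proofread()`, instead of a bare error dict that each `_executeActions()` call used to overwrite; that is why the caller in the first hunk can now merge token-parser errors into `aErrors` with `update()`. A minimal, self-contained illustration of that return contract, using hypothetical stand-ins (`parse_stub`, `_execute_actions_stub`) rather than the real engine classes:

    # Hypothetical stand-ins sketching the (bChange, dErrs) contract shared by
    # TokenSentence.parse() and _executeActions() after this commit.

    def _execute_actions_stub(dNode):
        # Reports whether the token stream was rewritten, plus errors keyed by position.
        return False, {10: {"sRuleId": "example_rule", "sMessage": "example error"}}

    def parse_stub():
        dErr = {}
        bChange = False
        bHasChanged, errs = _execute_actions_stub({})
        dErr.update(errs)             # accumulate errors instead of overwriting dErr
        if bHasChanged:
            bChange = True
        return bChange, dErr

    aErrors = {}
    _, errs = parse_stub()            # caller discards the change flag, as in the first hunk
    aErrors.update(errs)
    print(list(aErrors.values()))     # [{'sRuleId': 'example_rule', 'sMessage': 'example error'}]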