Index: compile_rules_graph.py
==================================================================
--- compile_rules_graph.py
+++ compile_rules_graph.py
@@ -22,11 +22,11 @@
     s = re.sub(r"isRealEnd *\(\)", 'after([""])', s)
     s = re.sub(r"isEnd0 *\(\)", 'after0(["", ","])', s)
     s = re.sub(r"isRealEnd0 *\(\)", 'after0([""])', s)
     s = re.sub(r"(select|exclude)[(][\\](\d+)", '\\1(lToken[\\2]', s)
     s = re.sub(r"define[(][\\](\d+)", 'define(lToken[\\1]', s)
-    s = re.sub(r"(morph|morphex|displayInfo)[(][\\](\d+)", '\\1(lToken[\\2])', s)
+    s = re.sub(r"(morph|morphex|displayInfo)[(]\\(\d+)", '\\1(lToken[\\2]', s)
     s = re.sub(r"token\(\s*(\d)", 'nextToken(\\1', s)          # token(n)
     s = re.sub(r"token\(\s*-(\d)", 'prevToken(\\1', s)         # token(-n)
     s = re.sub(r"before\(\s*", 'look(s[:m.start()], ', s)      # before(s)
     s = re.sub(r"after\(\s*", 'look(s[m.end():], ', s)         # after(s)
     s = re.sub(r"textarea\(\s*", 'look(s, ', s)                # textarea(s)
@@ -113,11 +113,11 @@
             sURL = mURL.group(1).strip()
             sMsg = sMsg[:mURL.start(0)].strip()
         if sMsg[0:1] == "=":
             sMsg = prepareFunction(sMsg[1:])
             lFUNCTIONS.append(("g_m_"+sIdAction, sMsg))
-            for x in re.finditer("group[(](\d+)[)]", sMsg):
+            for x in re.finditer("group[(](\\d+)[)]", sMsg):
                 if int(x.group(1)) > nGroup:
                     print("# Error in groups in message at line " + sIdAction + " ("+str(nGroup)+" groups only)")
             sMsg = "=g_m_"+sIdAction
         else:
             for x in re.finditer(r"\\(\d+)", sMsg):
@@ -128,11 +128,11 @@
 
     if sAction[0:1] == "=" or cAction == "=":
         if "define" in sAction and not re.search(r"define\(\\\d+ *, *\[.*\] *\)", sAction):
            print("# Error in action at line " + sIdAction + ": second argument for define must be a list of strings")
         sAction = prepareFunction(sAction)
-        for x in re.finditer("group[(](\d+)[)]", sAction):
+        for x in re.finditer("group[(](\\d+)[)]", sAction):
             if int(x.group(1)) > nGroup:
                 print("# Error in groups in replacement at line " + sIdAction + " ("+str(nGroup)+" groups only)")
     else:
         for x in re.finditer(r"\\(\d+)", sAction):
             if int(x.group(1)) > nGroup:
@@ -267,16 +267,41 @@
             print(e)
 
     oDARG = darg.DARG(lPreparedRule, sLang)
     oRuleGraph = oDARG.createGraph()
 
+    # creating file with all functions callable by rules
+    print("  creating callables...")
+    sPyCallables = "# generated code, do not edit\n"
+    #sJSCallables = "// generated code, do not edit\nconst oEvalFunc = {\n"
+    for sFuncName, sReturn in lFUNCTIONS:
+        if sFuncName.startswith("g_c_"): # condition
+            sParams = "lToken, sCountry, bCondMemo"
+        elif sFuncName.startswith("g_m_"): # message
+            sParams = "lToken"
+        elif sFuncName.startswith("g_s_"): # suggestion
+            sParams = "lToken"
+        elif sFuncName.startswith("g_p_"): # preprocessor
+            sParams = "lToken"
+        elif sFuncName.startswith("g_d_"): # disambiguator
+            sParams = "lToken"
+        else:
+            print("# Unknown function type in [" + sFuncName + "]")
+            continue
+        sPyCallables += "def {} ({}):\n".format(sFuncName, sParams)
+        sPyCallables += "    return " + sReturn + "\n"
+        #sJSCallables += "    {}: function ({})".format(sFuncName, sParams) + " {\n"
+        #sJSCallables += "        return " + jsconv.py2js(sReturn) + ";\n"
+        #sJSCallables += "    },\n"
+    #sJSCallables += "}\n"
+
     # Result
     d = {
-        "graph_callables": None,
+        "graph_callables": sPyCallables,
         "graph_gctests": None,
         "rules_graph": oRuleGraph,
         "rules_actions": dACTIONS
     }
 
     return d
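
Note: the "creating callables" block above turns each (function name, return expression) pair into Python source for a function that rules can later call by name. A minimal standalone sketch of that generation, with a hypothetical lFUNCTIONS entry (the real expressions are produced by prepareFunction):

    # hypothetical entry: a condition testing the morphology of token 2
    lFUNCTIONS = [("g_c_1234_1", 'morph(lToken[2], ":V")')]

    sPyCallables = "# generated code, do not edit\n"
    for sFuncName, sReturn in lFUNCTIONS:
        # conditions receive more context than messages/suggestions/etc.
        sParams = "lToken, sCountry, bCondMemo" if sFuncName.startswith("g_c_") else "lToken"
        sPyCallables += "def {} ({}):\n".format(sFuncName, sParams)
        sPyCallables += "    return " + sReturn + "\n"

    print(sPyCallables)
    # def g_c_1234_1 (lToken, sCountry, bCondMemo):
    #     return morph(lToken[2], ":V")

Judging by the gc_engine.py hunk below, the generated source returned in d["graph_callables"] is meant to fill the ${graph_callables} template placeholder at the end of the engine.
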
Index: gc_core/py/lang_core/gc_engine.py
==================================================================
--- gc_core/py/lang_core/gc_engine.py
+++ gc_core/py/lang_core/gc_engine.py
@@ -76,10 +76,13 @@
         dDA.clear()
         try:
             # regex parser
             _, errs = _proofread(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dDA, dPriority, sCountry, dOpt, bDebug, bContext)
             aErrors.update(errs)
+            # token parser
+            oSentence = TokenSentence(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, dPriority, sCountry, dOpt, bDebug, bContext)
+            oSentence.parse()
         except:
             raise
     return aErrors.values() # this is a view (iterable)
@@ -566,5 +569,234 @@
 
 #### CALLABLES (generated code)
 
 ${callables}
+
+
+
+#### TOKEN SENTENCE CHECKER
+
+class TokenSentence:
+
+    def __init__ (self, sSentence, sSentence0, iStart, dPriority, sCountry, dOpt, bDebug, bContext):
+        self.sSentence = sSentence
+        self.sSentence0 = sSentence0
+        self.iStart = iStart
+        self.lToken = list(_oTokenizer.genTokens(sSentence))
+
+    def parse (self):
+        dErr = {}
+        lPointer = []
+        for dToken in self.lToken:
+            for i, dPointer in enumerate(lPointer):
+                bValid = False
+                for dNode in self._getNextMatchingNodes(dToken, dPointer["dNode"]):
+                    dPointer["nOffset"] = dToken["i"]
+                    dPointer["dNode"] = dNode
+                    bValid = True
+                if not bValid:
+                    del lPointer[i]
+            for dNode in self._getNextMatchingNodes(dToken, dGraph):
+                lPointer.append({"nOffset": 0, "dNode": dNode})
+            for dPointer in lPointer:
+                if "" in dPointer["dNode"]:
+                    for dNode in dGraph[dPointer["dNode"][""]]:
+                        dErr = self._executeActions(dNode, nOffset)
+        return dErr
+
+    def _getNextMatchingNodes (self, dToken, dNode):
+        # token value
+        if dToken["sValue"] in dNode:
+            yield dGraph[dNode[dToken["sValue"]]]
+        # token lemmas
+        for sLemma in _oSpellChecker.getLemma(dToken["sValue"]):
+            if sLemma in dNode:
+                yield dGraph[dNode[sLemma]]
+        # universal arc
+        if "*" in dNode:
+            yield dGraph[dNode["*"]]
+        # regex arcs
+        if "~" in dNode:
+            for sRegex in dNode["~"]:
+                for sMorph in _oSpellChecker.getMorph(dToken["sValue"]):
+                    if re.search(sRegex, sMorph):
+                        yield dGraph[dNode["~"][sRegex]]
+
+    def _executeActions (self, dNode, nOffset):
+        for sLineId, nextNodeKey in dNode.items():
+            for sArc in dGraph[nextNodeKey]:
+                print(sArc)
+                bCondMemo = None
+                sFuncCond, cActionType, sWhat, *eAct = dRule[sArc]
+                # action in lActions: [ condition, action type, replacement/suggestion/action[, iGroupStart, iGroupEnd[, message, URL]] ]
+                try:
+                    bCondMemo = not sFuncCond or globals()[sFuncCond](self, sCountry, bCondMemo)
+                    if bCondMemo:
+                        if cActionType == "-":
+                            # grammar error
+                            print("-")
+                            nErrorStart = nSentenceOffset + m.start(eAct[0])
+                            nErrorEnd = nSentenceOffset + m.start(eAct[1])
+                            if nErrorStart not in dErrs or nPriority > dPriority[nErrorStart]:
+                                dErrs[nErrorStart] = _createError(self, sWhat, nErrorStart, nErrorEnd, sLineId, bUppercase, eAct[2], eAct[3], bIdRule, sOption, bContext)
+                                dPriority[nErrorStart] = nPriority
+                        elif cActionType == "~":
+                            # text processor
+                            print("~")
+                            self._rewrite(sWhat, nErrorStart, nErrorEnd)
+                        elif cActionType == "@":
+                            # jump
+                            print("@")
+                            self._jump(sWhat)
+                        elif cActionType == "=":
+                            # disambiguation
+                            print("=")
+                            globals()[sWhat](self.lToken)
+                        elif cActionType == ">":
+                            # we do nothing, this test is just a condition to apply all following actions
+                            print(">")
+                            pass
+                        else:
+                            print("# error: unknown action at " + sLineId)
+                    elif cActionType == ">":
+                        break
+                except Exception as e:
+                    raise Exception(str(e), "# " + sLineId + " # " + sRuleId)
+
+    def _createWriterError (self):
+        d = {}
+        return d
+
+    def _createDictError (self):
+        d = {}
+        return d
+
+    def _rewrite (self, sWhat, nErrorStart, nErrorEnd):
+        "text processor: rewrite tokens between <nErrorStart> and <nErrorEnd> position"
+        lTokenValue = sWhat.split("|")
+        if len(lTokenValue) != (nErrorEnd - nErrorStart + 1):
+            print("Error. Text processor: number of replacements != number of tokens.")
+            return
+        for i, sValue in zip(range(nErrorStart, nErrorEnd+1), lTokenValue):
+            self.lToken[i]["sValue"] = sValue
+
+    def _jump (self, sWhat):
+        try:
+            nFrom, nTo = sWhat.split(">")
+            self.lToken[int(nFrom)]["iJump"] = int(nTo)
+        except:
+            print("# Error. Jump failed: ", sWhat)
+            traceback.print_exc()
+            return
+
+
+#### Analyse tokens
+
+def g_morph (dToken, sPattern, bStrict=True):
+    "analyse a token, return True if <sPattern> in morphologies"
+    if "lMorph" in dToken:
+        lMorph = dToken["lMorph"]
+    else:
+        lMorph = _oSpellChecker.getMorph(dToken["sValue"])
+        if not lMorph:
+            return False
+    zPattern = re.compile(sPattern)
+    if bStrict:
+        return all(zPattern.search(sMorph) for sMorph in lMorph)
+    return any(zPattern.search(sMorph) for sMorph in lMorph)
+
+def g_morphex (dToken, sPattern, sNegPattern):
+    "analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies"
+    if "lMorph" in dToken:
+        lMorph = dToken["lMorph"]
+    else:
+        lMorph = _oSpellChecker.getMorph(dToken["sValue"])
+        if not lMorph:
+            return False
+    # check negative condition
+    zNegPattern = re.compile(sNegPattern)
+    if any(zNegPattern.search(sMorph) for sMorph in lMorph):
+        return False
+    # search sPattern
+    zPattern = re.compile(sPattern)
+    return any(zPattern.search(sMorph) for sMorph in lMorph)
+
+def g_analyse (dToken, sPattern, bStrict=True):
+    "analyse a token, return True if <sPattern> in morphologies (disambiguation off)"
+    lMorph = _oSpellChecker.getMorph(dToken["sValue"])
+    if not lMorph:
+        return False
+    zPattern = re.compile(sPattern)
+    if bStrict:
+        return all(zPattern.search(sMorph) for sMorph in lMorph)
+    return any(zPattern.search(sMorph) for sMorph in lMorph)
+
+
+def g_analysex (dToken, sPattern, sNegPattern):
+    "analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies (disambiguation off)"
+    lMorph = _oSpellChecker.getMorph(dToken["sValue"])
+    if not lMorph:
+        return False
+    # check negative condition
+    zNegPattern = re.compile(sNegPattern)
+    if any(zNegPattern.search(sMorph) for sMorph in lMorph):
+        return False
+    # search sPattern
+    zPattern = re.compile(sPattern)
+    return any(zPattern.search(sMorph) for sMorph in lMorph)
+
+
+#### Go outside the rule scope
+
+def g_nextToken (i):
+    pass
+
+def g_prevToken (i):
+    pass
+
+def g_look ():
+    pass
+
+def g_lookAndCheck ():
+    pass
+
+
+#### Disambiguator
+
+def g_select (dToken, sPattern, lDefault=None):
+    "select morphologies for <dToken> according to <sPattern>, always return True"
+    lMorph = dToken["lMorph"]  if "lMorph" in dToken  else _oSpellChecker.getMorph(dToken["sValue"])
+    if not lMorph or len(lMorph) == 1:
+        return True
+    lSelect = [ sMorph  for sMorph in lMorph  if re.search(sPattern, sMorph) ]
+    if lSelect:
+        if len(lSelect) != len(lMorph):
+            dToken["lMorph"] = lSelect
+    elif lDefault:
+        dToken["lMorph"] = lDefault
+    return True
+
+
+def g_exclude (dToken, sPattern, lDefault=None):
+    "select morphologies for <dToken> according to <sPattern>, always return True"
+    lMorph = dToken["lMorph"]  if "lMorph" in dToken  else _oSpellChecker.getMorph(dToken["sValue"])
+    if not lMorph or len(lMorph) == 1:
+        return True
+    lSelect = [ sMorph  for sMorph in lMorph  if not re.search(sPattern, sMorph) ]
+    if lSelect:
+        if len(lSelect) != len(lMorph):
+            dToken["lMorph"] = lSelect
+    elif lDefault:
+        dToken["lMorph"] = lDefault
+    return True
+
+
+def g_define (dToken, lMorph):
+    "set morphologies of <dToken>, always return True"
+    dToken["lMorph"] = lMorph
+    return True
+
+
+#### CALLABLES (generated code)
+
+${graph_callables}
Index: gc_core/py/lang_core/gc_sentence.py
==================================================================
--- gc_core/py/lang_core/gc_sentence.py
+++ gc_core/py/lang_core/gc_sentence.py
@@ -1,15 +1,15 @@
 # Sentence checker
 
 from ..graphspell.tokenizer import Tokenizer
-from .gc_graph import dGraph
+from .gc_rules_graph import dGraph
 
 
 oTokenizer = Tokenizer("${lang}")
 
 
-class Sentence:
+class TokenSentence:
 
     def __init__ (self, sSentence, sSentence0, nOffset):
         self.sSentence = sSentence
         self.sSentence0 = sSentence0
         self.nOffset = nOffset
@@ -30,33 +30,39 @@
             for dNode in self._getNextMatchingNodes(dToken, dGraph):
                 lPointer.append({"nOffset": 0, "dNode": dNode})
             for dPointer in lPointer:
                 if "" in dPointer["dNode"]:
                     for dNode in dGraph[dPointer["dNode"][""]]:
-                        dErr = self._executeActions(dNode)
+                        dErr = self._executeActions(dNode, nOffset)
         return dErr
 
     def _getNextMatchingNodes (self, dToken, dNode):
+        # token value
         if dToken["sValue"] in dNode:
             yield dGraph[dNode[dToken["sValue"]]]
-        for sLemma in dToken["sLemma"]:
+        # token lemmas
+        for sLemma in dToken["lLemma"]:
             if sLemma in dNode:
-                yield dGraph[dNode[dToken["sValue"]]]
+                yield dGraph[dNode[sLemma]]
+        # universal arc
+        if "*" in dNode:
+            yield dGraph[dNode["*"]]
+        # regex arcs
         if "~" in dNode:
             for sRegex in dNode["~"]:
                 for sMorph in dToken["lMorph"]:
                     if re.search(sRegex, sMorph):
                         yield dGraph[dNode["~"][sRegex]]
 
-    def _executeActions (self, dNode):
+    def _executeActions (self, dNode, nOffset):
         for sLineId, nextNodeKey in dNode.items():
             for sArc in dGraph[nextNodeKey]:
                 bCondMemo = None
                 sFuncCond, cActionType, sWhat, *eAct = dRule[sArc]
                 # action in lActions: [ condition, action type, replacement/suggestion/action[, iGroupStart, iGroupEnd[, message, URL]] ]
                 try:
-                    bCondMemo = not sFuncCond or globals()[sFuncCond](self, dDA, sCountry, bCondMemo)
+                    bCondMemo = not sFuncCond or globals()[sFuncCond](self, sCountry, bCondMemo)
                     if bCondMemo:
                         if cActionType == "-":
                             # grammar error
                             nErrorStart = nSentenceOffset + m.start(eAct[0])
                             nErrorEnd = nSentenceOffset + m.start(eAct[1])
@@ -63,24 +69,22 @@
                             if nErrorStart not in dErrs or nPriority > dPriority[nErrorStart]:
                                 dErrs[nErrorStart] = _createError(self, sWhat, nErrorStart, nErrorEnd, sLineId, bUppercase, eAct[2], eAct[3], bIdRule, sOption, bContext)
                                 dPriority[nErrorStart] = nPriority
                         elif cActionType == "~":
                             # text processor
-                            self.lToken = _rewrite(self, sWhat, nErrorStart, nErrorEnd, bUppercase)
-                            bChange = True
+                            self._rewrite(sWhat, nErrorStart, nErrorEnd)
                         elif cActionType == "@":
-                            # text processor
-                            self.lToken = _rewrite(self, sWhat, nErrorStart, nErrorEnd, bUppercase)
-                            bChange = True
+                            # jump
+                            self._jump(sWhat)
                         elif cActionType == "=":
                             # disambiguation
-                            globals()[sWhat](self, dDA)
+                            globals()[sWhat](self.lToken)
                         elif cActionType == ">":
                             # we do nothing, this test is just a condition to apply all following actions
                             pass
                         else:
-                            echo("# error: unknown action at " + sLineId)
+                            print("# error: unknown action at " + sLineId)
                     elif cActionType == ">":
                         break
                 except Exception as e:
                     raise Exception(str(e), "# " + sLineId + " # " + sRuleId)
 
@@ -90,52 +94,144 @@
 
     def _createDictError (self):
         d = {}
        return d
 
-
-#### Common functions
+    def _rewrite (self, sWhat, nErrorStart, nErrorEnd):
+        "text processor: rewrite tokens between <nErrorStart> and <nErrorEnd> position"
+        lTokenValue = sWhat.split("|")
+        if len(lTokenValue) != (nErrorEnd - nErrorStart + 1):
+            print("Error. Text processor: number of replacements != number of tokens.")
+            return
+        for i, sValue in zip(range(nErrorStart, nErrorEnd+1), lTokenValue):
+            self.lToken[i]["sValue"] = sValue
 
-def option ():
-    pass
+    def _jump (self, sWhat):
+        try:
+            nFrom, nTo = sWhat.split(">")
+            self.lToken[int(nFrom)]["iJump"] = int(nTo)
+        except:
+            print("# Error. Jump failed: ", sWhat)
+            traceback.print_exc()
+            return
 
 
 #### Analyse tokens
 
-def morph ():
-    pass
-
-def morphex ():
-    pass
-
-def analyse ():
-    pass
-
-def analysex ():
-    pass
-
-
-#### Go outside scope
-
-def nextToken ():
-    pass
-
-def prevToken ():
-    pass
-
-def look ():
-    pass
-
-def lookAndCheck ():
+def g_morph (dToken, sPattern, bStrict=True):
+    "analyse a token, return True if <sPattern> in morphologies"
+    if "lMorph" in dToken:
+        lMorph = dToken["lMorph"]
+    else:
+        if dToken["sValue"] not in _dAnalyses and not _storeMorphFromFSA(dToken["sValue"]):
+            return False
+        if not _dAnalyses[dToken["sValue"]]:
+            return False
+        lMorph = _dAnalyses[dToken["sValue"]]
+    zPattern = re.compile(sPattern)
+    if bStrict:
+        return all(zPattern.search(sMorph) for sMorph in lMorph)
+    return any(zPattern.search(sMorph) for sMorph in lMorph)
+
+def g_morphex (dToken, sPattern, sNegPattern):
+    "analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies"
+    if "lMorph" in dToken:
+        lMorph = dToken["lMorph"]
+    else:
+        if dToken["sValue"] not in _dAnalyses and not _storeMorphFromFSA(dToken["sValue"]):
+            return False
+        if not _dAnalyses[dToken["sValue"]]:
+            return False
+        lMorph = _dAnalyses[dToken["sValue"]]
+    # check negative condition
+    zNegPattern = re.compile(sNegPattern)
+    if any(zNegPattern.search(sMorph) for sMorph in lMorph):
+        return False
+    # search sPattern
+    zPattern = re.compile(sPattern)
+    return any(zPattern.search(sMorph) for sMorph in lMorph)
+
+def g_analyse (dToken, sPattern, bStrict=True):
+    "analyse a token, return True if <sPattern> in morphologies (disambiguation off)"
+    if dToken["sValue"] not in _dAnalyses and not _storeMorphFromFSA(dToken["sValue"]):
+        return False
+    if not _dAnalyses[dToken["sValue"]]:
+        return False
+    zPattern = re.compile(sPattern)
+    if bStrict:
+        return all(zPattern.search(sMorph) for sMorph in _dAnalyses[dToken["sValue"]])
+    return any(zPattern.search(sMorph) for sMorph in _dAnalyses[dToken["sValue"]])
+
+
+def g_analysex (dToken, sPattern, sNegPattern):
+    "analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies (disambiguation off)"
+    if dToken["sValue"] not in _dAnalyses and not _storeMorphFromFSA(dToken["sValue"]):
+        return False
+    if not _dAnalyses[dToken["sValue"]]:
+        return False
+    # check negative condition
+    zNegPattern = re.compile(sNegPattern)
+    if any(zNegPattern.search(sMorph) for sMorph in _dAnalyses[dToken["sValue"]]):
+        return False
+    # search sPattern
+    zPattern = re.compile(sPattern)
+    return any(zPattern.search(sMorph) for sMorph in _dAnalyses[dToken["sValue"]])
+
+
+#### Go outside the rule scope
+
+def g_nextToken (i):
+    pass
+
+def g_prevToken (i):
+    pass
+
+def g_look ():
+    pass
+
+def g_lookAndCheck ():
     pass
 
 
 #### Disambiguator
 
-def select ():
-    pass
-
-def exclude ():
-    pass
-
-def define ():
-    pass
+def g_select (dToken, sPattern, lDefault=None):
+    "select morphologies for <dToken> according to <sPattern>, always return True"
+    if dToken["sValue"] not in _dAnalyses and not _storeMorphFromFSA(dToken["sValue"]):
+        return True
+    if len(_dAnalyses[dToken["sValue"]]) == 1:
+        return True
+    lMorph = dToken["lMorph"] or _dAnalyses[dToken["sValue"]]
+    lSelect = [ sMorph  for sMorph in lMorph  if re.search(sPattern, sMorph) ]
+    if lSelect:
+        if len(lSelect) != len(lMorph):
+            dToken["lMorph"] = lSelect
+    elif lDefault:
+        dToken["lMorph"] = lDefault
+    return True
+
+
+def g_exclude (dToken, sPattern, lDefault=None):
+    "select morphologies for <dToken> according to <sPattern>, always return True"
+    if dToken["sValue"] not in _dAnalyses and not _storeMorphFromFSA(dToken["sValue"]):
+        return True
+    if len(_dAnalyses[dToken["sValue"]]) == 1:
+        return True
+    lMorph = dToken["lMorph"] or _dAnalyses[dToken["sValue"]]
+    lSelect = [ sMorph  for sMorph in lMorph  if not re.search(sPattern, sMorph) ]
+    if lSelect:
+        if len(lSelect) != len(lMorph):
+            dToken["lMorph"] = lSelect
+    elif lDefault:
+        dToken["lMorph"] = lDefault
+    return True
+
+
+def g_define (dToken, lMorph):
+    "set morphologies of <dToken>, always return True"
+    dToken["lMorph"] = lMorph
+    return True
+
+
+#### CALLABLES (generated code)
+
+${graph_callables}
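
Note: g_select and g_exclude implement the disambiguator: they narrow a token's list of morphologies in place and always return True so they can sit inside condition chains. A standalone demonstration of the narrowing logic with made-up morphology tags (the real tags come from the dictionary analyses):

    import re

    def g_select (dToken, sPattern, lDefault=None):
        "keep only the morphologies of <dToken> matching <sPattern>"
        lMorph = dToken.get("lMorph", [])
        if not lMorph or len(lMorph) == 1:
            return True
        lSelect = [ sMorph  for sMorph in lMorph  if re.search(sPattern, sMorph) ]
        if lSelect:
            if len(lSelect) != len(lMorph):
                dToken["lMorph"] = lSelect      # narrowed set
        elif lDefault:
            dToken["lMorph"] = lDefault         # nothing matched: fall back
        return True

    dToken = {"sValue": "walks", "lMorph": ["walk :V :3s", "walk :N :p"]}  # hypothetical tags
    g_select(dToken, ":V")
    print(dToken["lMorph"])   # ['walk :V :3s']
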