Index: compile_rules.py ================================================================== --- compile_rules.py +++ compile_rules.py @@ -1,5 +1,8 @@ +""" +Grammalecte: compile rules +""" import re import traceback import json @@ -18,10 +21,11 @@ sWORDLIMITLEFT = r"(?" try: return re.compile(sRegex).groups except: traceback.print_exc() print(sRegex) @@ -117,11 +122,11 @@ sRuleId = sLineId #### GRAPH CALL if s.startswith("@@@@"): if bParagraph: - print("Error. Graph call can’t be made only after the first pass (sentence by sentence)") + print("Error. Graph call can be made only after the first pass (sentence by sentence)") exit() return ["@@@@", s[4:], sLineId] #### OPTIONS sOption = False # False or [a-z0-9]+ name @@ -211,18 +216,18 @@ else: print("# Unknown case mode [" + cCaseMode + "] at line " + sLineId) ## check regex try: - z = re.compile(sRegex) + re.compile(sRegex) except: print("# Regex error at line ", nIdLine) print(sRegex) traceback.print_exc() return None ## groups in non grouping parenthesis - for x in re.finditer("\(\?:[^)]*\([[\w -]", sRegex): + for x in re.finditer(r"\(\?:[^)]*\([[\w -]", sRegex): print("# Warning: groups inside non grouping parenthesis in regex at line " + sLineId) #### PARSE ACTIONS lActions = [] nAction = 1 @@ -237,12 +242,10 @@ return [sOption, sRegex, bCaseInsensitive, sLineId, sRuleId, nPriority, lActions, tGroups] def createAction (sIdAction, sAction, nGroup): "returns an action to perform as a tuple (condition, action type, action[, iGroup [, message, URL ]])" - global lFUNCTIONS - m = re.search(r"([-~=>])(\d*|)>>", sAction) if not m: print("# No action at line " + sIdAction) return None @@ -249,11 +252,11 @@ #### CONDITION sCondition = sAction[:m.start()].strip() if sCondition: sCondition = prepareFunction(sCondition) lFUNCTIONS.append(("_c_"+sIdAction, sCondition)) - for x in re.finditer("[.](?:group|start|end)[(](\d+)[)]", sCondition): + for x in re.finditer(r"[.](?:group|start|end)[(](\d+)[)]", sCondition): if int(x.group(1)) > nGroup: print("# Error in groups in condition at line " + sIdAction + " ("+str(nGroup)+" groups only)") if ".match" in sCondition: print("# Error. JS compatibility. Don't use .match() in condition, use .search()") sCondition = "_c_"+sIdAction @@ -284,11 +287,11 @@ sURL = mURL.group(1).strip() sMsg = sMsg[:mURL.start(0)].strip() if sMsg[0:1] == "=": sMsg = prepareFunction(sMsg[1:]) lFUNCTIONS.append(("_m_"+sIdAction, sMsg)) - for x in re.finditer("group[(](\d+)[)]", sMsg): + for x in re.finditer(r"group[(](\d+)[)]", sMsg): if int(x.group(1)) > nGroup: print("# Error in groups in message at line " + sIdAction + " ("+str(nGroup)+" groups only)") sMsg = "=_m_"+sIdAction else: for x in re.finditer(r"\\(\d+)", sMsg): @@ -300,11 +303,11 @@ if sAction[0:1] == "=" or cAction == "=": if "define" in sAction and not re.search(r"define\(\\\d+ *, *\[.*\] *\)", sAction): print("# Error in action at line " + sIdAction + ": second argument for define must be a list of strings") sAction = prepareFunction(sAction) sAction = sAction.replace("m.group(i[4])", "m.group("+str(iGroup)+")") - for x in re.finditer("group[(](\d+)[)]", sAction): + for x in re.finditer(r"group[(](\d+)[)]", sAction): if int(x.group(1)) > nGroup: print("# Error in groups in replacement at line " + sIdAction + " ("+str(nGroup)+" groups only)") else: for x in re.finditer(r"\\(\d+)", sAction): if int(x.group(1)) > nGroup: @@ -350,19 +353,21 @@ print("# Unknown action at line " + sIdAction) return None def _calcRulesStats (lRules): + "count rules and actions" d = {'=':0, '~': 0, '-': 0, '>': 0} for aRule in lRules: if aRule[0] != "@@@@": for aAction in aRule[6]: d[aAction[1]] = d[aAction[1]] + 1 return (d, len(lRules)) def displayStats (lParagraphRules, lSentenceRules): + "display rules numbers" print(" {:>18} {:>18} {:>18} {:>18}".format("DISAMBIGUATOR", "TEXT PROCESSOR", "GRAMMAR CHECKING", "REGEX")) d, nRule = _calcRulesStats(lParagraphRules) print("§ {:>10} actions {:>10} actions {:>10} actions in {:>8} rules".format(d['='], d['~'], d['-'], nRule)) d, nRule = _calcRulesStats(lSentenceRules) print("s {:>10} actions {:>10} actions {:>10} actions in {:>8} rules".format(d['='], d['~'], d['-'], nRule)) @@ -401,11 +406,11 @@ elif sLine.startswith("OPTSOFTWARE:"): lOpt = [ [s, {}] for s in sLine[12:].strip().split() ] # don’t use tuples (s, {}), because unknown to JS elif sLine.startswith("OPT/"): m = re.match("OPT/([a-z0-9]+):(.+)$", sLine) for i, sOpt in enumerate(m.group(2).split()): - lOpt[i][1][m.group(1)] = eval(sOpt) + lOpt[i][1][m.group(1)] = eval(sOpt) elif sLine.startswith("OPTPRIORITY/"): m = re.match("OPTPRIORITY/([a-z0-9]+): *([0-9])$", sLine) dOptPriority[m.group(1)] = int(m.group(2)) elif sLine.startswith("OPTLANG/"): m = re.match("OPTLANG/([a-z][a-z](?:_[A-Z][A-Z]|)):(.+)$", sLine) @@ -425,10 +430,11 @@ dOptions.update({ "dOpt"+k: v for k, v in lOpt }) return dOptions, dOptPriority def printBookmark (nLevel, sComment, nLine): + "print bookmark within the rules file" print(" {:>6}: {}".format(nLine, " " * nLevel + sComment)) def make (spLang, sLang, bJavaScript): "compile rules, returns a dictionary of values" @@ -571,20 +577,20 @@ displayStats(lParagraphRules, lSentenceRules) print("Unnamed rules: " + str(nRULEWITHOUTNAME)) - d = { "callables": sPyCallables, - "callablesJS": sJSCallables, - "gctests": sGCTests, - "gctestsJS": sGCTestsJS, - "paragraph_rules": mergeRulesByOption(lParagraphRules), - "sentence_rules": mergeRulesByOption(lSentenceRules), - "paragraph_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lParagraphRulesJS)), - "sentence_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lSentenceRulesJS)) } - d.update(dOptions) + dVars = { "callables": sPyCallables, + "callablesJS": sJSCallables, + "gctests": sGCTests, + "gctestsJS": sGCTestsJS, + "paragraph_rules": mergeRulesByOption(lParagraphRules), + "sentence_rules": mergeRulesByOption(lSentenceRules), + "paragraph_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lParagraphRulesJS)), + "sentence_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lSentenceRulesJS)) } + dVars.update(dOptions) # compile graph rules - d2 = crg.make(lGraphRule, dDEF, sLang, bJavaScript) - d.update(d2) + dVars2 = crg.make(lGraphRule, dDEF, sLang, bJavaScript) + dVars.update(dVars2) - return d + return dVars Index: compile_rules_graph.py ================================================================== --- compile_rules_graph.py +++ compile_rules_graph.py @@ -1,6 +1,9 @@ -# Create a Direct Acyclic Rule Graph (DARG) +""" +Grammalecte: compile rules +Create a Direct Acyclic Rule Graphs (DARGs) +""" import re import traceback import json @@ -10,10 +13,11 @@ dACTIONS = {} dFUNCTIONS = {} def prepareFunction (s, bTokenValue=False): + "convert simple rule syntax to a string of Python code" s = s.replace("__also__", "bCondMemo") s = s.replace("__else__", "not bCondMemo") s = re.sub(r"(morph|analyse|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s) s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset], dTags', s) s = re.sub(r"(tag_before|tag_after)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset], dTags', s) @@ -37,11 +41,11 @@ def genTokenLines (sTokenLine, dDef): "tokenize a string and return a list of lines of tokens" lToken = sTokenLine.split() lTokenLines = None - for i, sToken in enumerate(lToken): + for sToken in lToken: # optional token? bNullPossible = sToken.startswith("?") and sToken.endswith("¿") if bNullPossible: sToken = sToken[1:-1] # token with definition? @@ -94,10 +98,11 @@ for aRule in lTokenLines: yield aRule def createRule (iLine, sRuleName, sTokenLine, iActionBlock, sActions, nPriority, dDef): + "generator: create rule as list" # print(iLine, "//", sRuleName, "//", sTokenLine, "//", sActions, "//", nPriority) for lToken in genTokenLines(sTokenLine, dDef): # Calculate positions dPos = {} # key: iGroup, value: iToken iGroup = 0 @@ -119,29 +124,33 @@ lResult.extend(["##"+str(iLine), sActionId]) yield lResult def changeReferenceToken (sText, dPos): + "change group reference in with values in " for i in range(len(dPos), 0, -1): sText = sText.replace("\\"+str(i), "\\"+str(dPos[i])) return sText def checkTokenNumbers (sText, sActionId, nToken): + "check if token references in greater than (debugging)" for x in re.finditer(r"\\(\d+)", sText): if int(x.group(1)) > nToken: print("# Error in token index at line " + sActionId + " ("+str(nToken)+" tokens only)") print(sText) def checkIfThereIsCode (sText, sActionId): + "check if there is code in (debugging)" if re.search("[.]\\w+[(]|sugg\\w+[(]|\\([0-9]|\\[[0-9]", sText): print("# Warning at line " + sActionId + ": This message looks like code. Line should probably begin with =") print(sText) def createAction (sActionId, sAction, nPriority, nToken, dPos): + "create action rule as a list" # Option sOption = False m = re.match("/(\\w+)/", sAction) if m: sOption = m.group(1) @@ -367,11 +376,10 @@ print(sActionName, aAction) print("\nFunctions:") print(sPyCallables) # Result - d = { + return { "graph_callables": sPyCallables, "rules_graphs": dAllGraph, "rules_actions": dACTIONS } - return d Index: compile_rules_js_convert.py ================================================================== --- compile_rules_js_convert.py +++ compile_rules_js_convert.py @@ -1,6 +1,8 @@ -# Convert Python code to JavaScript code +""" +Convert Python code and regexes to JavaScript code +""" import copy import re import json @@ -116,10 +118,11 @@ lNegLookBeforeRegex = None return (sRegex, lNegLookBeforeRegex) def pyRuleToJS (lRule, dJSREGEXES, sWORDLIMITLEFT): + "modify Python rules -> JS rules" lRuleJS = copy.deepcopy(lRule) # graph rules if lRuleJS[0] == "@@@@": return lRuleJS del lRule[-1] # tGroups positioning codes are useless for Python @@ -133,10 +136,11 @@ lRuleJS.append(lNegLookBehindRegex) return lRuleJS def writeRulesToJSArray (lRules): + "create rules as a string of arrays (to be bundled in a JSON string)" sArray = "[\n" for sOption, aRuleGroup in lRules: if sOption != "@@@@": sArray += ' ["' + sOption + '", [\n' if sOption else " [false, [\n" for sRegex, bCaseInsensitive, sLineId, sRuleId, nPriority, lActions, aGroups, aNegLookBehindRegex in aRuleGroup: @@ -157,9 +161,10 @@ sArray += "]" return sArray def groupsPositioningCodeToList (sGroupsPositioningCode): + "convert to a list of codes (numbers or strings)" if not sGroupsPositioningCode: return None return [ int(sCode) if sCode.isdigit() or (sCode[0:1] == "-" and sCode[1:].isdigit()) else sCode \ for sCode in sGroupsPositioningCode.split(",") ] Index: make.py ================================================================== --- make.py +++ make.py @@ -32,11 +32,11 @@ def getConfig (sLang): "load config.ini in at gc_lang/, returns xConfigParser object" xConfig = configparser.SafeConfigParser() xConfig.optionxform = str try: - xConfig.read_file(open("gc_lang/" + sLang + "/config.ii", "r", encoding="utf-8")) + xConfig.read_file(open("gc_lang/" + sLang + "/config.ini", "r", encoding="utf-8")) except FileNotFoundError: print("# Error. Can’t read config file [" + sLang + "]") exit() return xConfig