Index: compile_rules.py ================================================================== --- compile_rules.py +++ compile_rules.py @@ -1,12 +1,13 @@ import re import sys import traceback -import copy import json from distutils import file_util + +import compile_rules_js_convert as jsconv dDEF = {} lFUNCTIONS = [] @@ -57,73 +58,10 @@ s = re.sub(r"isNextVerb\(\s*\)", 'isNextVerb(dDA, s[m.end():], m.end())', s) # isNextVerb(s) s = re.sub(r"\bspell *[(]", '_oDict.isValid(', s) s = re.sub(r"[\\](\d+)", 'm.group(\\1)', s) return s - -def py2js (sCode): - "convert Python code to JavaScript code" - # Python 2.x unicode strings - sCode = re.sub('\\b[ur]"', '"', sCode) - sCode = re.sub("\\b[ur]'", "'", sCode) - # operators - sCode = sCode.replace(" and ", " && ") - sCode = sCode.replace(" or ", " || ") - sCode = re.sub("\\bnot\\b", "!", sCode) - sCode = re.sub("(.+) if (.+) else (.+)", "(\\2) ? \\1 : \\3", sCode) - # boolean - sCode = sCode.replace("False", "false") - sCode = sCode.replace("True", "true") - sCode = sCode.replace("bool", "Boolean") - # methods - sCode = sCode.replace(".__len__()", ".length") - sCode = sCode.replace(".endswith", ".endsWith") - sCode = sCode.replace(".find", ".indexOf") - sCode = sCode.replace(".startswith", ".startsWith") - sCode = sCode.replace(".lower", ".toLowerCase") - sCode = sCode.replace(".upper", ".toUpperCase") - sCode = sCode.replace(".isdigit", ".gl_isDigit") - sCode = sCode.replace(".isupper", ".gl_isUpperCase") - sCode = sCode.replace(".islower", ".gl_isLowerCase") - sCode = sCode.replace(".istitle", ".gl_isTitle") - sCode = sCode.replace(".capitalize", ".gl_toCapitalize") - sCode = sCode.replace(".strip", ".gl_trim") - sCode = sCode.replace(".lstrip", ".gl_trimLeft") - sCode = sCode.replace(".rstrip", ".gl_trimRight") - sCode = sCode.replace('.replace("."', ".replace(/\./g") - sCode = sCode.replace('.replace("..."', ".replace(/\.\.\./g") - sCode = re.sub('.replace\("([^"]+)" ?,', ".replace(/\\1/g,", sCode) - # regex - sCode = re.sub('re.search\("([^"]+)", *(m.group\(\\d\))\)', "(\\2.search(/\\1/) >= 0)", sCode) - sCode = re.sub(".search\\(/\\(\\?i\\)([^/]+)/\\) >= 0\\)", ".search(/\\1/i) >= 0)", sCode) - sCode = re.sub('(look\\(sx?[][.a-z:()]*), "\\(\\?i\\)([^"]+)"', "\\1, /\\2/i", sCode) - sCode = re.sub('(look\\(sx?[][.a-z:()]*), "([^"]+)"', "\\1, /\\2/", sCode) - sCode = re.sub('(look_chk1\\(dDA, sx?[][.a-z:()]*, [0-9a-z.()]+), "\\(\\?i\\)([^"]+)"', "\\1, /\\2/i", sCode) - sCode = re.sub('(look_chk1\\(dDA, sx?[][.a-z:()]*, [0-9a-z.()]+), "([^"]+)"', "\\1, /\\2/i", sCode) - sCode = sCode.replace("(? lists - sCode = re.sub("\((m\.start\[\\d+\], m\[\\d+\])\)", "[\\1]", sCode) - # regex - sCode = sCode.replace("\w[\w-]+", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st][a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st-]+") - sCode = sCode.replace(r"/\w/", "/[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st]/") - sCode = sCode.replace(r"[\w-]", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st-]") - sCode = sCode.replace(r"[\w,]", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st,]") - return sCode - def uppercase (s, sLang): "(flag i is not enough): converts regex to uppercase regex: 'foo' becomes '[Ff][Oo][Oo]', but 'Bar' becomes 'B[Aa][Rr]'." sUp = "" nState = 0 @@ -215,11 +153,11 @@ s = s[i+4:] # JS groups positioning codes m = re.search("@@\\S+", sRegex) if m: - tGroups = groupsPositioningCodeToList(sRegex[m.start()+2:]) + tGroups = jsconv.groupsPositioningCodeToList(sRegex[m.start()+2:]) sRegex = sRegex[:m.start()].strip() # JS regex m = re.search(".+i?", sRegex) if m: dJSREGEXES[sLineId] = m.group(0) @@ -403,92 +341,10 @@ return [sCondition, cAction, ""] else: print("# Unknown action at line " + sIdAction) return None - -def regex2js (sRegex): - "converts Python regex to JS regex and returns JS regex and list of negative lookbefore assertions" - # Latin letters: http://unicode-table.com/fr/ - # 0-9 and _ - # A-Z - # a-z - # À-Ö 00C0-00D6 (upper case) - # Ø-ß 00D8-00DF (upper case) - # à-ö 00E0-00F6 (lower case) - # ø-ÿ 00F8-00FF (lower case) - # Ā-ʯ 0100-02AF (mixed) - # -> a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ - bCaseInsensitive = False - if "(?i)" in sRegex: - sRegex = sRegex.replace("(?i)", "") - bCaseInsensitive = True - lNegLookBeforeRegex = [] - if sWORDLIMITLEFT in sRegex: - sRegex = sRegex.replace(sWORDLIMITLEFT, "") - lNegLookBeforeRegex = ["[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ.,–-]$"] - sRegex = sRegex.replace("[\\w", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ") - sRegex = sRegex.replace("\\w", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ]") - sRegex = sRegex.replace("[.]", r"\.") - if not sRegex.startswith(""): - sRegex = sRegex.replace("/", r"\/") - m = re.search(r"\(\?"): - sRegex = sRegex.replace('', '/').replace('i', '/ig').replace('', '/g') - else: - sRegex = "/" + sRegex + "/g" - if bCaseInsensitive and not sRegex.endswith("/ig"): - sRegex = sRegex + "i" - if not lNegLookBeforeRegex: - lNegLookBeforeRegex = None - return (sRegex, lNegLookBeforeRegex) - - -def pyRuleToJS (lRule): - lRuleJS = copy.deepcopy(lRule) - del lRule[-1] # tGroups positioning codes are useless for Python - # error messages - for aAction in lRuleJS[6]: - if aAction[1] == "-": - aAction[2] = aAction[2].replace(" ", " ") # nbsp --> nnbsp - aAction[4] = aAction[4].replace("« ", "« ").replace(" »", " »").replace(" :", " :").replace(" :", " :") - # js regexes - lRuleJS[1], lNegLookBehindRegex = regex2js( dJSREGEXES.get(lRuleJS[3], lRuleJS[1]) ) - lRuleJS.append(lNegLookBehindRegex) - return lRuleJS - - -def writeRulesToJSArray (lRules): - sArray = "[\n" - for sOption, aRuleGroup in lRules: - sArray += ' ["' + sOption + '", [\n' if sOption else " [false, [\n" - for sRegex, bCaseInsensitive, sLineId, sRuleId, nPriority, lActions, aGroups, aNegLookBehindRegex in aRuleGroup: - sArray += ' [' + sRegex + ", " - sArray += "true, " if bCaseInsensitive else "false, " - sArray += '"' + sLineId + '", ' - sArray += '"' + sRuleId + '", ' - sArray += str(nPriority) + ", " - sArray += json.dumps(lActions, ensure_ascii=False) + ", " - sArray += json.dumps(aGroups, ensure_ascii=False) + ", " - sArray += json.dumps(aNegLookBehindRegex, ensure_ascii=False) + "],\n" - sArray += " ]],\n" - sArray += "]" - return sArray - - -def groupsPositioningCodeToList (sGroupsPositioningCode): - if not sGroupsPositioningCode: - return None - return [ int(sCode) if sCode.isdigit() or (sCode[0:1] == "-" and sCode[1:].isdigit()) else sCode \ - for sCode in sGroupsPositioningCode.split(",") ] - def _calcRulesStats (lRules): d = {'=':0, '~': 0, '-': 0, '>': 0} for aRule in lRules: for aAction in aRule[6]: @@ -638,14 +494,14 @@ else: aRule = createRule(sLine, nLine, sLang, bParagraph, dOptPriority) if aRule: if bParagraph: lParagraphRules.append(aRule) - lParagraphRulesJS.append(pyRuleToJS(aRule)) + lParagraphRulesJS.append(jsconv.pyRuleToJS(aRule, dJSREGEXES, sWORDLIMITLEFT)) else: lSentenceRules.append(aRule) - lSentenceRulesJS.append(pyRuleToJS(aRule)) + lSentenceRulesJS.append(jsconv.pyRuleToJS(aRule, dJSREGEXES, sWORDLIMITLEFT)) # creating file with all functions callable by rules print(" creating callables...") sPyCallables = "# generated code, do not edit\n" sJSCallables = "// generated code, do not edit\nconst oEvalFunc = {\n" @@ -665,11 +521,11 @@ print("# Unknown function type in [" + sFuncName + "]") continue sPyCallables += "def {} ({}):\n".format(sFuncName, sParams) sPyCallables += " return " + sReturn + "\n" sJSCallables += " {}: function ({})".format(sFuncName, sParams) + " {\n" - sJSCallables += " return " + py2js(sReturn) + ";\n" + sJSCallables += " return " + jsconv.py2js(sReturn) + ";\n" sJSCallables += " },\n" sJSCallables += "}\n" displayStats(lParagraphRules, lSentenceRules) @@ -679,10 +535,10 @@ "callablesJS": sJSCallables, "gctests": sGCTests, "gctestsJS": sGCTestsJS, "paragraph_rules": mergeRulesByOption(lParagraphRules), "sentence_rules": mergeRulesByOption(lSentenceRules), - "paragraph_rules_JS": writeRulesToJSArray(mergeRulesByOption(lParagraphRulesJS)), - "sentence_rules_JS": writeRulesToJSArray(mergeRulesByOption(lSentenceRulesJS)) } + "paragraph_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lParagraphRulesJS)), + "sentence_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lSentenceRulesJS)) } d.update(dOptions) return d ADDED compile_rules_js_convert.py Index: compile_rules_js_convert.py ================================================================== --- compile_rules_js_convert.py +++ compile_rules_js_convert.py @@ -0,0 +1,150 @@ +# Convert Python code to JavaScript code + +import copy +import re +import json + + +def py2js (sCode): + "convert Python code to JavaScript code" + # Python 2.x unicode strings + sCode = re.sub('\\b[ur]"', '"', sCode) + sCode = re.sub("\\b[ur]'", "'", sCode) + # operators + sCode = sCode.replace(" and ", " && ") + sCode = sCode.replace(" or ", " || ") + sCode = re.sub("\\bnot\\b", "!", sCode) + sCode = re.sub("(.+) if (.+) else (.+)", "(\\2) ? \\1 : \\3", sCode) + # boolean + sCode = sCode.replace("False", "false") + sCode = sCode.replace("True", "true") + sCode = sCode.replace("bool", "Boolean") + # methods + sCode = sCode.replace(".__len__()", ".length") + sCode = sCode.replace(".endswith", ".endsWith") + sCode = sCode.replace(".find", ".indexOf") + sCode = sCode.replace(".startswith", ".startsWith") + sCode = sCode.replace(".lower", ".toLowerCase") + sCode = sCode.replace(".upper", ".toUpperCase") + sCode = sCode.replace(".isdigit", ".gl_isDigit") + sCode = sCode.replace(".isupper", ".gl_isUpperCase") + sCode = sCode.replace(".islower", ".gl_isLowerCase") + sCode = sCode.replace(".istitle", ".gl_isTitle") + sCode = sCode.replace(".capitalize", ".gl_toCapitalize") + sCode = sCode.replace(".strip", ".gl_trim") + sCode = sCode.replace(".lstrip", ".gl_trimLeft") + sCode = sCode.replace(".rstrip", ".gl_trimRight") + sCode = sCode.replace('.replace("."', ".replace(/\./g") + sCode = sCode.replace('.replace("..."', ".replace(/\.\.\./g") + sCode = re.sub('.replace\("([^"]+)" ?,', ".replace(/\\1/g,", sCode) + # regex + sCode = re.sub('re.search\("([^"]+)", *(m.group\(\\d\))\)', "(\\2.search(/\\1/) >= 0)", sCode) + sCode = re.sub(".search\\(/\\(\\?i\\)([^/]+)/\\) >= 0\\)", ".search(/\\1/i) >= 0)", sCode) + sCode = re.sub('(look\\(sx?[][.a-z:()]*), "\\(\\?i\\)([^"]+)"', "\\1, /\\2/i", sCode) + sCode = re.sub('(look\\(sx?[][.a-z:()]*), "([^"]+)"', "\\1, /\\2/", sCode) + sCode = re.sub('(look_chk1\\(dDA, sx?[][.a-z:()]*, [0-9a-z.()]+), "\\(\\?i\\)([^"]+)"', "\\1, /\\2/i", sCode) + sCode = re.sub('(look_chk1\\(dDA, sx?[][.a-z:()]*, [0-9a-z.()]+), "([^"]+)"', "\\1, /\\2/i", sCode) + sCode = sCode.replace("(? lists + sCode = re.sub("\((m\.start\[\\d+\], m\[\\d+\])\)", "[\\1]", sCode) + # regex + sCode = sCode.replace("\w[\w-]+", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st][a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st-]+") + sCode = sCode.replace(r"/\w/", "/[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st]/") + sCode = sCode.replace(r"[\w-]", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st-]") + sCode = sCode.replace(r"[\w,]", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st,]") + return sCode + + +def regex2js (sRegex, sWORDLIMITLEFT): + "converts Python regex to JS regex and returns JS regex and list of negative lookbefore assertions" + # Latin letters: http://unicode-table.com/fr/ + # 0-9 and _ + # A-Z + # a-z + # À-Ö 00C0-00D6 (upper case) + # Ø-ß 00D8-00DF (upper case) + # à-ö 00E0-00F6 (lower case) + # ø-ÿ 00F8-00FF (lower case) + # Ā-ʯ 0100-02AF (mixed) + # -> a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ + bCaseInsensitive = False + if "(?i)" in sRegex: + sRegex = sRegex.replace("(?i)", "") + bCaseInsensitive = True + lNegLookBeforeRegex = [] + if sWORDLIMITLEFT in sRegex: + sRegex = sRegex.replace(sWORDLIMITLEFT, "") + lNegLookBeforeRegex = ["[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ.,–-]$"] + sRegex = sRegex.replace("[\\w", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ") + sRegex = sRegex.replace("\\w", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ]") + sRegex = sRegex.replace("[.]", r"\.") + if not sRegex.startswith(""): + sRegex = sRegex.replace("/", r"\/") + m = re.search(r"\(\?"): + sRegex = sRegex.replace('', '/').replace('i', '/ig').replace('', '/g') + else: + sRegex = "/" + sRegex + "/g" + if bCaseInsensitive and not sRegex.endswith("/ig"): + sRegex = sRegex + "i" + if not lNegLookBeforeRegex: + lNegLookBeforeRegex = None + return (sRegex, lNegLookBeforeRegex) + + +def pyRuleToJS (lRule, dJSREGEXES, sWORDLIMITLEFT): + lRuleJS = copy.deepcopy(lRule) + del lRule[-1] # tGroups positioning codes are useless for Python + # error messages + for aAction in lRuleJS[6]: + if aAction[1] == "-": + aAction[2] = aAction[2].replace(" ", " ") # nbsp --> nnbsp + aAction[4] = aAction[4].replace("« ", "« ").replace(" »", " »").replace(" :", " :").replace(" :", " :") + # js regexes + lRuleJS[1], lNegLookBehindRegex = regex2js(dJSREGEXES.get(lRuleJS[3], lRuleJS[1]), sWORDLIMITLEFT) + lRuleJS.append(lNegLookBehindRegex) + return lRuleJS + + +def writeRulesToJSArray (lRules): + sArray = "[\n" + for sOption, aRuleGroup in lRules: + sArray += ' ["' + sOption + '", [\n' if sOption else " [false, [\n" + for sRegex, bCaseInsensitive, sLineId, sRuleId, nPriority, lActions, aGroups, aNegLookBehindRegex in aRuleGroup: + sArray += ' [' + sRegex + ", " + sArray += "true, " if bCaseInsensitive else "false, " + sArray += '"' + sLineId + '", ' + sArray += '"' + sRuleId + '", ' + sArray += str(nPriority) + ", " + sArray += json.dumps(lActions, ensure_ascii=False) + ", " + sArray += json.dumps(aGroups, ensure_ascii=False) + ", " + sArray += json.dumps(aNegLookBehindRegex, ensure_ascii=False) + "],\n" + sArray += " ]],\n" + sArray += "]" + return sArray + + +def groupsPositioningCodeToList (sGroupsPositioningCode): + if not sGroupsPositioningCode: + return None + return [ int(sCode) if sCode.isdigit() or (sCode[0:1] == "-" and sCode[1:].isdigit()) else sCode \ + for sCode in sGroupsPositioningCode.split(",") ]