Index: compile_rules.py
==================================================================
--- compile_rules.py
+++ compile_rules.py
@@ -1,12 +1,13 @@
import re
import sys
import traceback
-import copy
import json
from distutils import file_util
+
+import compile_rules_js_convert as jsconv
dDEF = {}
lFUNCTIONS = []
@@ -57,73 +58,10 @@
s = re.sub(r"isNextVerb\(\s*\)", 'isNextVerb(dDA, s[m.end():], m.end())', s) # isNextVerb(s)
s = re.sub(r"\bspell *[(]", '_oDict.isValid(', s)
s = re.sub(r"[\\](\d+)", 'm.group(\\1)', s)
return s
-
-def py2js (sCode):
- "convert Python code to JavaScript code"
- # Python 2.x unicode strings
- sCode = re.sub('\\b[ur]"', '"', sCode)
- sCode = re.sub("\\b[ur]'", "'", sCode)
- # operators
- sCode = sCode.replace(" and ", " && ")
- sCode = sCode.replace(" or ", " || ")
- sCode = re.sub("\\bnot\\b", "!", sCode)
- sCode = re.sub("(.+) if (.+) else (.+)", "(\\2) ? \\1 : \\3", sCode)
- # boolean
- sCode = sCode.replace("False", "false")
- sCode = sCode.replace("True", "true")
- sCode = sCode.replace("bool", "Boolean")
- # methods
- sCode = sCode.replace(".__len__()", ".length")
- sCode = sCode.replace(".endswith", ".endsWith")
- sCode = sCode.replace(".find", ".indexOf")
- sCode = sCode.replace(".startswith", ".startsWith")
- sCode = sCode.replace(".lower", ".toLowerCase")
- sCode = sCode.replace(".upper", ".toUpperCase")
- sCode = sCode.replace(".isdigit", ".gl_isDigit")
- sCode = sCode.replace(".isupper", ".gl_isUpperCase")
- sCode = sCode.replace(".islower", ".gl_isLowerCase")
- sCode = sCode.replace(".istitle", ".gl_isTitle")
- sCode = sCode.replace(".capitalize", ".gl_toCapitalize")
- sCode = sCode.replace(".strip", ".gl_trim")
- sCode = sCode.replace(".lstrip", ".gl_trimLeft")
- sCode = sCode.replace(".rstrip", ".gl_trimRight")
- sCode = sCode.replace('.replace("."', ".replace(/\./g")
- sCode = sCode.replace('.replace("..."', ".replace(/\.\.\./g")
- sCode = re.sub('.replace\("([^"]+)" ?,', ".replace(/\\1/g,", sCode)
- # regex
- sCode = re.sub('re.search\("([^"]+)", *(m.group\(\\d\))\)', "(\\2.search(/\\1/) >= 0)", sCode)
- sCode = re.sub(".search\\(/\\(\\?i\\)([^/]+)/\\) >= 0\\)", ".search(/\\1/i) >= 0)", sCode)
- sCode = re.sub('(look\\(sx?[][.a-z:()]*), "\\(\\?i\\)([^"]+)"', "\\1, /\\2/i", sCode)
- sCode = re.sub('(look\\(sx?[][.a-z:()]*), "([^"]+)"', "\\1, /\\2/", sCode)
- sCode = re.sub('(look_chk1\\(dDA, sx?[][.a-z:()]*, [0-9a-z.()]+), "\\(\\?i\\)([^"]+)"', "\\1, /\\2/i", sCode)
- sCode = re.sub('(look_chk1\\(dDA, sx?[][.a-z:()]*, [0-9a-z.()]+), "([^"]+)"', "\\1, /\\2/i", sCode)
- sCode = sCode.replace("(? lists
- sCode = re.sub("\((m\.start\[\\d+\], m\[\\d+\])\)", "[\\1]", sCode)
- # regex
- sCode = sCode.replace("\w[\w-]+", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st][a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st-]+")
- sCode = sCode.replace(r"/\w/", "/[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st]/")
- sCode = sCode.replace(r"[\w-]", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st-]")
- sCode = sCode.replace(r"[\w,]", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st,]")
- return sCode
-
def uppercase (s, sLang):
"(flag i is not enough): converts regex to uppercase regex: 'foo' becomes '[Ff][Oo][Oo]', but 'Bar' becomes 'B[Aa][Rr]'."
sUp = ""
nState = 0
@@ -215,11 +153,11 @@
s = s[i+4:]
# JS groups positioning codes
m = re.search("@@\\S+", sRegex)
if m:
- tGroups = groupsPositioningCodeToList(sRegex[m.start()+2:])
+ tGroups = jsconv.groupsPositioningCodeToList(sRegex[m.start()+2:])
sRegex = sRegex[:m.start()].strip()
# JS regex
m = re.search(".+i?", sRegex)
if m:
dJSREGEXES[sLineId] = m.group(0)
@@ -403,92 +341,10 @@
return [sCondition, cAction, ""]
else:
print("# Unknown action at line " + sIdAction)
return None
-
-def regex2js (sRegex):
- "converts Python regex to JS regex and returns JS regex and list of negative lookbefore assertions"
- # Latin letters: http://unicode-table.com/fr/
- # 0-9 and _
- # A-Z
- # a-z
- # À-Ö 00C0-00D6 (upper case)
- # Ø-ß 00D8-00DF (upper case)
- # à-ö 00E0-00F6 (lower case)
- # ø-ÿ 00F8-00FF (lower case)
- # Ā-ʯ 0100-02AF (mixed)
- # -> a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ
- bCaseInsensitive = False
- if "(?i)" in sRegex:
- sRegex = sRegex.replace("(?i)", "")
- bCaseInsensitive = True
- lNegLookBeforeRegex = []
- if sWORDLIMITLEFT in sRegex:
- sRegex = sRegex.replace(sWORDLIMITLEFT, "")
- lNegLookBeforeRegex = ["[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ.,–-]$"]
- sRegex = sRegex.replace("[\\w", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ")
- sRegex = sRegex.replace("\\w", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ]")
- sRegex = sRegex.replace("[.]", r"\.")
- if not sRegex.startswith(""):
- sRegex = sRegex.replace("/", r"\/")
- m = re.search(r"\(\?"):
- sRegex = sRegex.replace('', '/').replace('i', '/ig').replace('', '/g')
- else:
- sRegex = "/" + sRegex + "/g"
- if bCaseInsensitive and not sRegex.endswith("/ig"):
- sRegex = sRegex + "i"
- if not lNegLookBeforeRegex:
- lNegLookBeforeRegex = None
- return (sRegex, lNegLookBeforeRegex)
-
-
-def pyRuleToJS (lRule):
- lRuleJS = copy.deepcopy(lRule)
- del lRule[-1] # tGroups positioning codes are useless for Python
- # error messages
- for aAction in lRuleJS[6]:
- if aAction[1] == "-":
- aAction[2] = aAction[2].replace(" ", " ") # nbsp --> nnbsp
- aAction[4] = aAction[4].replace("« ", "« ").replace(" »", " »").replace(" :", " :").replace(" :", " :")
- # js regexes
- lRuleJS[1], lNegLookBehindRegex = regex2js( dJSREGEXES.get(lRuleJS[3], lRuleJS[1]) )
- lRuleJS.append(lNegLookBehindRegex)
- return lRuleJS
-
-
-def writeRulesToJSArray (lRules):
- sArray = "[\n"
- for sOption, aRuleGroup in lRules:
- sArray += ' ["' + sOption + '", [\n' if sOption else " [false, [\n"
- for sRegex, bCaseInsensitive, sLineId, sRuleId, nPriority, lActions, aGroups, aNegLookBehindRegex in aRuleGroup:
- sArray += ' [' + sRegex + ", "
- sArray += "true, " if bCaseInsensitive else "false, "
- sArray += '"' + sLineId + '", '
- sArray += '"' + sRuleId + '", '
- sArray += str(nPriority) + ", "
- sArray += json.dumps(lActions, ensure_ascii=False) + ", "
- sArray += json.dumps(aGroups, ensure_ascii=False) + ", "
- sArray += json.dumps(aNegLookBehindRegex, ensure_ascii=False) + "],\n"
- sArray += " ]],\n"
- sArray += "]"
- return sArray
-
-
-def groupsPositioningCodeToList (sGroupsPositioningCode):
- if not sGroupsPositioningCode:
- return None
- return [ int(sCode) if sCode.isdigit() or (sCode[0:1] == "-" and sCode[1:].isdigit()) else sCode \
- for sCode in sGroupsPositioningCode.split(",") ]
-
def _calcRulesStats (lRules):
d = {'=':0, '~': 0, '-': 0, '>': 0}
for aRule in lRules:
for aAction in aRule[6]:
@@ -638,14 +494,14 @@
else:
aRule = createRule(sLine, nLine, sLang, bParagraph, dOptPriority)
if aRule:
if bParagraph:
lParagraphRules.append(aRule)
- lParagraphRulesJS.append(pyRuleToJS(aRule))
+ lParagraphRulesJS.append(jsconv.pyRuleToJS(aRule, dJSREGEXES, sWORDLIMITLEFT))
else:
lSentenceRules.append(aRule)
- lSentenceRulesJS.append(pyRuleToJS(aRule))
+ lSentenceRulesJS.append(jsconv.pyRuleToJS(aRule, dJSREGEXES, sWORDLIMITLEFT))
# creating file with all functions callable by rules
print(" creating callables...")
sPyCallables = "# generated code, do not edit\n"
sJSCallables = "// generated code, do not edit\nconst oEvalFunc = {\n"
@@ -665,11 +521,11 @@
print("# Unknown function type in [" + sFuncName + "]")
continue
sPyCallables += "def {} ({}):\n".format(sFuncName, sParams)
sPyCallables += " return " + sReturn + "\n"
sJSCallables += " {}: function ({})".format(sFuncName, sParams) + " {\n"
- sJSCallables += " return " + py2js(sReturn) + ";\n"
+ sJSCallables += " return " + jsconv.py2js(sReturn) + ";\n"
sJSCallables += " },\n"
sJSCallables += "}\n"
displayStats(lParagraphRules, lSentenceRules)
@@ -679,10 +535,10 @@
"callablesJS": sJSCallables,
"gctests": sGCTests,
"gctestsJS": sGCTestsJS,
"paragraph_rules": mergeRulesByOption(lParagraphRules),
"sentence_rules": mergeRulesByOption(lSentenceRules),
- "paragraph_rules_JS": writeRulesToJSArray(mergeRulesByOption(lParagraphRulesJS)),
- "sentence_rules_JS": writeRulesToJSArray(mergeRulesByOption(lSentenceRulesJS)) }
+ "paragraph_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lParagraphRulesJS)),
+ "sentence_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lSentenceRulesJS)) }
d.update(dOptions)
return d
ADDED compile_rules_js_convert.py
Index: compile_rules_js_convert.py
==================================================================
--- /dev/null
+++ compile_rules_js_convert.py
@@ -0,0 +1,150 @@
+# Convert Python code to JavaScript code
+
+import copy
+import re
+import json
+
+
+def py2js (sCode):  # rewrites a Python expression into JS through successive text substitutions; some steps consume the output of earlier ones
+"convert Python code to JavaScript code"
+# Python 2.x unicode strings
+sCode = re.sub('\\b[ur]"', '"', sCode)  # drops a u/r prefix placed right before a double-quoted literal
+sCode = re.sub("\\b[ur]'", "'", sCode)  # same for single-quoted literals
+# operators
+sCode = sCode.replace(" and ", " && ")
+sCode = sCode.replace(" or ", " || ")
+sCode = re.sub("\\bnot\\b", "!", sCode)
+sCode = re.sub("(.+) if (.+) else (.+)", "(\\2) ? \\1 : \\3", sCode)  # "a if c else b" becomes "(c) ? a : b"
+# boolean
+sCode = sCode.replace("False", "false")
+sCode = sCode.replace("True", "true")
+sCode = sCode.replace("bool", "Boolean")
+# methods
+sCode = sCode.replace(".__len__()", ".length")
+sCode = sCode.replace(".endswith", ".endsWith")
+sCode = sCode.replace(".find", ".indexOf")
+sCode = sCode.replace(".startswith", ".startsWith")
+sCode = sCode.replace(".lower", ".toLowerCase")
+sCode = sCode.replace(".upper", ".toUpperCase")
+sCode = sCode.replace(".isdigit", ".gl_isDigit")  # the gl_* names are not defined in this file; presumably JS-side helpers -- TODO confirm
+sCode = sCode.replace(".isupper", ".gl_isUpperCase")
+sCode = sCode.replace(".islower", ".gl_isLowerCase")
+sCode = sCode.replace(".istitle", ".gl_isTitle")
+sCode = sCode.replace(".capitalize", ".gl_toCapitalize")
+sCode = sCode.replace(".strip", ".gl_trim")
+sCode = sCode.replace(".lstrip", ".gl_trimLeft")
+sCode = sCode.replace(".rstrip", ".gl_trimRight")
+sCode = sCode.replace('.replace("."', ".replace(/\./g")  # literal-dot cases are handled before the generic .replace("...") rewrite below
+sCode = sCode.replace('.replace("..."', ".replace(/\.\.\./g")
+sCode = re.sub('.replace\("([^"]+)" ?,', ".replace(/\\1/g,", sCode)  # the first argument of .replace() becomes a global JS regex
+# regex
+sCode = re.sub('re.search\("([^"]+)", *(m.group\(\\d\))\)', "(\\2.search(/\\1/) >= 0)", sCode)
+sCode = re.sub(".search\\(/\\(\\?i\\)([^/]+)/\\) >= 0\\)", ".search(/\\1/i) >= 0)", sCode)  # post-processes the previous line's output: inline (?i) becomes the /i flag
+sCode = re.sub('(look\\(sx?[][.a-z:()]*), "\\(\\?i\\)([^"]+)"', "\\1, /\\2/i", sCode)  # the (?i) variant must be tried before the generic one
+sCode = re.sub('(look\\(sx?[][.a-z:()]*), "([^"]+)"', "\\1, /\\2/", sCode)
+sCode = re.sub('(look_chk1\\(dDA, sx?[][.a-z:()]*, [0-9a-z.()]+), "\\(\\?i\\)([^"]+)"', "\\1, /\\2/i", sCode)
+sCode = re.sub('(look_chk1\\(dDA, sx?[][.a-z:()]*, [0-9a-z.()]+), "([^"]+)"', "\\1, /\\2/i", sCode)  # NOTE(review): unlike the look() case, the form without (?i) also gets the /i flag -- confirm this is intended
+sCode = sCode.replace("(? lists
+sCode = re.sub("\((m\.start\[\\d+\], m\[\\d+\])\)", "[\\1]", sCode)  # NOTE(review): the line just above is truncated in this patch text (unterminated string literal); restore it from the original file before applying
+# regex: \w is spelled out as an explicit letter/digit/underscore character class
+sCode = sCode.replace("\w[\w-]+", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st][a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st-]+")
+sCode = sCode.replace(r"/\w/", "/[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st]/")
+sCode = sCode.replace(r"[\w-]", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st-]")
+sCode = sCode.replace(r"[\w,]", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st,]")
+return sCode
+
+
+def regex2js (sRegex, sWORDLIMITLEFT):  # returns a pair: (JS regex text, list of look-before patterns or None)
+"converts Python regex to JS regex and returns JS regex and list of negative lookbefore assertions"
+# Latin letters: http://unicode-table.com/fr/
+# 0-9 and _
+# A-Z
+# a-z
+# À-Ö 00C0-00D6 (upper case)
+# Ø-ß 00D8-00DF (upper case)
+# à-ö 00E0-00F6 (lower case)
+# ø-ÿ 00F8-00FF (lower case)
+# Ā-ʯ 0100-02AF (mixed)
+# -> a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ
+bCaseInsensitive = False
+if "(?i)" in sRegex:  # the inline flag is removed here and re-added as a trailing "i" flag at the end
+sRegex = sRegex.replace("(?i)", "")
+bCaseInsensitive = True
+lNegLookBeforeRegex = []
+if sWORDLIMITLEFT in sRegex:  # the marker is removed from the regex; a character-class pattern anchored with $ is recorded in its place
+sRegex = sRegex.replace(sWORDLIMITLEFT, "")
+lNegLookBeforeRegex = ["[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ.,–-]$"]
+sRegex = sRegex.replace("[\\w", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ")  # \w at the start of a character class first, so the next line does not nest brackets there
+sRegex = sRegex.replace("\\w", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ]")
+sRegex = sRegex.replace("[.]", r"\.")
+if not sRegex.startswith(""):  # NOTE(review): from here to the "else:" the patch text looks damaged (empty-string arguments, a stray ":" after re.search, an "else" without a matching "if"); restore these lines from the original file before applying
+sRegex = sRegex.replace("/", r"\/")
+m = re.search(r"\(\?"):
+sRegex = sRegex.replace('', '/').replace('i', '/ig').replace('', '/g')
+else:
+sRegex = "/" + sRegex + "/g"
+if bCaseInsensitive and not sRegex.endswith("/ig"):  # avoids a second "i" when the text already ends with /ig
+sRegex = sRegex + "i"
+if not lNegLookBeforeRegex:  # an empty list is reported as None
+lNegLookBeforeRegex = None
+return (sRegex, lNegLookBeforeRegex)
+
+
+def pyRuleToJS (lRule, dJSREGEXES, sWORDLIMITLEFT):
+    "returns the JS variant of <lRule> (deep copy with converted regex and look-before list appended); also removes the last item of <lRule> itself"
+    lJSRule = copy.deepcopy(lRule)
+    # in-place on the caller's list, after the copy was taken:
+    # tGroups positioning codes are useless for Python
+    lRule.pop()
+    # error messages: only actions flagged "-" have their items 2 and 4 rewritten
+    for aAct in lJSRule[6]:
+        if aAct[1] != "-":
+            continue
+        aAct[2] = aAct[2].replace(" ", " ")     # nbsp --> nnbsp
+        sMsg = aAct[4]
+        for sOld, sNew in (("« ", "« "), (" »", " »"), (" :", " :"), (" :", " :")):
+            sMsg = sMsg.replace(sOld, sNew)
+        aAct[4] = sMsg
+    # js regexes: a regex registered under the rule's item 3 takes precedence over the Python regex
+    sSourceRegex = dJSREGEXES.get(lJSRule[3], lJSRule[1])
+    sJSRegex, lNegLookBehind = regex2js(sSourceRegex, sWORDLIMITLEFT)
+    lJSRule[1] = sJSRegex
+    lJSRule.append(lNegLookBehind)
+    return lJSRule
+
+
+def writeRulesToJSArray (lRules):
+    "returns the text of a JS array literal for <lRules>, a sequence of (option name, list of 8-item rules)"
+    lParts = ["[\n"]
+    for sOption, aRuleGroup in lRules:
+        # a falsy option name is written as the JS literal false
+        if sOption:
+            lParts.append(' ["' + sOption + '", [\n')
+        else:
+            lParts.append(" [false, [\n")
+        for sRegex, bCaseInsensitive, sLineId, sRuleId, nPriority, lActions, aGroups, aNegLookBehindRegex in aRuleGroup:
+            lFields = [
+                sRegex,     # written as is, without quotes
+                "true" if bCaseInsensitive else "false",
+                '"' + sLineId + '"',
+                '"' + sRuleId + '"',
+                str(nPriority),
+                json.dumps(lActions, ensure_ascii=False),
+                json.dumps(aGroups, ensure_ascii=False),
+                json.dumps(aNegLookBehindRegex, ensure_ascii=False)
+            ]
+            lParts.append(' [' + ", ".join(lFields) + "],\n")
+        lParts.append(" ]],\n")
+    lParts.append("]")
+    return "".join(lParts)
+
+
+def groupsPositioningCodeToList (sGroupsPositioningCode):
+    "splits comma-separated codes: integer codes (optionally with a leading minus) become int, any other code stays a string; returns None for an empty value"
+    if not sGroupsPositioningCode:
+        return None
+    lCodes = []
+    for sItem in sGroupsPositioningCode.split(","):
+        # a single leading minus sign is ignored when testing for digits
+        sDigits = sItem[1:] if sItem.startswith("-") else sItem
+        lCodes.append(int(sItem) if sDigits.isdigit() else sItem)
+    return lCodes