Overview
| Comment: | [build] compile_rules: separate js conversion from main code |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk | build |
| Files: | files | file ages | folders |
| SHA3-256: |
7595474b17124dbfeb86f2fb545453ce |
| User & Date: | olr on 2017-09-04 10:05:47 |
| Other Links: | manifest | tags |
Context
|
2017-09-05
| ||
| 08:32 | [fx] WebExt: wrap only the bottom of textarea (unsatifying but good enough for now) check-in: 5c343989d1 user: olr tags: trunk, fx | |
|
2017-09-04
| ||
| 10:05 | [build] compile_rules: separate js conversion from main code check-in: 7595474b17 user: olr tags: trunk, build | |
| 08:36 | [fx] WebExt: wrapper CSS adjustements check-in: f3d67f776e user: olr tags: trunk, fx | |
Changes
Modified compile_rules.py from [b5c896415e] to [7fa2e820bf].
1 2 3 4 | import re import sys import traceback | < > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 |
import re
import sys
import traceback
import json
from distutils import file_util
import compile_rules_js_convert as jsconv
dDEF = {}
lFUNCTIONS = []
aRULESET = set() # set of rule-ids to check if there is several rules with the same id
nRULEWITHOUTNAME = 0
|
| ︙ | ︙ | |||
55 56 57 58 59 60 61 |
s = re.sub(r"isEndOfNG\(\s*\)", 'isEndOfNG(dDA, s[m.end():], m.end())', s) # isEndOfNG(s)
s = re.sub(r"isNextNotCOD\(\s*\)", 'isNextNotCOD(dDA, s[m.end():], m.end())', s) # isNextNotCOD(s)
s = re.sub(r"isNextVerb\(\s*\)", 'isNextVerb(dDA, s[m.end():], m.end())', s) # isNextVerb(s)
s = re.sub(r"\bspell *[(]", '_oDict.isValid(', s)
s = re.sub(r"[\\](\d+)", 'm.group(\\1)', s)
return s
| < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
s = re.sub(r"isEndOfNG\(\s*\)", 'isEndOfNG(dDA, s[m.end():], m.end())', s) # isEndOfNG(s)
s = re.sub(r"isNextNotCOD\(\s*\)", 'isNextNotCOD(dDA, s[m.end():], m.end())', s) # isNextNotCOD(s)
s = re.sub(r"isNextVerb\(\s*\)", 'isNextVerb(dDA, s[m.end():], m.end())', s) # isNextVerb(s)
s = re.sub(r"\bspell *[(]", '_oDict.isValid(', s)
s = re.sub(r"[\\](\d+)", 'm.group(\\1)', s)
return s
def uppercase (s, sLang):
"(flag i is not enough): converts regex to uppercase regex: 'foo' becomes '[Ff][Oo][Oo]', but 'Bar' becomes 'B[Aa][Rr]'."
sUp = ""
nState = 0
for i in range(0, len(s)):
c = s[i]
|
| ︙ | ︙ | |||
213 214 215 216 217 218 219 |
return None
sRegex = s[:i].strip()
s = s[i+4:]
# JS groups positioning codes
m = re.search("@@\\S+", sRegex)
if m:
| | | 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
return None
sRegex = s[:i].strip()
s = s[i+4:]
# JS groups positioning codes
m = re.search("@@\\S+", sRegex)
if m:
tGroups = jsconv.groupsPositioningCodeToList(sRegex[m.start()+2:])
sRegex = sRegex[:m.start()].strip()
# JS regex
m = re.search("<js>.+</js>i?", sRegex)
if m:
dJSREGEXES[sLineId] = m.group(0)
sRegex = sRegex[:m.start()].strip()
if "<js>" in sRegex or "</js>" in sRegex:
|
| ︙ | ︙ | |||
401 402 403 404 405 406 407 |
elif cAction == ">":
## no action, break loop if condition is False
return [sCondition, cAction, ""]
else:
print("# Unknown action at line " + sIdAction)
return None
| < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 339 340 341 342 343 344 345 346 347 348 349 350 351 352 |
elif cAction == ">":
## no action, break loop if condition is False
return [sCondition, cAction, ""]
else:
print("# Unknown action at line " + sIdAction)
return None
def _calcRulesStats (lRules):
d = {'=':0, '~': 0, '-': 0, '>': 0}
for aRule in lRules:
for aAction in aRule[6]:
d[aAction[1]] = d[aAction[1]] + 1
return (d, len(lRules))
|
| ︙ | ︙ | |||
636 637 638 639 640 641 642 |
if sLine == "[++]":
bParagraph = False
else:
aRule = createRule(sLine, nLine, sLang, bParagraph, dOptPriority)
if aRule:
if bParagraph:
lParagraphRules.append(aRule)
| | | | 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 |
if sLine == "[++]":
bParagraph = False
else:
aRule = createRule(sLine, nLine, sLang, bParagraph, dOptPriority)
if aRule:
if bParagraph:
lParagraphRules.append(aRule)
lParagraphRulesJS.append(jsconv.pyRuleToJS(aRule, dJSREGEXES, sWORDLIMITLEFT))
else:
lSentenceRules.append(aRule)
lSentenceRulesJS.append(jsconv.pyRuleToJS(aRule, dJSREGEXES, sWORDLIMITLEFT))
# creating file with all functions callable by rules
print(" creating callables...")
sPyCallables = "# generated code, do not edit\n"
sJSCallables = "// generated code, do not edit\nconst oEvalFunc = {\n"
for sFuncName, sReturn in lFUNCTIONS:
cType = sFuncName[0:1]
|
| ︙ | ︙ | |||
663 664 665 666 667 668 669 |
sParams = "s, m, dDA"
else:
print("# Unknown function type in [" + sFuncName + "]")
continue
sPyCallables += "def {} ({}):\n".format(sFuncName, sParams)
sPyCallables += " return " + sReturn + "\n"
sJSCallables += " {}: function ({})".format(sFuncName, sParams) + " {\n"
| | | | | 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 |
sParams = "s, m, dDA"
else:
print("# Unknown function type in [" + sFuncName + "]")
continue
sPyCallables += "def {} ({}):\n".format(sFuncName, sParams)
sPyCallables += " return " + sReturn + "\n"
sJSCallables += " {}: function ({})".format(sFuncName, sParams) + " {\n"
sJSCallables += " return " + jsconv.py2js(sReturn) + ";\n"
sJSCallables += " },\n"
sJSCallables += "}\n"
displayStats(lParagraphRules, lSentenceRules)
print("Unnamed rules: " + str(nRULEWITHOUTNAME))
d = { "callables": sPyCallables,
"callablesJS": sJSCallables,
"gctests": sGCTests,
"gctestsJS": sGCTestsJS,
"paragraph_rules": mergeRulesByOption(lParagraphRules),
"sentence_rules": mergeRulesByOption(lSentenceRules),
"paragraph_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lParagraphRulesJS)),
"sentence_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lSentenceRulesJS)) }
d.update(dOptions)
return d
|
Added compile_rules_js_convert.py version [e1dbc7ea44].
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
# Convert Python code to JavaScript code
import copy
import re
import json
def py2js (sCode):
"convert Python code to JavaScript code"
# Python 2.x unicode strings
sCode = re.sub('\\b[ur]"', '"', sCode)
sCode = re.sub("\\b[ur]'", "'", sCode)
# operators
sCode = sCode.replace(" and ", " && ")
sCode = sCode.replace(" or ", " || ")
sCode = re.sub("\\bnot\\b", "!", sCode)
sCode = re.sub("(.+) if (.+) else (.+)", "(\\2) ? \\1 : \\3", sCode)
# boolean
sCode = sCode.replace("False", "false")
sCode = sCode.replace("True", "true")
sCode = sCode.replace("bool", "Boolean")
# methods
sCode = sCode.replace(".__len__()", ".length")
sCode = sCode.replace(".endswith", ".endsWith")
sCode = sCode.replace(".find", ".indexOf")
sCode = sCode.replace(".startswith", ".startsWith")
sCode = sCode.replace(".lower", ".toLowerCase")
sCode = sCode.replace(".upper", ".toUpperCase")
sCode = sCode.replace(".isdigit", ".gl_isDigit")
sCode = sCode.replace(".isupper", ".gl_isUpperCase")
sCode = sCode.replace(".islower", ".gl_isLowerCase")
sCode = sCode.replace(".istitle", ".gl_isTitle")
sCode = sCode.replace(".capitalize", ".gl_toCapitalize")
sCode = sCode.replace(".strip", ".gl_trim")
sCode = sCode.replace(".lstrip", ".gl_trimLeft")
sCode = sCode.replace(".rstrip", ".gl_trimRight")
sCode = sCode.replace('.replace("."', ".replace(/\./g")
sCode = sCode.replace('.replace("..."', ".replace(/\.\.\./g")
sCode = re.sub('.replace\("([^"]+)" ?,', ".replace(/\\1/g,", sCode)
# regex
sCode = re.sub('re.search\("([^"]+)", *(m.group\(\\d\))\)', "(\\2.search(/\\1/) >= 0)", sCode)
sCode = re.sub(".search\\(/\\(\\?i\\)([^/]+)/\\) >= 0\\)", ".search(/\\1/i) >= 0)", sCode)
sCode = re.sub('(look\\(sx?[][.a-z:()]*), "\\(\\?i\\)([^"]+)"', "\\1, /\\2/i", sCode)
sCode = re.sub('(look\\(sx?[][.a-z:()]*), "([^"]+)"', "\\1, /\\2/", sCode)
sCode = re.sub('(look_chk1\\(dDA, sx?[][.a-z:()]*, [0-9a-z.()]+), "\\(\\?i\\)([^"]+)"', "\\1, /\\2/i", sCode)
sCode = re.sub('(look_chk1\\(dDA, sx?[][.a-z:()]*, [0-9a-z.()]+), "([^"]+)"', "\\1, /\\2/i", sCode)
sCode = sCode.replace("(?<!-)", "") # todo
# slices
sCode = sCode.replace("[:m.start()]", ".slice(0,m.index)")
sCode = sCode.replace("[m.end():]", ".slice(m.end[0])")
sCode = sCode.replace("[m.start():m.end()]", ".slice(m.index, m.end[0])")
sCode = re.sub("\\[(-?\\d+):(-?\\d+)\\]", ".slice(\\1,\\2)", sCode)
sCode = re.sub("\\[(-?\\d+):\\]", ".slice(\\1)", sCode)
sCode = re.sub("\\[:(-?\\d+)\\]", ".slice(0,\\1)", sCode)
# regex matches
sCode = sCode.replace(".end()", ".end[0]")
sCode = sCode.replace(".start()", ".index")
sCode = sCode.replace("m.group()", "m[0]")
sCode = re.sub("\\.start\\((\\d+)\\)", ".start[\\1]", sCode)
sCode = re.sub("m\\.group\\((\\d+)\\)", "m[\\1]", sCode)
# tuples -> lists
sCode = re.sub("\((m\.start\[\\d+\], m\[\\d+\])\)", "[\\1]", sCode)
# regex
sCode = sCode.replace("\w[\w-]+", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st][a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st-]+")
sCode = sCode.replace(r"/\w/", "/[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st]/")
sCode = sCode.replace(r"[\w-]", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st-]")
sCode = sCode.replace(r"[\w,]", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st,]")
return sCode
def regex2js (sRegex, sWORDLIMITLEFT):
"converts Python regex to JS regex and returns JS regex and list of negative lookbefore assertions"
# Latin letters: http://unicode-table.com/fr/
# 0-9 and _
# A-Z
# a-z
# À-Ö 00C0-00D6 (upper case)
# Ø-ß 00D8-00DF (upper case)
# à-ö 00E0-00F6 (lower case)
# ø-ÿ 00F8-00FF (lower case)
# Ā-ʯ 0100-02AF (mixed)
# -> a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ
bCaseInsensitive = False
if "(?i)" in sRegex:
sRegex = sRegex.replace("(?i)", "")
bCaseInsensitive = True
lNegLookBeforeRegex = []
if sWORDLIMITLEFT in sRegex:
sRegex = sRegex.replace(sWORDLIMITLEFT, "")
lNegLookBeforeRegex = ["[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ.,–-]$"]
sRegex = sRegex.replace("[\\w", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ")
sRegex = sRegex.replace("\\w", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ]")
sRegex = sRegex.replace("[.]", r"\.")
if not sRegex.startswith("<js>"):
sRegex = sRegex.replace("/", r"\/")
m = re.search(r"\(\?<!([^()]+)\)", sRegex) # Negative lookbefore assertion should always be at the beginning of regex
if m:
lNegLookBeforeRegex.append(m.group(1)+"$")
sRegex = sRegex.replace(m.group(0), "")
if "(?<" in sRegex:
print("# Warning. Lookbefore assertion not changed in:\n ")
print(sRegex)
if sRegex.startswith("<js>"):
sRegex = sRegex.replace('<js>', '/').replace('</js>i', '/ig').replace('</js>', '/g')
else:
sRegex = "/" + sRegex + "/g"
if bCaseInsensitive and not sRegex.endswith("/ig"):
sRegex = sRegex + "i"
if not lNegLookBeforeRegex:
lNegLookBeforeRegex = None
return (sRegex, lNegLookBeforeRegex)
def pyRuleToJS (lRule, dJSREGEXES, sWORDLIMITLEFT):
lRuleJS = copy.deepcopy(lRule)
del lRule[-1] # tGroups positioning codes are useless for Python
# error messages
for aAction in lRuleJS[6]:
if aAction[1] == "-":
aAction[2] = aAction[2].replace(" ", " ") # nbsp --> nnbsp
aAction[4] = aAction[4].replace("« ", "« ").replace(" »", " »").replace(" :", " :").replace(" :", " :")
# js regexes
lRuleJS[1], lNegLookBehindRegex = regex2js(dJSREGEXES.get(lRuleJS[3], lRuleJS[1]), sWORDLIMITLEFT)
lRuleJS.append(lNegLookBehindRegex)
return lRuleJS
def writeRulesToJSArray (lRules):
sArray = "[\n"
for sOption, aRuleGroup in lRules:
sArray += ' ["' + sOption + '", [\n' if sOption else " [false, [\n"
for sRegex, bCaseInsensitive, sLineId, sRuleId, nPriority, lActions, aGroups, aNegLookBehindRegex in aRuleGroup:
sArray += ' [' + sRegex + ", "
sArray += "true, " if bCaseInsensitive else "false, "
sArray += '"' + sLineId + '", '
sArray += '"' + sRuleId + '", '
sArray += str(nPriority) + ", "
sArray += json.dumps(lActions, ensure_ascii=False) + ", "
sArray += json.dumps(aGroups, ensure_ascii=False) + ", "
sArray += json.dumps(aNegLookBehindRegex, ensure_ascii=False) + "],\n"
sArray += " ]],\n"
sArray += "]"
return sArray
def groupsPositioningCodeToList (sGroupsPositioningCode):
if not sGroupsPositioningCode:
return None
return [ int(sCode) if sCode.isdigit() or (sCode[0:1] == "-" and sCode[1:].isdigit()) else sCode \
for sCode in sGroupsPositioningCode.split(",") ]
|