Overview
Comment: | [build] compile_rules: separate js conversion from main code |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | build |
Files: | files | file ages | folders |
SHA3-256: |
7595474b17124dbfeb86f2fb545453ce |
User & Date: | olr on 2017-09-04 10:05:47 |
Other Links: | manifest | tags |
Context
2017-09-05
| ||
08:32 | [fx] WebExt: wrap only the bottom of textarea (unsatisfying but good enough for now) check-in: 5c343989d1 user: olr tags: trunk, fx | |
2017-09-04
| ||
10:05 | [build] compile_rules: separate js conversion from main code check-in: 7595474b17 user: olr tags: trunk, build | |
08:36 | [fx] WebExt: wrapper CSS adjustments check-in: f3d67f776e user: olr tags: trunk, fx | |
Changes
Modified compile_rules.py from [b5c896415e] to [7fa2e820bf].
1 2 3 4 | import re import sys import traceback | < > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 | import re import sys import traceback import json from distutils import file_util import compile_rules_js_convert as jsconv dDEF = {} lFUNCTIONS = [] aRULESET = set() # set of rule-ids to check if there is several rules with the same id nRULEWITHOUTNAME = 0 |
︙ | ︙ | |||
55 56 57 58 59 60 61 | s = re.sub(r"isEndOfNG\(\s*\)", 'isEndOfNG(dDA, s[m.end():], m.end())', s) # isEndOfNG(s) s = re.sub(r"isNextNotCOD\(\s*\)", 'isNextNotCOD(dDA, s[m.end():], m.end())', s) # isNextNotCOD(s) s = re.sub(r"isNextVerb\(\s*\)", 'isNextVerb(dDA, s[m.end():], m.end())', s) # isNextVerb(s) s = re.sub(r"\bspell *[(]", '_oDict.isValid(', s) s = re.sub(r"[\\](\d+)", 'm.group(\\1)', s) return s | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 56 57 58 59 60 61 62 63 64 65 66 67 68 69 | s = re.sub(r"isEndOfNG\(\s*\)", 'isEndOfNG(dDA, s[m.end():], m.end())', s) # isEndOfNG(s) s = re.sub(r"isNextNotCOD\(\s*\)", 'isNextNotCOD(dDA, s[m.end():], m.end())', s) # isNextNotCOD(s) s = re.sub(r"isNextVerb\(\s*\)", 'isNextVerb(dDA, s[m.end():], m.end())', s) # isNextVerb(s) s = re.sub(r"\bspell *[(]", '_oDict.isValid(', s) s = re.sub(r"[\\](\d+)", 'm.group(\\1)', s) return s def uppercase (s, sLang): "(flag i is not enough): converts regex to uppercase regex: 'foo' becomes '[Ff][Oo][Oo]', but 'Bar' becomes 'B[Aa][Rr]'." sUp = "" nState = 0 for i in range(0, len(s)): c = s[i] |
︙ | ︙ | |||
213 214 215 216 217 218 219 | return None sRegex = s[:i].strip() s = s[i+4:] # JS groups positioning codes m = re.search("@@\\S+", sRegex) if m: | | | 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 | return None sRegex = s[:i].strip() s = s[i+4:] # JS groups positioning codes m = re.search("@@\\S+", sRegex) if m: tGroups = jsconv.groupsPositioningCodeToList(sRegex[m.start()+2:]) sRegex = sRegex[:m.start()].strip() # JS regex m = re.search("<js>.+</js>i?", sRegex) if m: dJSREGEXES[sLineId] = m.group(0) sRegex = sRegex[:m.start()].strip() if "<js>" in sRegex or "</js>" in sRegex: |
︙ | ︙ | |||
401 402 403 404 405 406 407 | elif cAction == ">": ## no action, break loop if condition is False return [sCondition, cAction, ""] else: print("# Unknown action at line " + sIdAction) return None | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 339 340 341 342 343 344 345 346 347 348 349 350 351 352 | elif cAction == ">": ## no action, break loop if condition is False return [sCondition, cAction, ""] else: print("# Unknown action at line " + sIdAction) return None def _calcRulesStats (lRules): d = {'=':0, '~': 0, '-': 0, '>': 0} for aRule in lRules: for aAction in aRule[6]: d[aAction[1]] = d[aAction[1]] + 1 return (d, len(lRules)) |
︙ | ︙ | |||
636 637 638 639 640 641 642 | if sLine == "[++]": bParagraph = False else: aRule = createRule(sLine, nLine, sLang, bParagraph, dOptPriority) if aRule: if bParagraph: lParagraphRules.append(aRule) | | | | 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 | if sLine == "[++]": bParagraph = False else: aRule = createRule(sLine, nLine, sLang, bParagraph, dOptPriority) if aRule: if bParagraph: lParagraphRules.append(aRule) lParagraphRulesJS.append(jsconv.pyRuleToJS(aRule, dJSREGEXES, sWORDLIMITLEFT)) else: lSentenceRules.append(aRule) lSentenceRulesJS.append(jsconv.pyRuleToJS(aRule, dJSREGEXES, sWORDLIMITLEFT)) # creating file with all functions callable by rules print(" creating callables...") sPyCallables = "# generated code, do not edit\n" sJSCallables = "// generated code, do not edit\nconst oEvalFunc = {\n" for sFuncName, sReturn in lFUNCTIONS: cType = sFuncName[0:1] |
︙ | ︙ | |||
663 664 665 666 667 668 669 | sParams = "s, m, dDA" else: print("# Unknown function type in [" + sFuncName + "]") continue sPyCallables += "def {} ({}):\n".format(sFuncName, sParams) sPyCallables += " return " + sReturn + "\n" sJSCallables += " {}: function ({})".format(sFuncName, sParams) + " {\n" | | | | | 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 | sParams = "s, m, dDA" else: print("# Unknown function type in [" + sFuncName + "]") continue sPyCallables += "def {} ({}):\n".format(sFuncName, sParams) sPyCallables += " return " + sReturn + "\n" sJSCallables += " {}: function ({})".format(sFuncName, sParams) + " {\n" sJSCallables += " return " + jsconv.py2js(sReturn) + ";\n" sJSCallables += " },\n" sJSCallables += "}\n" displayStats(lParagraphRules, lSentenceRules) print("Unnamed rules: " + str(nRULEWITHOUTNAME)) d = { "callables": sPyCallables, "callablesJS": sJSCallables, "gctests": sGCTests, "gctestsJS": sGCTestsJS, "paragraph_rules": mergeRulesByOption(lParagraphRules), "sentence_rules": mergeRulesByOption(lSentenceRules), "paragraph_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lParagraphRulesJS)), "sentence_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lSentenceRulesJS)) } d.update(dOptions) return d |
Added compile_rules_js_convert.py version [e1dbc7ea44].
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 | # Convert Python code to JavaScript code import copy import re import json def py2js (sCode): "convert Python code to JavaScript code" # Python 2.x unicode strings sCode = re.sub('\\b[ur]"', '"', sCode) sCode = re.sub("\\b[ur]'", "'", sCode) # operators sCode = sCode.replace(" and ", " && ") sCode = sCode.replace(" or ", " || ") sCode = re.sub("\\bnot\\b", "!", sCode) sCode = re.sub("(.+) if (.+) else (.+)", "(\\2) ? 
\\1 : \\3", sCode) # boolean sCode = sCode.replace("False", "false") sCode = sCode.replace("True", "true") sCode = sCode.replace("bool", "Boolean") # methods sCode = sCode.replace(".__len__()", ".length") sCode = sCode.replace(".endswith", ".endsWith") sCode = sCode.replace(".find", ".indexOf") sCode = sCode.replace(".startswith", ".startsWith") sCode = sCode.replace(".lower", ".toLowerCase") sCode = sCode.replace(".upper", ".toUpperCase") sCode = sCode.replace(".isdigit", ".gl_isDigit") sCode = sCode.replace(".isupper", ".gl_isUpperCase") sCode = sCode.replace(".islower", ".gl_isLowerCase") sCode = sCode.replace(".istitle", ".gl_isTitle") sCode = sCode.replace(".capitalize", ".gl_toCapitalize") sCode = sCode.replace(".strip", ".gl_trim") sCode = sCode.replace(".lstrip", ".gl_trimLeft") sCode = sCode.replace(".rstrip", ".gl_trimRight") sCode = sCode.replace('.replace("."', ".replace(/\./g") sCode = sCode.replace('.replace("..."', ".replace(/\.\.\./g") sCode = re.sub('.replace\("([^"]+)" ?,', ".replace(/\\1/g,", sCode) # regex sCode = re.sub('re.search\("([^"]+)", *(m.group\(\\d\))\)', "(\\2.search(/\\1/) >= 0)", sCode) sCode = re.sub(".search\\(/\\(\\?i\\)([^/]+)/\\) >= 0\\)", ".search(/\\1/i) >= 0)", sCode) sCode = re.sub('(look\\(sx?[][.a-z:()]*), "\\(\\?i\\)([^"]+)"', "\\1, /\\2/i", sCode) sCode = re.sub('(look\\(sx?[][.a-z:()]*), "([^"]+)"', "\\1, /\\2/", sCode) sCode = re.sub('(look_chk1\\(dDA, sx?[][.a-z:()]*, [0-9a-z.()]+), "\\(\\?i\\)([^"]+)"', "\\1, /\\2/i", sCode) sCode = re.sub('(look_chk1\\(dDA, sx?[][.a-z:()]*, [0-9a-z.()]+), "([^"]+)"', "\\1, /\\2/i", sCode) sCode = sCode.replace("(?<!-)", "") # todo # slices sCode = sCode.replace("[:m.start()]", ".slice(0,m.index)") sCode = sCode.replace("[m.end():]", ".slice(m.end[0])") sCode = sCode.replace("[m.start():m.end()]", ".slice(m.index, m.end[0])") sCode = re.sub("\\[(-?\\d+):(-?\\d+)\\]", ".slice(\\1,\\2)", sCode) sCode = re.sub("\\[(-?\\d+):\\]", ".slice(\\1)", sCode) sCode = re.sub("\\[:(-?\\d+)\\]", 
".slice(0,\\1)", sCode) # regex matches sCode = sCode.replace(".end()", ".end[0]") sCode = sCode.replace(".start()", ".index") sCode = sCode.replace("m.group()", "m[0]") sCode = re.sub("\\.start\\((\\d+)\\)", ".start[\\1]", sCode) sCode = re.sub("m\\.group\\((\\d+)\\)", "m[\\1]", sCode) # tuples -> lists sCode = re.sub("\((m\.start\[\\d+\], m\[\\d+\])\)", "[\\1]", sCode) # regex sCode = sCode.replace("\w[\w-]+", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st][a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st-]+") sCode = sCode.replace(r"/\w/", "/[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st]/") sCode = sCode.replace(r"[\w-]", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st-]") sCode = sCode.replace(r"[\w,]", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st,]") return sCode def regex2js (sRegex, sWORDLIMITLEFT): "converts Python regex to JS regex and returns JS regex and list of negative lookbefore assertions" # Latin letters: http://unicode-table.com/fr/ # 0-9 and _ # A-Z # a-z # À-Ö 00C0-00D6 (upper case) # Ø-ß 00D8-00DF (upper case) # à-ö 00E0-00F6 (lower case) # ø-ÿ 00F8-00FF (lower case) # Ā-ʯ 0100-02AF (mixed) # -> a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ bCaseInsensitive = False if "(?i)" in sRegex: sRegex = sRegex.replace("(?i)", "") bCaseInsensitive = True lNegLookBeforeRegex = [] if sWORDLIMITLEFT in sRegex: sRegex = sRegex.replace(sWORDLIMITLEFT, "") lNegLookBeforeRegex = ["[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ.,–-]$"] sRegex = sRegex.replace("[\\w", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ") sRegex = sRegex.replace("\\w", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ]") sRegex = sRegex.replace("[.]", r"\.") if not sRegex.startswith("<js>"): sRegex = sRegex.replace("/", r"\/") m = re.search(r"\(\?<!([^()]+)\)", sRegex) # Negative lookbefore assertion should always be at the beginning of regex if m: lNegLookBeforeRegex.append(m.group(1)+"$") sRegex = sRegex.replace(m.group(0), "") if "(?<" in sRegex: print("# Warning. 
Lookbefore assertion not changed in:\n ") print(sRegex) if sRegex.startswith("<js>"): sRegex = sRegex.replace('<js>', '/').replace('</js>i', '/ig').replace('</js>', '/g') else: sRegex = "/" + sRegex + "/g" if bCaseInsensitive and not sRegex.endswith("/ig"): sRegex = sRegex + "i" if not lNegLookBeforeRegex: lNegLookBeforeRegex = None return (sRegex, lNegLookBeforeRegex) def pyRuleToJS (lRule, dJSREGEXES, sWORDLIMITLEFT): lRuleJS = copy.deepcopy(lRule) del lRule[-1] # tGroups positioning codes are useless for Python # error messages for aAction in lRuleJS[6]: if aAction[1] == "-": aAction[2] = aAction[2].replace(" ", " ") # nbsp --> nnbsp aAction[4] = aAction[4].replace("« ", "« ").replace(" »", " »").replace(" :", " :").replace(" :", " :") # js regexes lRuleJS[1], lNegLookBehindRegex = regex2js(dJSREGEXES.get(lRuleJS[3], lRuleJS[1]), sWORDLIMITLEFT) lRuleJS.append(lNegLookBehindRegex) return lRuleJS def writeRulesToJSArray (lRules): sArray = "[\n" for sOption, aRuleGroup in lRules: sArray += ' ["' + sOption + '", [\n' if sOption else " [false, [\n" for sRegex, bCaseInsensitive, sLineId, sRuleId, nPriority, lActions, aGroups, aNegLookBehindRegex in aRuleGroup: sArray += ' [' + sRegex + ", " sArray += "true, " if bCaseInsensitive else "false, " sArray += '"' + sLineId + '", ' sArray += '"' + sRuleId + '", ' sArray += str(nPriority) + ", " sArray += json.dumps(lActions, ensure_ascii=False) + ", " sArray += json.dumps(aGroups, ensure_ascii=False) + ", " sArray += json.dumps(aNegLookBehindRegex, ensure_ascii=False) + "],\n" sArray += " ]],\n" sArray += "]" return sArray def groupsPositioningCodeToList (sGroupsPositioningCode): if not sGroupsPositioningCode: return None return [ int(sCode) if sCode.isdigit() or (sCode[0:1] == "-" and sCode[1:].isdigit()) else sCode \ for sCode in sGroupsPositioningCode.split(",") ] |