Grammalecte  Check-in [ca8457058e]

Overview
Comment:[build] conversion to JS update
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | build | rg
Files: files | file ages | folders
SHA3-256: ca8457058ef6d52215f754befe3cff77d717ca79ecc13cc538301e4a9241259a
User & Date: olr on 2018-09-17 09:32:14
Other Links: branch diff | manifest | tags
Context
2018-09-17
09:34
[core][js] gc engine: update \w replacements, + remove useless code check-in: 1173853ba9 user: olr tags: core, rg
09:32
[build] conversion to JS update check-in: ca8457058e user: olr tags: build, rg
09:00
[graphspell] tokenizer: add chars to \w replacement (JS still sucks) check-in: c185b3fc04 user: olr tags: graphspell, rg
Changes

Modified compile_rules_js_convert.py from [3e88e00dab] to [9d9f89854e].

47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
    sCode = re.sub(r'.replace\("([^"]+)" ?,', ".replace(/\\1/g,", sCode)
    # regex
    #sCode = re.sub('re.search\\("([^"]+)", *(m.group\\(\\d\\))\\)', "(\\2.search(/\\1/) >= 0)", sCode)
    #sCode = re.sub(".search\\(/\\(\\?i\\)([^/]+)/\\) >= 0\\)", ".search(/\\1/i) >= 0)", sCode)
    #sCode = re.sub('(look\\(sSentence0?[][.a-z0-9:()"+-]*), "\\(\\?i\\)([^"]+)"', "\\1, /\\2/i", sCode)
    #sCode = re.sub('(look\\(sSentence0?[][.a-z0-9:()"+-]*), "([^"]+)"', "\\1, /\\2/", sCode)
    sCode = re.sub('m\\.group\\((\\d+)\\) +in +(a[a-zA-Z]+)', "\\2.has(m[\\1])", sCode)
    sCode = sCode.replace("(?<!-)", "")  # todo
    # slices
    sCode = sCode.replace("[:m.start()]", ".slice(0,m.index)")
    sCode = sCode.replace("[m.end():]", ".slice(m.end[0])")
    sCode = sCode.replace("[m.start():m.end()]", ".slice(m.index, m.end[0])")
    sCode = sCode.replace('[lToken[nLastToken]["nEnd"]:]', '.slice(lToken[nLastToken]["nEnd"])')
    sCode = sCode.replace('[:lToken[1+nTokenOffset]["nStart"]]', '.slice(0,lToken[1+nTokenOffset]["nStart"])')
    sCode = re.sub("\\[(-?\\d+):(-?\\d+)\\]", ".slice(\\1,\\2)", sCode)
    sCode = re.sub("\\[(-?\\d+):\\]", ".slice(\\1)", sCode)
    sCode = re.sub("\\[:(-?\\d+)\\]", ".slice(0,\\1)", sCode)
    # regex matches
    sCode = sCode.replace(".end()", ".end[0]")
    sCode = sCode.replace(".start()", ".index")
    sCode = sCode.replace("m.group()", "m[0]")
    sCode = re.sub("\\.start\\((\\d+)\\)", ".start[\\1]", sCode)
    sCode = re.sub("m\\.group\\((\\d+)\\)", "m[\\1]", sCode)
    # tuples -> lists
    sCode = re.sub("\\((m\\.start\\[\\d+\\], m\\[\\d+\\])\\)", "[\\1]", sCode)
    # regex
    sCode = sCode.replace(r"\w[\w-]+", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st][a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st-]+")
    sCode = sCode.replace(r"/\w/", "/[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st]/")
    sCode = sCode.replace(r"[\w-]", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st-]")
    sCode = sCode.replace(r"[\w,]", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st,]")
    return sCode


def regex2js (sRegex, sWORDLIMITLEFT):
    "converts Python regex to JS regex and returns JS regex and list of negative lookbefore assertions"
    #   Latin letters: http://unicode-table.com/fr/
    #   0-9  and  _







|


















<
<
|
|







47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72


73
74
75
76
77
78
79
80
81
    sCode = re.sub(r'.replace\("([^"]+)" ?,', ".replace(/\\1/g,", sCode)
    # regex
    #sCode = re.sub('re.search\\("([^"]+)", *(m.group\\(\\d\\))\\)', "(\\2.search(/\\1/) >= 0)", sCode)
    #sCode = re.sub(".search\\(/\\(\\?i\\)([^/]+)/\\) >= 0\\)", ".search(/\\1/i) >= 0)", sCode)
    #sCode = re.sub('(look\\(sSentence0?[][.a-z0-9:()"+-]*), "\\(\\?i\\)([^"]+)"', "\\1, /\\2/i", sCode)
    #sCode = re.sub('(look\\(sSentence0?[][.a-z0-9:()"+-]*), "([^"]+)"', "\\1, /\\2/", sCode)
    sCode = re.sub('m\\.group\\((\\d+)\\) +in +(a[a-zA-Z]+)', "\\2.has(m[\\1])", sCode)
    sCode = re.sub('(lToken\\S+) +in +(a[a-zA-Z]+)', "\\2.has(\\1)", sCode)
    # slices
    sCode = sCode.replace("[:m.start()]", ".slice(0,m.index)")
    sCode = sCode.replace("[m.end():]", ".slice(m.end[0])")
    sCode = sCode.replace("[m.start():m.end()]", ".slice(m.index, m.end[0])")
    sCode = sCode.replace('[lToken[nLastToken]["nEnd"]:]', '.slice(lToken[nLastToken]["nEnd"])')
    sCode = sCode.replace('[:lToken[1+nTokenOffset]["nStart"]]', '.slice(0,lToken[1+nTokenOffset]["nStart"])')
    sCode = re.sub("\\[(-?\\d+):(-?\\d+)\\]", ".slice(\\1,\\2)", sCode)
    sCode = re.sub("\\[(-?\\d+):\\]", ".slice(\\1)", sCode)
    sCode = re.sub("\\[:(-?\\d+)\\]", ".slice(0,\\1)", sCode)
    # regex matches
    sCode = sCode.replace(".end()", ".end[0]")
    sCode = sCode.replace(".start()", ".index")
    sCode = sCode.replace("m.group()", "m[0]")
    sCode = re.sub("\\.start\\((\\d+)\\)", ".start[\\1]", sCode)
    sCode = re.sub("m\\.group\\((\\d+)\\)", "m[\\1]", sCode)
    # tuples -> lists
    sCode = re.sub("\\((m\\.start\\[\\d+\\], m\\[\\d+\\])\\)", "[\\1]", sCode)
    # regex


    sCode = sCode.replace(r"[\\w", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-stᴀ-ᶿ")
    sCode = sCode.replace(r"\\w", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-stᴀ-ᶿ]")
    return sCode


def regex2js (sRegex, sWORDLIMITLEFT):
    "converts Python regex to JS regex and returns JS regex and list of negative lookbefore assertions"
    #   Latin letters: http://unicode-table.com/fr/
    #   0-9  and  _