55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
|
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
|
s = re.sub(r"isEndOfNG\(\s*\)", 'isEndOfNG(dDA, s[m.end():], m.end())', s) # isEndOfNG(s)
s = re.sub(r"isNextNotCOD\(\s*\)", 'isNextNotCOD(dDA, s[m.end():], m.end())', s) # isNextNotCOD(s)
s = re.sub(r"isNextVerb\(\s*\)", 'isNextVerb(dDA, s[m.end():], m.end())', s) # isNextVerb(s)
s = re.sub(r"\bspell *[(]", '_oDict.isValid(', s)
s = re.sub(r"[\\](\d+)", 'm.group(\\1)', s)
return s
def py2js (sCode):
"convert Python code to JavaScript code"
# Python 2.x unicode strings
sCode = re.sub('\\b[ur]"', '"', sCode)
sCode = re.sub("\\b[ur]'", "'", sCode)
# operators
sCode = sCode.replace(" and ", " && ")
sCode = sCode.replace(" or ", " || ")
sCode = re.sub("\\bnot\\b", "!", sCode)
sCode = re.sub("(.+) if (.+) else (.+)", "(\\2) ? \\1 : \\3", sCode)
# boolean
sCode = sCode.replace("False", "false")
sCode = sCode.replace("True", "true")
sCode = sCode.replace("bool", "Boolean")
# methods
sCode = sCode.replace(".__len__()", ".length")
sCode = sCode.replace(".endswith", ".endsWith")
sCode = sCode.replace(".find", ".indexOf")
sCode = sCode.replace(".startswith", ".startsWith")
sCode = sCode.replace(".lower", ".toLowerCase")
sCode = sCode.replace(".upper", ".toUpperCase")
sCode = sCode.replace(".isdigit", "._isDigit")
sCode = sCode.replace(".isupper", "._isUpperCase")
sCode = sCode.replace(".islower", "._isLowerCase")
sCode = sCode.replace(".istitle", "._isTitle")
sCode = sCode.replace(".capitalize", "._toCapitalize")
sCode = sCode.replace(".strip", "._trim")
sCode = sCode.replace(".lstrip", "._trimLeft")
sCode = sCode.replace(".rstrip", "._trimRight")
sCode = sCode.replace('.replace("."', ".replace(/\./g")
sCode = sCode.replace('.replace("..."', ".replace(/\.\.\./g")
sCode = re.sub('.replace\("([^"]+)" ?,', ".replace(/\\1/g,", sCode)
# regex
sCode = re.sub('re.search\("([^"]+)", *(m.group\(\\d\))\)', "(\\2.search(/\\1/) >= 0)", sCode)
sCode = re.sub(".search\\(/\\(\\?i\\)([^/]+)/\\) >= 0\\)", ".search(/\\1/i) >= 0)", sCode)
sCode = re.sub('(look\\(sx?[][.a-z:()]*), "\\(\\?i\\)([^"]+)"', "\\1, /\\2/i", sCode)
sCode = re.sub('(look\\(sx?[][.a-z:()]*), "([^"]+)"', "\\1, /\\2/", sCode)
sCode = re.sub('(look_chk1\\(dDA, sx?[][.a-z:()]*, [0-9a-z.()]+), "\\(\\?i\\)([^"]+)"', "\\1, /\\2/i", sCode)
sCode = re.sub('(look_chk1\\(dDA, sx?[][.a-z:()]*, [0-9a-z.()]+), "([^"]+)"', "\\1, /\\2/i", sCode)
sCode = sCode.replace("(?<!-)", "") # todo
# slices
sCode = sCode.replace("[:m.start()]", ".slice(0,m.index)")
sCode = sCode.replace("[m.end():]", ".slice(m.end[0])")
sCode = sCode.replace("[m.start():m.end()]", ".slice(m.index, m.end[0])")
sCode = re.sub("\\[(-?\\d+):(-?\\d+)\\]", ".slice(\\1,\\2)", sCode)
sCode = re.sub("\\[(-?\\d+):\\]", ".slice(\\1)", sCode)
sCode = re.sub("\\[:(-?\\d+)\\]", ".slice(0,\\1)", sCode)
# regex matches
sCode = sCode.replace(".end()", ".end[0]")
sCode = sCode.replace(".start()", ".index")
sCode = sCode.replace("m.group()", "m[0]")
sCode = re.sub("\\.start\\((\\d+)\\)", ".start[\\1]", sCode)
sCode = re.sub("m\\.group\\((\\d+)\\)", "m[\\1]", sCode)
# tuples -> lists
sCode = re.sub("\((m\.start\[\\d+\], m\[\\d+\])\)", "[\\1]", sCode)
# regex
sCode = sCode.replace("\w[\w-]+", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st][a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st-]+")
sCode = sCode.replace(r"/\w/", "/[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st]/")
sCode = sCode.replace(r"[\w-]", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st-]")
sCode = sCode.replace(r"[\w,]", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st,]")
return sCode
def uppercase (s, sLang):
"(flag i is not enough): converts regex to uppercase regex: 'foo' becomes '[Ff][Oo][Oo]', but 'Bar' becomes 'B[Aa][Rr]'."
sUp = ""
nState = 0
for i in range(0, len(s)):
c = s[i]
|
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
|
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
|
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
|
#### ACTION
sAction = sAction[m.end():].strip()
cAction = m.group(1)
if cAction == "-":
## error
iMsg = sAction.find(" # ")
if iMsg == -1:
sMsg = "# Error. Error message not found."
sURL = ""
print(sMsg + " Action id: " + sIdAction)
else:
sMsg = sAction[iMsg+3:].strip()
sAction = sAction[:iMsg].strip()
sURL = ""
mURL = re.search("[|] *(https?://.*)", sMsg)
if mURL:
sURL = mURL.group(1).strip()
sMsg = sMsg[:mURL.start(0)].strip()
if sMsg[0:1] == "=":
sMsg = prepareFunction(sMsg[1:])
lFUNCTIONS.append(("m_"+sIdAction, sMsg))
for x in re.finditer("group[(](\d+)[)]", sMsg):
if int(x.group(1)) > nGroup:
print("# Error in groups in message at line " + sIdAction + " ("+str(nGroup)+" groups only)")
sMsg = "=m_"+sIdAction
else:
for x in re.finditer(r"\\(\d+)", sMsg):
if int(x.group(1)) > nGroup:
print("# Error in groups in message at line " + sIdAction + " ("+str(nGroup)+" groups only)")
if re.search("[.]\\w+[(]", sMsg):
print("# Error in message at line " + sIdAction + ": This message looks like code. Line should begin with =")
sMsg = sAction[iMsg+3:].strip()
sAction = sAction[:iMsg].strip()
sURL = ""
mURL = re.search("[|] *(https?://.*)", sMsg)
if mURL:
sURL = mURL.group(1).strip()
sMsg = sMsg[:mURL.start(0)].strip()
if sMsg[0:1] == "=":
sMsg = prepareFunction(sMsg[1:])
lFUNCTIONS.append(("m_"+sIdAction, sMsg))
for x in re.finditer("group[(](\d+)[)]", sMsg):
if int(x.group(1)) > nGroup:
print("# Error in groups in message at line " + sIdAction + " ("+str(nGroup)+" groups only)")
sMsg = "=m_"+sIdAction
else:
for x in re.finditer(r"\\(\d+)", sMsg):
if int(x.group(1)) > nGroup:
print("# Error in groups in message at line " + sIdAction + " ("+str(nGroup)+" groups only)")
if re.search("[.]\\w+[(]", sMsg):
print("# Error in message at line " + sIdAction + ": This message looks like code. Line should begin with =")
if sAction[0:1] == "=" or cAction == "=":
if "define" in sAction and not re.search(r"define\(\\\d+ *, *\[.*\] *\)", sAction):
print("# Error in action at line " + sIdAction + ": second argument for define must be a list of strings")
sAction = prepareFunction(sAction)
sAction = sAction.replace("m.group(i[4])", "m.group("+str(iGroup)+")")
for x in re.finditer("group[(](\d+)[)]", sAction):
if int(x.group(1)) > nGroup:
print("# Error in groups in replacement at line " + sIdAction + " ("+str(nGroup)+" groups only)")
else:
for x in re.finditer(r"\\(\d+)", sAction):
if int(x.group(1)) > nGroup:
print("# Error in groups in replacement at line " + sIdAction + " ("+str(nGroup)+" groups only)")
if re.search("[.]\\w+[(]", sAction):
if re.search("[.]\\w+[(]|sugg\\w+[(]", sAction):
print("# Error in action at line " + sIdAction + ": This action looks like code. Line should begin with =")
if cAction == "-":
## error detected --> suggestion
if not sAction:
print("# Error in action at line " + sIdAction + ": This action is empty.")
if sAction[0:1] == "=":
|
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
|
339
340
341
342
343
344
345
346
347
348
349
350
351
352
|
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
|
elif cAction == ">":
## no action, break loop if condition is False
return [sCondition, cAction, ""]
else:
print("# Unknown action at line " + sIdAction)
return None
def regex2js (sRegex):
"converts Python regex to JS regex and returns JS regex and list of negative lookbefore assertions"
# Latin letters: http://unicode-table.com/fr/
# 0-9 and _
# A-Z
# a-z
# À-Ö 00C0-00D6 (upper case)
# Ø-ß 00D8-00DF (upper case)
# à-ö 00E0-00F6 (lower case)
# ø-ÿ 00F8-00FF (lower case)
# Ā-ʯ 0100-02AF (mixed)
# -> a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ
bCaseInsensitive = False
if "(?i)" in sRegex:
sRegex = sRegex.replace("(?i)", "")
bCaseInsensitive = True
lNegLookBeforeRegex = []
if sWORDLIMITLEFT in sRegex:
sRegex = sRegex.replace(sWORDLIMITLEFT, "")
lNegLookBeforeRegex = ["[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ.,–-]$"]
sRegex = sRegex.replace("[\\w", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ")
sRegex = sRegex.replace("\\w", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ]")
sRegex = sRegex.replace("[.]", r"\.")
if not sRegex.startswith("<js>"):
sRegex = sRegex.replace("/", r"\/")
m = re.search(r"\(\?<!([^()]+)\)", sRegex) # Negative lookbefore assertion should always be at the beginning of regex
if m:
lNegLookBeforeRegex.append(m.group(1)+"$")
sRegex = sRegex.replace(m.group(0), "")
if "(?<" in sRegex:
print("# Warning. Lookbefore assertion not changed in:\n ")
print(sRegex)
if sRegex.startswith("<js>"):
sRegex = sRegex.replace('<js>', '/').replace('</js>i', '/ig').replace('</js>', '/g')
else:
sRegex = "/" + sRegex + "/g"
if bCaseInsensitive and not sRegex.endswith("/ig"):
sRegex = sRegex + "i"
if not lNegLookBeforeRegex:
lNegLookBeforeRegex = None
return (sRegex, lNegLookBeforeRegex)
def pyRuleToJS (lRule):
lRuleJS = copy.deepcopy(lRule)
del lRule[-1] # tGroups positioning codes are useless for Python
# error messages
for aAction in lRuleJS[6]:
if aAction[1] == "-":
aAction[4] = aAction[4].replace("« ", "« ").replace(" »", " »")
# js regexes
lRuleJS[1], lNegLookBehindRegex = regex2js( dJSREGEXES.get(lRuleJS[3], lRuleJS[1]) )
lRuleJS.append(lNegLookBehindRegex)
return lRuleJS
def writeRulesToJSArray (lRules):
sArray = "[\n"
for sOption, aRuleGroup in lRules:
sArray += ' ["' + sOption + '", [\n' if sOption else " [false, [\n"
for sRegex, bCaseInsensitive, sLineId, sRuleId, nPriority, lActions, aGroups, aNegLookBehindRegex in aRuleGroup:
sArray += ' [' + sRegex + ", "
sArray += "true, " if bCaseInsensitive else "false, "
sArray += '"' + sLineId + '", '
sArray += '"' + sRuleId + '", '
sArray += str(nPriority) + ", "
sArray += json.dumps(lActions, ensure_ascii=False) + ", "
sArray += json.dumps(aGroups, ensure_ascii=False) + ", "
sArray += json.dumps(aNegLookBehindRegex, ensure_ascii=False) + "],\n"
sArray += " ]],\n"
sArray += "]"
return sArray
def groupsPositioningCodeToList (sGroupsPositioningCode):
if not sGroupsPositioningCode:
return None
return [ int(sCode) if sCode.isdigit() or (sCode[0:1] == "-" and sCode[1:].isdigit()) else sCode \
for sCode in sGroupsPositioningCode.split(",") ]
def _calcRulesStats (lRules):
d = {'=':0, '~': 0, '-': 0, '>': 0}
for aRule in lRules:
for aAction in aRule[6]:
d[aAction[1]] = d[aAction[1]] + 1
return (d, len(lRules))
|
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
|
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
|
+
+
+
-
+
|
elif sLine.startswith("OPTPRIORITY/"):
m = re.match("OPTPRIORITY/([a-z0-9]+): *([0-9])$", sLine)
dOptPriority[m.group(1)] = int(m.group(2))
elif sLine.startswith("OPTLANG/"):
m = re.match("OPTLANG/([a-z][a-z](?:_[A-Z][A-Z]|)):(.+)$", sLine)
sLang = m.group(1)[:2]
dOptLabel[sLang] = { "__optiontitle__": m.group(2).strip() }
elif sLine.startswith("OPTDEFAULTUILANG:"):
m = re.match("OPTDEFAULTUILANG: *([a-z][a-z](?:_[A-Z][A-Z]|))$", sLine)
sDefaultUILang = m.group(1)[:2]
elif sLine.startswith("OPTLABEL/"):
m = re.match("OPTLABEL/([a-z0-9]+):(.+)$", sLine)
dOptLabel[sLang][m.group(1)] = list(map(str.strip, m.group(2).split("|"))) if "|" in m.group(2) else [m.group(2).strip(), ""]
else:
print("# Error. Wrong option line in:\n ")
print(sLine)
print(" options defined for: " + ", ".join([ t[0] for t in lOpt ]))
dOptions = { "lStructOpt": lStructOpt, "dOptLabel": dOptLabel }
dOptions = { "lStructOpt": lStructOpt, "dOptLabel": dOptLabel, "sDefaultUILang": sDefaultUILang }
dOptions.update({ "dOpt"+k: v for k, v in lOpt })
return dOptions, dOptPriority
def printBookmark (nLevel, sComment, nLine):
print(" {:>6}: {}".format(nLine, " " * nLevel + sComment))
|
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
|
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
|
-
+
-
-
+
+
|
sParams = "s, m, dDA"
else:
print("# Unknown function type in [" + sFuncName + "]")
continue
sPyCallables += "def {} ({}):\n".format(sFuncName, sParams)
sPyCallables += " return " + sReturn + "\n"
sJSCallables += " {}: function ({})".format(sFuncName, sParams) + " {\n"
sJSCallables += " return " + py2js(sReturn) + ";\n"
sJSCallables += " return " + jsconv.py2js(sReturn) + ";\n"
sJSCallables += " },\n"
sJSCallables += "}\n"
displayStats(lParagraphRules, lSentenceRules)
print("Unnamed rules: " + str(nRULEWITHOUTNAME))
d = { "callables": sPyCallables,
"callablesJS": sJSCallables,
"gctests": sGCTests,
"gctestsJS": sGCTestsJS,
"paragraph_rules": mergeRulesByOption(lParagraphRules),
"sentence_rules": mergeRulesByOption(lSentenceRules),
"paragraph_rules_JS": writeRulesToJSArray(mergeRulesByOption(lParagraphRulesJS)),
"sentence_rules_JS": writeRulesToJSArray(mergeRulesByOption(lSentenceRulesJS)) }
"paragraph_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lParagraphRulesJS)),
"sentence_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lSentenceRulesJS)) }
d.update(dOptions)
return d
|