108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
|
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
|
-
+
+
+
+
+
+
+
+
+
|
def createRule (s, nIdLine, sLang, bParagraph, dOptPriority):
"returns rule as list [option name, regex, bCaseInsensitive, identifier, list of actions]"
global dJSREGEXES
global nRULEWITHOUTNAME
#### OPTIONS
sLineId = str(nIdLine) + ("p" if bParagraph else "s")
sRuleId = sLineId
#### GRAPH CALL
if s.startswith("@@@@"):
if bParagraph:
print("Error. Graph call can’t be made only after the first pass (sentence by sentence)")
exit()
return ["@@@@", s[4:], sLineId]
#### OPTIONS
sOption = False # False or [a-z0-9]+ name
nPriority = 4 # Default is 4, value must be between 0 and 9
tGroups = None # code for groups positioning (only useful for JavaScript)
cCaseMode = 'i' # i: case insensitive, s: case sensitive, u: uppercasing allowed
cWordLimitLeft = '[' # [: word limit, <: no specific limit
cWordLimitRight = ']' # ]: word limit, >: no specific limit
m = re.match("^__(?P<borders_and_case>[[<]\\w[]>])(?P<option>/[a-zA-Z0-9]+|)(?P<ruleid>\\(\\w+\\)|)(?P<priority>![0-9]|)__ *", s)
|
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
|
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
|
+
-
-
+
+
|
print("# Unknown action at line " + sIdAction)
return None
def _calcRulesStats (lRules):
d = {'=':0, '~': 0, '-': 0, '>': 0}
for aRule in lRules:
if aRule[0] != "@@@@":
for aAction in aRule[6]:
d[aAction[1]] = d[aAction[1]] + 1
for aAction in aRule[6]:
d[aAction[1]] = d[aAction[1]] + 1
return (d, len(lRules))
def displayStats (lParagraphRules, lSentenceRules):
print(" {:>18} {:>18} {:>18} {:>18}".format("DISAMBIGUATOR", "TEXT PROCESSOR", "GRAMMAR CHECKING", "REGEX"))
d, nRule = _calcRulesStats(lParagraphRules)
print("§ {:>10} actions {:>10} actions {:>10} actions in {:>8} rules".format(d['='], d['~'], d['-'], nRule))
|
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
|
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
|
-
-
-
+
+
+
+
-
+
-
-
+
-
-
-
+
+
+
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
+
|
except:
print("Error. Rules file in project [" + sLang + "] not found.")
exit()
# removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
print(" parsing rules...")
global dDEF
lLine = []
lRuleLine = []
lTest = []
lOpt = []
zBookmark = re.compile("^!!+")
zGraphLink = re.compile(r"^@@@@GRAPHLINK>(\w+)@@@@")
bGraph = False
lGraphRule = []
for i, sLine in enumerate(lRules, 1):
if sLine.startswith('#END'):
# arbitrary end
printBookmark(0, "BREAK BY #END", i)
break
elif sLine.startswith("#"):
# comment
pass
elif sLine.startswith("@@@@"):
elif sLine.startswith("DEF:"):
m = re.match(r"^@@@@GRAPHLINK>(\w+)@@@@", sLine.strip())
if m:
# definition
#lRuleLine.append(["@GRAPHLINK", m.group(1)])
printBookmark(1, "@GRAPHLINK: " + m.group(1), i)
elif sLine.startswith("DEF:"):
m = re.match("DEF: +([a-zA-Z_][a-zA-Z_0-9]*) +(.+)$", sLine.strip())
if m:
dDEF["{"+m.group(1)+"}"] = m.group(2)
else:
print("Error in definition: ", end="")
print(sLine.strip())
elif sLine.startswith("TEST:"):
# test
lTest.append("{:<8}".format(i) + " " + sLine[5:].strip())
elif sLine.startswith("TODO:"):
# todo
pass
elif sLine.startswith(("OPTGROUP/", "OPTSOFTWARE:", "OPT/", "OPTLANG/", "OPTDEFAULTUILANG:", "OPTLABEL/", "OPTPRIORITY/")):
# options
lOpt.append(sLine)
elif re.match("[ \t]*$", sLine):
pass
elif sLine.startswith("!!"):
m = zBookmark.search(sLine)
# bookmark
m = re.match("!!+", sLine)
nExMk = len(m.group(0))
if sLine[nExMk:].strip():
printBookmark(nExMk-2, sLine[nExMk:].strip(), i)
# Graph rules
elif sLine.startswith("@@@@GRAPH:"):
# rules graph call
m = re.match(r"@@@@GRAPH: *(\w+)", sLine.strip())
if m:
printBookmark(1, "@GRAPH: " + m.group(1), i)
lRuleLine.append([i, "@@@@"+m.group(1)])
bGraph = True
lGraphRule.append([i, sLine])
bGraph = True
elif sLine.startswith("@@@@END_GRAPH"):
lGraphRule.append([i, sLine])
bGraph = False
elif bGraph:
lGraphRule.append([i, sLine])
# Regex rules
elif re.match("[ \t]*$", sLine):
# empty line
pass
elif sLine.startswith((" ", "\t")):
# rule (continuation)
lRuleLine[len(lRuleLine)-1][1] += " " + sLine.strip()
lRuleLine[-1][1] += " " + sLine.strip()
else:
# new rule
lRuleLine.append([i, sLine.strip()])
# generating options files
print(" parsing options...")
try:
dOptions, dOptPriority = prepareOptions(lOpt)
except:
|
547
548
549
550
551
552
553
554
555
|
577
578
579
580
581
582
583
584
585
586
587
588
589
|
+
+
+
+
|
"gctests": sGCTests,
"gctestsJS": sGCTestsJS,
"paragraph_rules": mergeRulesByOption(lParagraphRules),
"sentence_rules": mergeRulesByOption(lSentenceRules),
"paragraph_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lParagraphRulesJS)),
"sentence_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lSentenceRulesJS)) }
d.update(dOptions)
# compile graph rules
d2 = crg.make(lGraphRule, sLang, bJavaScript)
d.update(d2)
return d
|