| 
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124 | 
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
 | 
-
+
+
+
+
+
+
+
+
+
 | 
def createRule (s, nIdLine, sLang, bParagraph, dOptPriority):
    "returns rule as list [option name, regex, bCaseInsensitive, identifier, list of actions]"
    global dJSREGEXES
    global nRULEWITHOUTNAME
sLineId = str(nIdLine) + ("p" if bParagraph else "s")
    sRuleId = sLineId
    #### GRAPH CALL
    if s.startswith("@@@@"):
        if bParagraph:
            print("Error. Graph call can’t be made only after the first pass (sentence by sentence)")
            exit()
        return ["@@@@", s[4:], sLineId]
    #### OPTIONS
    sOption = False         # False or [a-z0-9]+ name
    nPriority = 4           # Default is 4, value must be between 0 and 9
    tGroups = None          # code for groups positioning (only useful for JavaScript)
    cCaseMode = 'i'         # i: case insensitive,  s: case sensitive,  u: uppercasing allowed
    cWordLimitLeft = '['    # [: word limit, <: no specific limit
    cWordLimitRight = ']'   # ]: word limit, >: no specific limit
    m = re.match("^__(?P<borders_and_case>[[<]\\w[]>])(?P<option>/[a-zA-Z0-9]+|)(?P<ruleid>\\(\\w+\\)|)(?P<priority>![0-9]|)__ *", s)    #### OPTIONS | 
| 
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356 | 
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
 | 
+
-
-
+
+
 | 
        print("# Unknown action at line " + sIdAction)
        return None
def _calcRulesStats (lRules):
    d = {'=':0, '~': 0, '-': 0, '>': 0}
    for aRule in lRules:
        if aRule[0] != "@@@@":
        for aAction in aRule[6]:
            d[aAction[1]] = d[aAction[1]] + 1
for aAction in aRule[6]:
                d[aAction[1]] = d[aAction[1]] + 1
    return (d, len(lRules))
def displayStats (lParagraphRules, lSentenceRules):
    print("  {:>18} {:>18} {:>18} {:>18}".format("DISAMBIGUATOR", "TEXT PROCESSOR", "GRAMMAR CHECKING", "REGEX"))
    d, nRule = _calcRulesStats(lParagraphRules)
    print("§ {:>10} actions {:>10} actions {:>10} actions  in {:>8} rules".format(d['='], d['~'], d['-'], nRule)) | 
| 
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450451
452
453454
455
456
457
458
459
460
461
462463
464
465
466
467
468
469
470471
472
473
474
475476
477
478
479
480
481
482
483
484 | 
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
 | 
-
-
+
+
+
-
+
-
+
+
+
+
-
+
+
+
+
-
+
+
+
-
+
+
 | 
    # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
    print("  parsing rules...")
    global dDEF
    lLine = []
    lRuleLine = []
    lTest = []
    lOpt = []
for i, sLine in enumerate(lRules, 1):
        if sLine.startswith('#END'):
            # arbitrary end
            printBookmark(0, "BREAK BY #END", i)
            break
        elif sLine.startswith("#"):
            # comment
            pass
        elif sLine.startswith("@@@@"):
            # rules graph call    zBookmark = re.compile("^!!+")    zGraphLink = re.compile(r"^@@@@GRAPHLINK>(\w+)@@@@")            m = re.match(r"m = re.match(r"@@@@GRAPH: *(\w+)@@@@", sLine.strip())
            if m:
                #lRuleLine.append(["@GRAPHLINK", m.group(1)])^@@@@GRAPHLINK>(\w+)@@@@", sLine.strip())                printBookmark(1, "@GRAPHprintBookmark(1, "@GRAPH: " + m.group(1), i)
                lRuleLine.append([i, "@@@@"+m.group(1)])
        elif sLine.startswith("DEF:"):
            # definition
            m = re.match("DEF: +([a-zA-Z_][a-zA-Z_0-9]*) +(.+)$", sLine.strip())
            if m:
                dDEF["{"+m.group(1)+"}"] = m.group(2)
            else:
                print("Error in definition: ", end="")
                print(sLine.strip())
        elif sLine.startswith("TEST:"):
            # testLINK: " + m.group(1), i)            lTest.append("lTest.append("r{:<7}".format(i) + "  " + sLine[5:].strip())
        elif sLine.startswith("TODO:"):
            # todo
            pass
        elif sLine.startswith(("OPTGROUP/", "OPTSOFTWARE:", "OPT/", "OPTLANG/", "OPTDEFAULTUILANG:", "OPTLABEL/", "OPTPRIORITY/")):
            # options
            lOpt.append(sLine)
        elif re.match("[  \t]*$", sLine):
            # empty line
            pass
        elif sLine.startswith("!!"):{:<8}".format(i) + "  " + sLine[5:].strip())            # bookmark
            m = re.match("!!+", sLine)
            nExMk = len(m.group(0))
            if sLine[nExMk:].strip():
                printBookmark(nExMk-2, sLine[nExMk:].strip(), i)
        elif sLine.startswith(("    ", "\t")):
            # rule (continuation)m = zBookmark.search(sLine)            lRuleLine[lRuleLine[-1][1] += " " + sLine.strip()
        else:
            # new rule
            lRuleLine.append([i, sLine.strip()])
    # generating options files
    print("  parsing options...")
    try:
        dOptions, dOptPriority = prepareOptions(lOpt)
    except:len(lRuleLine)-1][1] += " " + sLine.strip() | 
| 
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555 | 
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
 | 
-
-
+
+
 | 
    displayStats(lParagraphRules, lSentenceRules)
    print("Unnamed rules: " + str(nRULEWITHOUTNAME))
    d = { "callables": sPyCallables,
          "callablesJS": sJSCallables,
          "gctests": sGCTests,
          "gctestsJS": sGCTestsJS,
"regex_gctests": sGCTests,
          "regex_gctestsJS": sGCTestsJS,
          "paragraph_rules": mergeRulesByOption(lParagraphRules),
          "sentence_rules": mergeRulesByOption(lSentenceRules),
          "paragraph_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lParagraphRulesJS)),
          "sentence_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lSentenceRulesJS)) }
    d.update(dOptions)
    return d |