Grammalecte  Diff

Differences From Artifact [1ea2b6d97a]:

To Artifact [8c11a24e22]:


108
109
110
111
112
113
114
115
116
117









118
119
120
121
122
123
124
108
109
110
111
112
113
114

115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132







-


+
+
+
+
+
+
+
+
+









def createRule (s, nIdLine, sLang, bParagraph, dOptPriority):
    "returns rule as list [option name, regex, bCaseInsensitive, identifier, list of actions]"
    global dJSREGEXES
    global nRULEWITHOUTNAME

    #### OPTIONS
    sLineId = str(nIdLine) + ("p" if bParagraph else "s")
    sRuleId = sLineId

    #### GRAPH CALL
    if s.startswith("@@@@"):
        if bParagraph:
            print("Error. Graph call can’t be made only after the first pass (sentence by sentence)")
            exit()
        return ["@@@@", s[4:], sLineId]

    #### OPTIONS
    sOption = False         # False or [a-z0-9]+ name
    nPriority = 4           # Default is 4, value must be between 0 and 9
    tGroups = None          # code for groups positioning (only useful for JavaScript)
    cCaseMode = 'i'         # i: case insensitive,  s: case sensitive,  u: uppercasing allowed
    cWordLimitLeft = '['    # [: word limit, <: no specific limit
    cWordLimitRight = ']'   # ]: word limit, >: no specific limit
    m = re.match("^__(?P<borders_and_case>[[<]\\w[]>])(?P<option>/[a-zA-Z0-9]+|)(?P<ruleid>\\(\\w+\\)|)(?P<priority>![0-9]|)__ *", s)
341
342
343
344
345
346
347

348
349


350
351
352
353
354
355
356
349
350
351
352
353
354
355
356


357
358
359
360
361
362
363
364
365







+
-
-
+
+







        print("# Unknown action at line " + sIdAction)
        return None


def _calcRulesStats (lRules):
    d = {'=':0, '~': 0, '-': 0, '>': 0}
    for aRule in lRules:
        if aRule[0] != "@@@@":
        for aAction in aRule[6]:
            d[aAction[1]] = d[aAction[1]] + 1
            for aAction in aRule[6]:
                d[aAction[1]] = d[aAction[1]] + 1
    return (d, len(lRules))


def displayStats (lParagraphRules, lSentenceRules):
    print("  {:>18} {:>18} {:>18} {:>18}".format("DISAMBIGUATOR", "TEXT PROCESSOR", "GRAMMAR CHECKING", "REGEX"))
    d, nRule = _calcRulesStats(lParagraphRules)
    print("§ {:>10} actions {:>10} actions {:>10} actions  in {:>8} rules".format(d['='], d['~'], d['-'], nRule))
434
435
436
437
438
439
440
441
442
443
444
445

446
447
448

449
450

451

452
453
454


455

456
457
458
459
460
461
462

463

464

465
466

467
468

469
470
471


472
473
474
475

476

477

478
479
480
481
482
483
484
443
444
445
446
447
448
449


450
451
452
453
454
455
456
457
458
459
460

461
462
463

464
465
466
467
468
469
470
471
472
473
474
475

476
477
478
479
480
481
482
483
484
485
486

487
488
489
490
491
492
493

494
495
496
497
498
499
500
501
502
503







-
-



+



+


+
-
+


-
+
+

+







+
-
+

+


+


+


-
+
+




+
-
+

+







    # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
    print("  parsing rules...")
    global dDEF
    lLine = []
    lRuleLine = []
    lTest = []
    lOpt = []
    zBookmark = re.compile("^!!+")
    zGraphLink = re.compile(r"^@@@@GRAPHLINK>(\w+)@@@@")

    for i, sLine in enumerate(lRules, 1):
        if sLine.startswith('#END'):
            # arbitrary end
            printBookmark(0, "BREAK BY #END", i)
            break
        elif sLine.startswith("#"):
            # comment
            pass
        elif sLine.startswith("@@@@"):
            # rules graph call
            m = re.match(r"^@@@@GRAPHLINK>(\w+)@@@@", sLine.strip())
            m = re.match(r"@@@@GRAPH: *(\w+)@@@@", sLine.strip())
            if m:
                #lRuleLine.append(["@GRAPHLINK", m.group(1)])
                printBookmark(1, "@GRAPHLINK: " + m.group(1), i)
                printBookmark(1, "@GRAPH: " + m.group(1), i)
                lRuleLine.append([i, "@@@@"+m.group(1)])
        elif sLine.startswith("DEF:"):
            # definition
            m = re.match("DEF: +([a-zA-Z_][a-zA-Z_0-9]*) +(.+)$", sLine.strip())
            if m:
                dDEF["{"+m.group(1)+"}"] = m.group(2)
            else:
                print("Error in definition: ", end="")
                print(sLine.strip())
        elif sLine.startswith("TEST:"):
            # test
            lTest.append("{:<8}".format(i) + "  " + sLine[5:].strip())
            lTest.append("r{:<7}".format(i) + "  " + sLine[5:].strip())
        elif sLine.startswith("TODO:"):
            # todo
            pass
        elif sLine.startswith(("OPTGROUP/", "OPTSOFTWARE:", "OPT/", "OPTLANG/", "OPTDEFAULTUILANG:", "OPTLABEL/", "OPTPRIORITY/")):
            # options
            lOpt.append(sLine)
        elif re.match("[  \t]*$", sLine):
            # empty line
            pass
        elif sLine.startswith("!!"):
            m = zBookmark.search(sLine)
            # bookmark
            m = re.match("!!+", sLine)
            nExMk = len(m.group(0))
            if sLine[nExMk:].strip():
                printBookmark(nExMk-2, sLine[nExMk:].strip(), i)
        elif sLine.startswith(("    ", "\t")):
            # rule (continuation)
            lRuleLine[len(lRuleLine)-1][1] += " " + sLine.strip()
            lRuleLine[-1][1] += " " + sLine.strip()
        else:
            # new rule
            lRuleLine.append([i, sLine.strip()])

    # generating options files
    print("  parsing options...")
    try:
        dOptions, dOptPriority = prepareOptions(lOpt)
    except:
540
541
542
543
544
545
546
547
548


549
550
551
552
553
554
555
559
560
561
562
563
564
565


566
567
568
569
570
571
572
573
574







-
-
+
+








    displayStats(lParagraphRules, lSentenceRules)

    print("Unnamed rules: " + str(nRULEWITHOUTNAME))

    d = { "callables": sPyCallables,
          "callablesJS": sJSCallables,
          "gctests": sGCTests,
          "gctestsJS": sGCTestsJS,
          "regex_gctests": sGCTests,
          "regex_gctestsJS": sGCTestsJS,
          "paragraph_rules": mergeRulesByOption(lParagraphRules),
          "sentence_rules": mergeRulesByOption(lSentenceRules),
          "paragraph_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lParagraphRulesJS)),
          "sentence_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lSentenceRulesJS)) }
    d.update(dOptions)

    return d