Overview
| Comment: | [build][core] graph calls within regex rules | 
|---|---|
| SHA3-256: | eeef098bd9f125077bc432ade4b12eed | 
| User & Date: | olr on 2018-06-11 13:21:42 | 
Context
| 2018-06-11 | |
| 18:20 | [misc] SublimeText syntax rules update (check-in: cac524f6db, user: olr, tags: misc, rg) |
| 13:21 | [build][core] graph calls within regex rules (check-in: eeef098bd9, user: olr, tags: core, build, rg) |
| 09:26 | [build][core] named graphs (check-in: 7e92a17d42, user: olr, tags: core, build, rg) |
Changes
Modified compile_rules.py from [394c512707] to [8c11a24e22].
def createRule (s, nIdLine, sLang, bParagraph, dOptPriority):
    "returns rule as list [option name, regex, bCaseInsensitive, identifier, list of actions]"
    global dJSREGEXES
    global nRULEWITHOUTNAME
    sLineId = str(nIdLine) + ("p" if bParagraph else "s")
    sRuleId = sLineId
    #### GRAPH CALL
    if s.startswith("@@@@"):
        if bParagraph:
            print("Error. Graph call can’t be made only after the first pass (sentence by sentence)")
            exit()
        return ["@@@@", s[4:], sLineId]
    #### OPTIONS
    sOption = False         # False or [a-z0-9]+ name
    nPriority = 4           # Default is 4, value must be between 0 and 9
    tGroups = None          # code for groups positioning (only useful for JavaScript)
    cCaseMode = 'i'         # i: case insensitive,  s: case sensitive,  u: uppercasing allowed
    cWordLimitLeft = '['    # [: word limit, <: no specific limit
    cWordLimitRight = ']'   # ]: word limit, >: no specific limit
    m = re.match("^__(?P<borders_and_case>[[<]\\w[]>])(?P<option>/[a-zA-Z0-9]+|)(?P<ruleid>\\(\\w+\\)|)(?P<priority>![0-9]|)__ *", s)
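Below is a minimal, self-contained sketch (not the project code) of the new dispatch at the top of `createRule`: a line starting with `@@@@` is returned as a graph call instead of going through option/priority/regex parsing. The rule text and line number are made up for illustration.

```python
# Minimal sketch of the "@@@@" dispatch in createRule (illustrative only).
def create_rule_sketch(s, nIdLine, bParagraph):
    sLineId = str(nIdLine) + ("p" if bParagraph else "s")
    if s.startswith("@@@@"):
        # graph call: no regex, options or actions to parse,
        # only the graph name and the line identifier are kept
        return ["@@@@", s[4:], sLineId]
    return ["<regular regex rule>", s, sLineId]   # placeholder for the normal path

print(create_rule_sketch("@@@@test_graph", 12445, False))
# ['@@@@', 'test_graph', '12445s']
```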
        print("# Unknown action at line " + sIdAction)
        return None
def _calcRulesStats (lRules):
    d = {'=':0, '~': 0, '-': 0, '>': 0}
    for aRule in lRules:
        if aRule[0] != "@@@@":
            for aAction in aRule[6]:
                d[aAction[1]] = d[aAction[1]] + 1
    return (d, len(lRules))
def displayStats (lParagraphRules, lSentenceRules):
    print("  {:>18} {:>18} {:>18} {:>18}".format("DISAMBIGUATOR", "TEXT PROCESSOR", "GRAMMAR CHECKING", "REGEX"))
    d, nRule = _calcRulesStats(lParagraphRules)
    print("§ {:>10} actions {:>10} actions {:>10} actions  in {:>8} rules".format(d['='], d['~'], d['-'], nRule))
    # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
    print("  parsing rules...")
    global dDEF
    lLine = []
    lRuleLine = []
    lTest = []
    lOpt = []
    for i, sLine in enumerate(lRules, 1):
        if sLine.startswith('#END'):
            # arbitrary end
            printBookmark(0, "BREAK BY #END", i)
            break
        elif sLine.startswith("#"):
            # comment
            pass
        elif sLine.startswith("@@@@"):
            # rules graph call
            m = re.match(r"@@@@GRAPH: *(\w+)@@@@", sLine.strip())
            if m:
                #lRuleLine.append(["@GRAPHLINK", m.group(1)])
                printBookmark(1, "@GRAPH: " + m.group(1), i)
                lRuleLine.append([i, "@@@@"+m.group(1)])
        elif sLine.startswith("DEF:"):
            # definition
            m = re.match("DEF: +([a-zA-Z_][a-zA-Z_0-9]*) +(.+)$", sLine.strip())
            if m:
                dDEF["{"+m.group(1)+"}"] = m.group(2)
            else:
                print("Error in definition: ", end="")
                print(sLine.strip())
        elif sLine.startswith("TEST:"):
            # test
            lTest.append("r{:<7}".format(i) + "  " + sLine[5:].strip())
        elif sLine.startswith("TODO:"):
            # todo
            pass
        elif sLine.startswith(("OPTGROUP/", "OPTSOFTWARE:", "OPT/", "OPTLANG/", "OPTDEFAULTUILANG:", "OPTLABEL/", "OPTPRIORITY/")):
            # options
            lOpt.append(sLine)
        elif re.match("[  \t]*$", sLine):
            # empty line
            pass
        elif sLine.startswith("!!"):
            # bookmark
            m = re.match("!!+", sLine)
            nExMk = len(m.group(0))
            if sLine[nExMk:].strip():
                printBookmark(nExMk-2, sLine[nExMk:].strip(), i)
        elif sLine.startswith(("    ", "\t")):
            # rule (continuation)
            lRuleLine[-1][1] += " " + sLine.strip()
        else:
            # new rule
            lRuleLine.append([i, sLine.strip()])
    # generating options files
    print("  parsing options...")
    try:
        dOptions, dOptPriority = prepareOptions(lOpt)
    except:
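A quick check of the graph-call marker handled by the new `@@@@` branch above; the marker syntax comes straight from the diff, the rule-file line number is illustrative.

```python
import re

sLine = "@@@@GRAPH: test_graph@@@@"
m = re.match(r"@@@@GRAPH: *(\w+)@@@@", sLine.strip())
if m:
    # the parser stores [rule-file line number, "@@@@" + graph name]
    print([12445, "@@@@" + m.group(1)])    # [12445, '@@@@test_graph']
```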
Modified compile_rules_js_convert.py from [5ad87f3f46] to [f2cc9f3e39].
    if not lNegLookBeforeRegex:
        lNegLookBeforeRegex = None
    return (sRegex, lNegLookBeforeRegex)
def pyRuleToJS (lRule, dJSREGEXES, sWORDLIMITLEFT):
    lRuleJS = copy.deepcopy(lRule)
    # graph rules
    if lRuleJS[0] == "@@@@":
        return lRuleJS
    del lRule[-1] # tGroups positioning codes are useless for Python
    # error messages
    for aAction in lRuleJS[6]:
        if aAction[1] == "-":
            aAction[2] = aAction[2].replace(" ", " ") # nbsp --> nnbsp
            aAction[4] = aAction[4].replace("« ", "« ").replace(" »", " »").replace(" :", " :").replace(" :", " :")
    # js regexes
    lRuleJS[1], lNegLookBehindRegex = regex2js(dJSREGEXES.get(lRuleJS[3], lRuleJS[1]), sWORDLIMITLEFT)
    lRuleJS.append(lNegLookBehindRegex)
    return lRuleJS
def writeRulesToJSArray (lRules):
    sArray = "[\n"
    for sOption, aRuleGroup in lRules:
        if sOption != "@@@@":
            sArray += '  ["' + sOption + '", [\n'  if sOption  else  "  [false, [\n"
            for sRegex, bCaseInsensitive, sLineId, sRuleId, nPriority, lActions, aGroups, aNegLookBehindRegex in aRuleGroup:
                sArray += '    [' + sRegex + ", "
                sArray += "true, " if bCaseInsensitive  else "false, "
                sArray += '"' + sLineId + '", '
                sArray += '"' + sRuleId + '", '
                sArray += str(nPriority) + ", "
                sArray += json.dumps(lActions, ensure_ascii=False) + ", "
                sArray += json.dumps(aGroups, ensure_ascii=False) + ", "
                sArray += json.dumps(aNegLookBehindRegex, ensure_ascii=False) + "],\n"
            sArray += "  ]],\n"
        else:
            sArray += '  ["' + sOption + '", [\n'
            for sGraphName, sLineId in aRuleGroup:
                sArray += '    ["' + sGraphName + '", "' + sLineId + '"],\n"'
            sArray += "  ]],\n"
    sArray += "]"
    return sArray
def groupsPositioningCodeToList (sGroupsPositioningCode):
    if not sGroupsPositioningCode:
        return None
    return [ int(sCode)  if sCode.isdigit() or (sCode[0:1] == "-" and sCode[1:].isdigit())  else sCode \
             for sCode in sGroupsPositioningCode.split(",") ]
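For reference, a minimal sketch of how `writeRulesToJSArray` now serializes a graph-call group: entries are plain `[graph name, line id]` pairs instead of full regex rules. The group data is made up, and the stray quotation mark at the end of the committed `'"],\n"'` line looks accidental, so it is left out here.

```python
sOption = "@@@@"
aRuleGroup = [("test_graph", "12445s")]        # illustrative group

sArray = '  ["' + sOption + '", [\n'
for sGraphName, sLineId in aRuleGroup:
    sArray += '    ["' + sGraphName + '", "' + sLineId + '"],\n'
sArray += "  ]],\n"
print(sArray)
# Output:
#   ["@@@@", [
#     ["test_graph", "12445s"],
#   ]],
```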
Modified gc_core/py/lang_core/gc_engine.py from [12b89317cd] to [0da090b219].
def _loadRules ():
    from . import gc_rules
    global _rules
    _rules = gc_rules
    # compile rules regex
    for sOption, lRuleGroup in chain(_rules.lParagraphRules, _rules.lSentenceRules):
        if sOption != "@@@@":
            for aRule in lRuleGroup:
                try:
                    aRule[0] = re.compile(aRule[0])
                except:
                    echo("Bad regular expression in # " + str(aRule[2]))
                    aRule[0] = "(?i)<Grammalecte>"
#### Parsing
def parse (sText, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
    "analyses the paragraph sText and returns list of errors"
    #sText = unicodedata.normalize("NFC", sText)
    aErrors = None
    sRealText = sText
    dDA = {}        # Disambiguisator. Key = position; value = list of morphologies
    dPriority = {}  # Key = position; value = priority
    dOpt = _dOptions  if not dOptions  else dOptions
    bShowRuleId = option('idrule')
    # parse paragraph
    try:
        sNew, aErrors = _proofread(None, sText, sRealText, 0, True, dDA, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
        if sNew:
            sText = sNew
    except:
        raise
    # cleanup
    if " " in sText:
        sText = sText.replace(" ", ' ') # nbsp
    if " " in sText:
        sText = sText.replace(" ", ' ') # nnbsp
    if "'" in sText:
        sText = sText.replace("'", "’")
    if "‑" in sText:
        sText = sText.replace("‑", "-") # nobreakdash
    # parse sentences
    for iStart, iEnd in _getSentenceBoundaries(sText):
        if 4 < (iEnd - iStart) < 2000:
            dDA.clear()
            try:
                oSentence = TokenSentence(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart)
                _, errs = _proofread(oSentence, sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dDA, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
                aErrors.update(errs)
            except:
                raise
    return aErrors.values() # this is a view (iterable)
_zEndOfSentence = re.compile(r'([.?!:;…][ .?!… »”")]*|.$)')
_zBeginOfParagraph = re.compile(r"^\W*")
_zEndOfParagraph = re.compile(r"\W*$")
def _getSentenceBoundaries (sText):
    iStart = _zBeginOfParagraph.match(sText).end()
    for m in _zEndOfSentence.finditer(sText):
        yield (iStart, m.end())
        iStart = m.end()
def _proofread (oSentence, s, sx, nOffset, bParagraph, dDA, dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext):
    dErrs = {}
    bChange = False
    for sOption, lRuleGroup in _getRules(bParagraph):
        if sOption == "@@@@":
            # graph rules
            for sGraphName, sLineId in lRuleGroup:
                if bDebug:
                    print(sGraphName, sLineId)
                bChange, errs = oSentence.parse(dAllGraph[sGraphName], dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext)
                dErrs.update(errs)
                if bChange:
                    oSentence.rewrite()
                    if bDebug:
                        print("~", oSentence.sSentence)
        elif not sOption or dOptions.get(sOption, False):
            # regex rules
            for zRegex, bUppercase, sLineId, sRuleId, nPriority, lActions in lRuleGroup:
                if sRuleId not in _aIgnoredRules:
                    for m in zRegex.finditer(s):
                        bCondMemo = None
                        for sFuncCond, cActionType, sWhat, *eAct in lActions:
                            # action in lActions: [ condition, action type, replacement/suggestion/action[, iGroup[, message, URL]] ]
                            try:
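A self-contained sketch of the dispatch that `_proofread` now performs: a group tagged `@@@@` hands each named graph to the token-based sentence parser, while any other group stays on the regex path. `FakeSentence`, `dAllGraph` and the rule data below are stand-ins, not the engine's objects.

```python
class FakeSentence:
    def parse(self, dGraph):
        print("graph pass on:", dGraph["name"])
        return False, {}                       # (bChange, errors), as in the engine

dAllGraph = {"test_graph": {"name": "test_graph"}}
lRuleGroups = [
    ("@@@@", [("test_graph", "12445s")]),                              # graph calls
    (False,  [("<compiled regex>", False, "10s", "10s", 4, [])]),      # regex rules
]
oSentence = FakeSentence()
for sOption, lRuleGroup in lRuleGroups:
    if sOption == "@@@@":
        for sGraphName, sLineId in lRuleGroup:
            bChange, errs = oSentence.parse(dAllGraph[sGraphName])
    else:
        print("regex pass:", len(lRuleGroup), "rule(s)")
```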
    if sFilter:
        try:
            zFilter = re.compile(sFilter)
        except:
            echo("# Error. List rules: wrong regex.")
            sFilter = None
    for sOption, lRuleGroup in chain(_getRules(True), _getRules(False)):
        if sOption != "@@@@":
            for _, _, sLineId, sRuleId, _, _ in lRuleGroup:
                if not sFilter or zFilter.search(sRuleId):
                    yield (sOption, sLineId, sRuleId)
def displayRules (sFilter=None):
    echo("List of rules. Filter: << " + str(sFilter) + " >>")
    for sOption, sLineId, sRuleId in listRules(sFilter):
        echo("{:<10} {:<10} {}".format(sOption, sLineId, sRuleId))
Modified gc_lang/fr/rules.grx from [bf0a5c739e] to [5101ac8a06].
    -2>> =suggVerbMode(@, ":I", \1)
    # Après « quand » ou « lorsque », le verbe ne s’emploie pas au subjonctif mais à l’indicatif.
TEST: quand elle {{rencontrât}} son créateur
TEST: lorsqu’il y {{eût}} du grabuge, nous montâmes tous sur le pont.
@@@@GRAPH: test_graph@@@@
!!
!!
!!
!!
!!
!!
TEST: Éliante, cousine de Célimène,
TEST: Arsinoé, amie de Célimène,
TEST: Acaste,
TEST: Clitandre, marquis
TEST: Basque, valet de Célimène,
TEST: Un garde de la maréchaussée de France,
TEST: Dubois, valet d’Alceste.
TEST: La scène se passe à Paris, dans la maison de Célimène.
TEST: ACTE I
TEST: SCÈNE PREMIÈRE. Philinte, Alceste.
TEST: PHILINTE. Qu’est-ce donc ? Qu’avez-vous ?
TEST: ALCESTE, assis. Laissez-moi, je vous prie.
TEST: PHILINTE. Mais encor, dites-moi, quelle bizarrerie…
TEST: ALCESTE. Laissez-moi là, vous dis-je, et courez vous cacher.
TEST: PHILINTE. Mais on entend les gens au moins sans se fâcher.
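Each of these `TEST:` lines is collected by the parsing loop in compile_rules.py shown earlier; a minimal check of the formatting it applies (the rule-file line number is illustrative):

```python
i = 16523
sLine = "TEST: La scène se passe à Paris, dans la maison de Célimène."
print("r{:<7}".format(i) + "  " + sLine[5:].strip())
# r16523    La scène se passe à Paris, dans la maison de Célimène.
```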