Index: compile_rules.py
==================================================================
--- compile_rules.py
+++ compile_rules.py
@@ -110,13 +110,21 @@
 def createRule (s, nIdLine, sLang, bParagraph, dOptPriority):
     "returns rule as list [option name, regex, bCaseInsensitive, identifier, list of actions]"
     global dJSREGEXES
     global nRULEWITHOUTNAME

-    #### OPTIONS
     sLineId = str(nIdLine) + ("p" if bParagraph else "s")
     sRuleId = sLineId
+
+    #### GRAPH CALL
+    if s.startswith("@@@@"):
+        if bParagraph:
+            print("Error. Graph call can be made only after the first pass (sentence by sentence)")
+            exit()
+        return ["@@@@", s[4:], sLineId]
+
+    #### OPTIONS
     sOption = False         # False or [a-z0-9]+ name
     nPriority = 4           # Default is 4, value must be between 0 and 9
     tGroups = None          # code for groups positioning (only useful for JavaScript)
     cCaseMode = 'i'         # i: case insensitive, s: case sensitive, u: uppercasing allowed
     cWordLimitLeft = '['    # [: word limit, <: no specific limit
@@ -343,12 +351,13 @@


 def _calcRulesStats (lRules):
     d = {'=':0, '~': 0, '-': 0, '>': 0}
     for aRule in lRules:
-        for aAction in aRule[6]:
-            d[aAction[1]] = d[aAction[1]] + 1
+        if aRule[0] != "@@@@":
+            for aAction in aRule[6]:
+                d[aAction[1]] = d[aAction[1]] + 1
     return (d, len(lRules))


 def displayStats (lParagraphRules, lSentenceRules):
     print("  {:>18} {:>18} {:>18} {:>18}".format("DISAMBIGUATOR", "TEXT PROCESSOR", "GRAMMAR CHECKING", "REGEX"))
@@ -436,47 +445,57 @@
     global dDEF
     lLine = []
     lRuleLine = []
     lTest = []
     lOpt = []
-    zBookmark = re.compile("^!!+")
-    zGraphLink = re.compile(r"^@@@@GRAPHLINK>(\w+)@@@@")

     for i, sLine in enumerate(lRules, 1):
         if sLine.startswith('#END'):
+            # arbitrary end
             printBookmark(0, "BREAK BY #END", i)
             break
         elif sLine.startswith("#"):
+            # comment
             pass
         elif sLine.startswith("@@@@"):
-            m = re.match(r"^@@@@GRAPHLINK>(\w+)@@@@", sLine.strip())
+            # rules graph call
+            m = re.match(r"@@@@GRAPH: *(\w+)@@@@", sLine.strip())
             if m:
                 #lRuleLine.append(["@GRAPHLINK", m.group(1)])
-                printBookmark(1, "@GRAPHLINK: " + m.group(1), i)
+                printBookmark(1, "@GRAPH: " + m.group(1), i)
+                lRuleLine.append([i, "@@@@"+m.group(1)])
         elif sLine.startswith("DEF:"):
+            # definition
             m = re.match("DEF: +([a-zA-Z_][a-zA-Z_0-9]*) +(.+)$", sLine.strip())
             if m:
                 dDEF["{"+m.group(1)+"}"] = m.group(2)
             else:
                 print("Error in definition: ", end="")
                 print(sLine.strip())
         elif sLine.startswith("TEST:"):
+            # test
             lTest.append("r{:<7}".format(i) + " " + sLine[5:].strip())
         elif sLine.startswith("TODO:"):
+            # todo
             pass
         elif sLine.startswith(("OPTGROUP/", "OPTSOFTWARE:", "OPT/", "OPTLANG/", "OPTDEFAULTUILANG:", "OPTLABEL/", "OPTPRIORITY/")):
+            # options
             lOpt.append(sLine)
         elif re.match("[  \t]*$", sLine):
+            # empty line
             pass
         elif sLine.startswith("!!"):
-            m = zBookmark.search(sLine)
+            # bookmark
+            m = re.match("!!+", sLine)
             nExMk = len(m.group(0))
             if sLine[nExMk:].strip():
                 printBookmark(nExMk-2, sLine[nExMk:].strip(), i)
         elif sLine.startswith((" ", "\t")):
-            lRuleLine[len(lRuleLine)-1][1] += " " + sLine.strip()
+            # rule (continuation)
+            lRuleLine[-1][1] += " " + sLine.strip()
         else:
+            # new rule
             lRuleLine.append([i, sLine.strip()])

     # generating options files
     print("  parsing options...")
     try:
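With the createRule() change, a sentence-level rule line that begins with @@@@ short-circuits the regex machinery and is returned as a three-element graph call. A minimal sketch of the expected result, using the test_graph declared further down in rules.grx (the line number and the empty dOptPriority are placeholders, not values from the real rules file; neither is used on this path):

    >>> createRule("@@@@test_graph", 12447, "fr", False, {})
    ['@@@@', 'test_graph', '12447s']
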
Index: compile_rules_js_convert.py
==================================================================
--- compile_rules_js_convert.py
+++ compile_rules_js_convert.py
@@ -117,10 +117,13 @@
     return (sRegex, lNegLookBeforeRegex)


 def pyRuleToJS (lRule, dJSREGEXES, sWORDLIMITLEFT):
     lRuleJS = copy.deepcopy(lRule)
+    # graph rules
+    if lRuleJS[0] == "@@@@":
+        return lRuleJS
     del lRule[-1] # tGroups positioning codes are useless for Python
     # error messages
     for aAction in lRuleJS[6]:
         if aAction[1] == "-":
             aAction[2] = aAction[2].replace(" ", " ") # nbsp --> nnbsp
@@ -132,25 +135,31 @@


 def writeRulesToJSArray (lRules):
     sArray = "[\n"
     for sOption, aRuleGroup in lRules:
-        sArray += '  ["' + sOption + '", [\n' if sOption else "  [false, [\n"
-        for sRegex, bCaseInsensitive, sLineId, sRuleId, nPriority, lActions, aGroups, aNegLookBehindRegex in aRuleGroup:
-            sArray += '    [' + sRegex + ", "
-            sArray += "true, " if bCaseInsensitive else "false, "
-            sArray += '"' + sLineId + '", '
-            sArray += '"' + sRuleId + '", '
-            sArray += str(nPriority) + ", "
-            sArray += json.dumps(lActions, ensure_ascii=False) + ", "
-            sArray += json.dumps(aGroups, ensure_ascii=False) + ", "
-            sArray += json.dumps(aNegLookBehindRegex, ensure_ascii=False) + "],\n"
-        sArray += "  ]],\n"
+        if sOption != "@@@@":
+            sArray += '  ["' + sOption + '", [\n' if sOption else "  [false, [\n"
+            for sRegex, bCaseInsensitive, sLineId, sRuleId, nPriority, lActions, aGroups, aNegLookBehindRegex in aRuleGroup:
+                sArray += '    [' + sRegex + ", "
+                sArray += "true, " if bCaseInsensitive else "false, "
+                sArray += '"' + sLineId + '", '
+                sArray += '"' + sRuleId + '", '
+                sArray += str(nPriority) + ", "
+                sArray += json.dumps(lActions, ensure_ascii=False) + ", "
+                sArray += json.dumps(aGroups, ensure_ascii=False) + ", "
+                sArray += json.dumps(aNegLookBehindRegex, ensure_ascii=False) + "],\n"
+            sArray += "  ]],\n"
+        else:
+            sArray += '  ["' + sOption + '", [\n'
+            for sGraphName, sLineId in aRuleGroup:
+                sArray += '    ["' + sGraphName + '", "' + sLineId + '"],\n'
+            sArray += "  ]],\n"
     sArray += "]"
     return sArray


 def groupsPositioningCodeToList (sGroupsPositioningCode):
     if not sGroupsPositioningCode:
         return None
     return [ int(sCode) if sCode.isdigit() or (sCode[0:1] == "-" and sCode[1:].isdigit()) else sCode \
              for sCode in sGroupsPositioningCode.split(",") ]
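With the else branch added to writeRulesToJSArray(), a "@@@@" group is serialized for JavaScript as a plain list of [graph name, line id] pairs rather than as regex rules. Under the same illustrative identifiers as above, the emitted fragment would look roughly like this:

      ["@@@@", [
        ["test_graph", "12447s"],
      ]],
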
Index: gc_core/py/lang_core/gc_engine.py
==================================================================
--- gc_core/py/lang_core/gc_engine.py
+++ gc_core/py/lang_core/gc_engine.py
@@ -89,17 +89,18 @@
 def _loadRules ():
     from . import gc_rules
     global _rules
     _rules = gc_rules
     # compile rules regex
-    for lRuleGroup in chain(_rules.lParagraphRules, _rules.lSentenceRules):
-        for rule in lRuleGroup[1]:
-            try:
-                rule[0] = re.compile(rule[0])
-            except:
-                echo("Bad regular expression in # " + str(rule[2]))
-                rule[0] = "(?i)"
+    for sOption, lRuleGroup in chain(_rules.lParagraphRules, _rules.lSentenceRules):
+        if sOption != "@@@@":
+            for aRule in lRuleGroup:
+                try:
+                    aRule[0] = re.compile(aRule[0])
+                except:
+                    echo("Bad regular expression in # " + str(aRule[2]))
+                    aRule[0] = "(?i)"


 #### Parsing

 def parse (sText, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
@@ -112,11 +113,11 @@
     dOpt = _dOptions if not dOptions else dOptions
     bShowRuleId = option('idrule')

     # parse paragraph
     try:
-        sNew, aErrors = _proofread(sText, sRealText, 0, True, dDA, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
+        sNew, aErrors = _proofread(None, sText, sRealText, 0, True, dDA, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
         if sNew:
             sText = sNew
     except:
         raise
@@ -133,21 +134,13 @@
     # parse sentences
     for iStart, iEnd in _getSentenceBoundaries(sText):
         if 4 < (iEnd - iStart) < 2000:
             dDA.clear()
             try:
-                # regex parser
-                _, errs = _proofread(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dDA, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
-                aErrors.update(errs)
-                # token parser
                 oSentence = TokenSentence(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart)
-                bChange, errs = oSentence.parse(dAllGraph["test_graph"], dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
+                _, errs = _proofread(oSentence, sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dDA, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
                 aErrors.update(errs)
-                if bChange:
-                    oSentence.rewrite()
-                    if bDebug:
-                        print("~", oSentence.sSentence)
             except:
                 raise

     return aErrors.values() # this is a view (iterable)

@@ -160,15 +153,27 @@
     for m in _zEndOfSentence.finditer(sText):
         yield (iStart, m.end())
         iStart = m.end()


-def _proofread (s, sx, nOffset, bParagraph, dDA, dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext):
+def _proofread (oSentence, s, sx, nOffset, bParagraph, dDA, dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext):
     dErrs = {}
     bChange = False
     for sOption, lRuleGroup in _getRules(bParagraph):
-        if not sOption or dOptions.get(sOption, False):
+        if sOption == "@@@@":
+            # graph rules
+            for sGraphName, sLineId in lRuleGroup:
+                if bDebug:
+                    print(sGraphName, sLineId)
+                bChange, errs = oSentence.parse(dAllGraph[sGraphName], dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext)
+                dErrs.update(errs)
+                if bChange:
+                    oSentence.rewrite()
+                    if bDebug:
+                        print("~", oSentence.sSentence)
+        elif not sOption or dOptions.get(sOption, False):
+            # regex rules
             for zRegex, bUppercase, sLineId, sRuleId, nPriority, lActions in lRuleGroup:
                 if sRuleId not in _aIgnoredRules:
                     for m in zRegex.finditer(s):
                         bCondMemo = None
                         for sFuncCond, cActionType, sWhat, *eAct in lActions:
@@ -325,13 +330,14 @@
         zFilter = re.compile(sFilter)
     except:
         echo("# Error. List rules: wrong regex.")
         sFilter = None
     for sOption, lRuleGroup in chain(_getRules(True), _getRules(False)):
-        for _, _, sLineId, sRuleId, _, _ in lRuleGroup:
-            if not sFilter or zFilter.search(sRuleId):
-                yield (sOption, sLineId, sRuleId)
+        if sOption != "@@@@":
+            for _, _, sLineId, sRuleId, _, _ in lRuleGroup:
+                if not sFilter or zFilter.search(sRuleId):
+                    yield (sOption, sLineId, sRuleId)


 def displayRules (sFilter=None):
     echo("List of rules. Filter: << " + str(sFilter) + " >>")
     for sOption, sLineId, sRuleId in listRules(sFilter):
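Two remarks on the _proofread() dispatch: since compile_rules.py rejects graph calls during the paragraph pass, the paragraph-level call with oSentence=None can never reach the graph branch; and each rule group now comes in one of two shapes, sketched below with illustrative identifiers (the option name and ids are not taken from the generated gc_rules):

    # regex rule group (unchanged): option tag + regex rules
    ("maj", [ [zRegex, bUppercase, sLineId, sRuleId, nPriority, lActions], ... ])
    # graph rule group (new): "@@@@" tag + (graph name, line id) pairs
    ("@@@@", [ ("test_graph", "12447s"), ... ])
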
Index: gc_lang/fr/rules.grx
==================================================================
--- gc_lang/fr/rules.grx
+++ gc_lang/fr/rules.grx
@@ -12443,10 +12443,14 @@

 TEST: quand elle {{rencontrât}} son créateur
 TEST: lorsqu’il y {{eût}} du grabuge, nous montâmes tous sur le pont.


+
+@@@@GRAPH: test_graph@@@@
+
+
 !!
 !!
 !!
 !!
@@ -16514,13 +16518,11 @@
 TEST: Acaste,
 TEST: Clitandre, marquis
 TEST: Basque, valet de Célimène,
 TEST: Un garde de la maréchaussée de France,
 TEST: Dubois, valet d’Alceste.
-TEST: La scène se passe à Paris, dans la maison de Célimène.
-
 TEST: ACTE I
 TEST: SCÈNE PREMIÈRE. Philinte, Alceste.
 TEST: PHILINTE. Qu’est-ce donc ? Qu’avez-vous ?
 TEST: ALCESTE, assis. Laissez-moi, je vous prie.
 TEST: PHILINTE. Mais encor, dites-moi, quelle bizarrerie…