Grammalecte: compile_rules_graph.py at [18e5cd0473]

File compile_rules_graph.py artifact 523cbe02bb part of check-in 18e5cd0473

"""
Grammalecte: compile rules
Create a Direct Acyclic Rule Graphs (DARGs)
"""

import re
import os
import time
import concurrent.futures

import darg
import compile_rules_js_convert as jsconv
import helpers
import graphspell


#### PROCESS POOL EXECUTOR ####
xProcessPoolExecutor = None

def initProcessPoolExecutor (nMultiCPU=None):
    "process pool executor initialisation"
    global xProcessPoolExecutor
    if xProcessPoolExecutor:
        # we shutdown the ProcessPoolExecutor which may have been launched previously
        print("  ProcessPoolExecutor shutdown.")
        xProcessPoolExecutor.shutdown(wait=False)
    nMaxCPU = max(os.cpu_count()-1, 1)
    if nMultiCPU is None or not (1 <= nMultiCPU <= nMaxCPU):
        nMultiCPU = nMaxCPU
    print("  CPU processes used for workers: ", nMultiCPU)
    xProcessPoolExecutor = concurrent.futures.ProcessPoolExecutor(max_workers=nMultiCPU)


def rewriteCode (sCode):
    "convert simple code syntax to a string of Python code"
    if sCode[0:1] == "=":
        sCode = sCode[1:]
    sCode = sCode.replace("__also__", "bCondMemo")
    sCode = sCode.replace("__else__", "not bCondMemo")
    sCode = sCode.replace("sContext", "_sAppContext")
    sCode = re.sub(r"\b(morph[0x]?|morphVC|value|tag|meta|info)[(]\\(\d+)", 'g_\\1(lToken[nTokenOffset+\\2]', sCode)
    sCode = re.sub(r"\b(morph[0x]?|morphVC|value|tag|meta|info)[(]\\-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1]', sCode)
    sCode = re.sub(r"\b(select|define|definefrom|rewrite|addmorph|setmeta)[(][\\](\d+)", 'g_\\1(lToken[nTokenOffset+\\2]', sCode)
    sCode = re.sub(r"\b(select|define|definefrom|rewrite|addmorph|setmeta)[(][\\]-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1]', sCode)
    sCode = re.sub(r"\b(agreement|suggAgree)[(][\\](\d+), *[\\](\d+)", 'g_\\1(lToken[nTokenOffset+\\2], lToken[nTokenOffset+\\3]', sCode)
    sCode = re.sub(r"\b(agreement|suggAgree)[(][\\](\d+), *[\\]-(\d+)", 'g_\\1(lToken[nTokenOffset+\\2], lToken[nLastToken-\\3+1]', sCode)
    sCode = re.sub(r"\b(agreement|suggAgree)[(][\\]-(\d+), *[\\](\d+)", 'g_\\1(lToken[nLastToken-\\2+1], lToken[nTokenOffset+\\3]', sCode)
    sCode = re.sub(r"\b(agreement|suggAgree)[(][\\]-(\d+), *[\\]-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1], lToken[nLastToken-\\3+1]', sCode)
    sCode = re.sub(r"\b(tagbefore|tagafter)[(][\\](\d+)", 'g_\\1(lToken[nTokenOffset+\\2], dTags', sCode)
    sCode = re.sub(r"\b(tagbefore|tagafter)[(][\\]-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1], dTags', sCode)
    sCode = re.sub(r"\bspace[(][\\](\d+)", 'g_space(lToken[nTokenOffset+\\1], lToken[nTokenOffset+\\1+1]', sCode)
    sCode = re.sub(r"\bspace[(][\\]-(\d+)", 'g_space(lToken[nLastToken-\\1+1], lToken[nLastToken-\\1+2]', sCode)
    sCode = re.sub(r"\bmorph2[(][\\](\d+)", 'g_morph2(lToken[nTokenOffset+\\1], lToken[nTokenOffset+\\1+1]', sCode)
    sCode = re.sub(r"\bmorph2[(][\\]-(\d+)", 'g_morph2(lToken[nLastToken-\\1+1], lToken[nLastToken-\\1+2]', sCode)
    sCode = re.sub(r"\b(morph0?|tag|meta|value|info)\(>1", 'g_\\1(lToken[nLastToken+1]', sCode)                      # next token
    sCode = re.sub(r"\b(morph0?|tag|meta|value|info)\(<1", 'g_\\1(lToken[nTokenOffset]', sCode)                      # previous token
    sCode = re.sub(r"\b(morph0?|tag|meta|value|info)\(>(\d+)", 'g_\\1(g_token(lToken, nLastToken+\\2)', sCode)       # next token
    sCode = re.sub(r"\b(morph0?|tag|meta|value|info)\(<(\d+)", 'g_\\1(g_token(lToken, nTokenOffset+1-\\2)', sCode)   # previous token
    sCode = re.sub(r"\bspace[(](>1)", 'g_space(lToken[nLastToken+1], g_token(lToken, nLastToken+2)', sCode)     # next token
    sCode = re.sub(r"\bspace[(](<1)", 'g_space(lToken[nTokenOffset], lToken[nTokenOffset+1]', sCode)            # previous token
    sCode = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', sCode)
    sCode = re.sub(r"\bbefore\(\s*", 'look(sSentence[:lToken[1+nTokenOffset]["nStart"]], ', sCode)          # before(sCode)
    sCode = re.sub(r"\bafter\(\s*", 'look(sSentence[lToken[nLastToken]["nEnd"]:], ', sCode)                 # after(sCode)
    sCode = re.sub(r"\bbefore0\(\s*", 'look(sSentence0[:lToken[1+nTokenOffset]["nStart"]], ', sCode)        # before0(sCode)
    sCode = re.sub(r"\bafter0\(\s*", 'look(sSentence0[lToken[nLastToken]["nEnd"]:], ', sCode)               # after0(sCode)
    sCode = re.sub(r"[\\](\d+)", 'lToken[nTokenOffset+\\1]["sValue"]', sCode)
    sCode = re.sub(r"[\\]-(\d+)", 'lToken[nLastToken-\\1+1]["sValue"]', sCode)
    sCode = re.sub(r">1", 'lToken[nLastToken+1]["sValue"]', sCode)
    sCode = re.sub(r"<1", 'lToken[nTokenOffset]["sValue"]', sCode)
    return sCode


def changeReferenceToken (sText, dPos):
    "change group reference in <sText> with values in <dPos>"
    if "\\" not in sText:
        return sText
    for i in range(len(dPos), 0, -1):
        sText = re.sub("\\\\"+str(i)+"(?![0-9])", "\\\\"+str(dPos[i]), sText)
    return sText


def checkTokenNumbers (sText, sActionId, nToken):
    "check if token references in <sText> greater than <nToken> (debugging)"
    for x in re.finditer(r"\\(\d+)", sText):
        if int(x.group(1)) > nToken:
            print("# Error in token index at line " + sActionId + " ("+str(nToken)+" tokens only)")
            print(sText)


def checkIfThereIsCode (sText, sActionId):
    "check if there is code in <sText> (debugging)"
    if re.search(r"[.]\w+[(]|sugg\w+[(]|\(\\[0-9]|\[(?:[0-9]:|:)", sText):
        print("# Warning at line " + sActionId + ":  This message looks like code. Line should probably begin with =")
        print(sText)



class GraphBuilder:

    def __init__ (self, sGraphName, sGraphCode, sLang, dDef, dDecl, dOptPriority):
        self.sGraphName = sGraphName
        self.sGraphCode = sGraphCode
        self.sLang = sLang
        self.dDef = dDef
        self.dDecl = dDecl
        self.dOptPriority = dOptPriority
        self.dAntiPatterns = {}
        self.dActions = {}
        self.dFuncName = {}
        self.dFunctions = {}
        self.dLemmas = {}

    def createGraphAndActions (self, lRuleLine):
        "create a graph as a dictionary with <lRuleLine>"
        fStartTimer = time.time()
        print("{:>8,} rules in {:<30} ".format(len(lRuleLine), f"<{self.sGraphName}|{self.sGraphCode}>"), end="")
        lPreparedRule = []
        for i, sRuleName, sTokenLine, iActionBlock, lActions, nPriority in lRuleLine:
            for aRule in self.createRule(i, sRuleName, sTokenLine, iActionBlock, lActions, nPriority):
                lPreparedRule.append(aRule)
        # Debugging
        if False:
            print("\nRULES:")
            for e in lPreparedRule:
                if e[-2] == "##2211":
                    print(e)
        # Graph creation
        oDARG = darg.DARG(lPreparedRule, self.sLang)
        dGraph = oDARG.createGraph()
        print(oDARG, end="")
        # debugging
        if False:
            print("\nGRAPH:", self.sGraphName)
            for k, v in dGraph.items():
                print(k, "\t", v)
        print("\tin {:>8.2f} s".format(time.time()-fStartTimer))
        sPyCallables, sJSCallables = self.createCallables()
        return dGraph, self.dActions, sPyCallables, sJSCallables, self.dLemmas

    def _genTokenLines (self, sTokenLine):
        "tokenize a string and return a list of lines of tokens"
        lTokenLines = []
        nFirstNullable = 0
        nLastNullable = 0
        for n, sTokBlock in enumerate(sTokenLine.split(), 1):
            # replace merger characters by spaces
            if "␣" in sTokBlock:
                sTokBlock = sTokBlock.replace("␣", " ")
            # optional token?
            bNullPossible = sTokBlock.startswith("?") and sTokBlock.endswith("¿")
            if bNullPossible:
                sTokBlock = sTokBlock[1:-1]
                if nFirstNullable == 0:
                    nFirstNullable = n
                nLastNullable = n
            # token with definition?
            if sTokBlock.startswith("(") and sTokBlock.endswith(")"):
                nFirstNullable = -1
            if sTokBlock.startswith("({") and sTokBlock.endswith("})") and sTokBlock[1:-1] in self.dDef:
                sTokBlock = "(" + self.dDef[sTokBlock[1:-1]] + ")"
            elif sTokBlock.startswith("{") and sTokBlock.endswith("}") and sTokBlock in self.dDef:
                sTokBlock = self.dDef[sTokBlock]
            if ( (sTokBlock.startswith("[") and sTokBlock.endswith("]")) or (sTokBlock.startswith("([") and sTokBlock.endswith("])")) ):
                # multiple token
                bSelectedGroup = sTokBlock.startswith("(") and sTokBlock.endswith(")")
                if bSelectedGroup:
                    sTokBlock = sTokBlock[1:-1]
                lToken = self._createTokenList(sTokBlock)
                if not lTokenLines:
                    lTokenLines = [ ["("+s+")"]  for s  in lToken ]  if bSelectedGroup  else [ [s]  for s  in lToken ]
                    if bNullPossible:
                        lTokenLines.extend([ []  for i  in range(len(lToken)+1) ])
                else:
                    lNewTemp = []
                    if bNullPossible:
                        for aRule in lTokenLines:
                            for sElem in lToken:
                                aNewRule = list(aRule)
                                aNewRule.append(sElem)
                                lNewTemp.append(aNewRule)
                    else:
                        sElem1 = lToken.pop(0)
                        for aRule in lTokenLines:
                            for sElem in lToken:
                                aNewRule = list(aRule)
                                aNewRule.append("(" + sElem + ")"  if bSelectedGroup  else sElem)
                                lNewTemp.append(aNewRule)
                            aRule.append("(" + sElem1 + ")"  if bSelectedGroup  else sElem1)
                    lTokenLines.extend(lNewTemp)
            else:
                # simple token
                if not lTokenLines:
                    lTokenLines = [[sTokBlock], []]  if bNullPossible  else [[sTokBlock]]
                else:
                    if bNullPossible:
                        lNewTemp = []
                        for aRule in lTokenLines:
                            lNew = list(aRule)
                            lNew.append(sTokBlock)
                            lNewTemp.append(lNew)
                        lTokenLines.extend(lNewTemp)
                    else:
                        for aRule in lTokenLines:
                            aRule.append(sTokBlock)
        nLastNullable = nLastNullable - n - 1
        for aRule in lTokenLines:
            yield aRule, nFirstNullable, nLastNullable

    def _createTokenList (self, sTokBlock):
        "return a list of tokens from a block of tokens"
        lToken = []
        for sToken in sTokBlock[1:-1].split("|"):
            if "+" in sToken and not sToken.startswith("+"):
                for sCode in self.dDecl:
                    if sToken.endswith(sCode):
                        sToken = sToken[:-len(sCode)]
                        lToken.append(sToken)
                        for sSuffix in self.dDecl[sCode]:
                            lToken.append(sToken+sSuffix)
                        break
            else:
                lToken.append(sToken)
        return lToken

    def createRule (self, iLine, sRuleName, sTokenLine, iActionBlock, lActions, nPriority):
        "generator: create rule as list"
        # print(iLine, "//", sRuleName, "//", sTokenLine, "//", lActions, "//", nPriority)
        if sTokenLine.startswith("!!") and sTokenLine.endswith("¡¡"):
            # antipattern
            sTokenLine = sTokenLine[2:-2].strip()
            if sRuleName not in self.dAntiPatterns:
                self.dAntiPatterns[sRuleName]= []
            for lToken, _, _ in self._genTokenLines(sTokenLine):
                self.dAntiPatterns[sRuleName].append(lToken)
        else:
            # pattern
            for lToken, nFirstNullable, nLastNullable in self._genTokenLines(sTokenLine):
                if sRuleName in self.dAntiPatterns and lToken in self.dAntiPatterns[sRuleName]:
                    # <lToken> matches an antipattern -> discard
                    continue
                # Calculate positions
                dPos = {}   # key: iGroup, value: iToken
                iGroup = 0
                #if iLine == 15818: # debug
                #    print(" ".join(lToken))
                for i, sToken in enumerate(lToken):
                    if sToken.startswith("(") and sToken.endswith(")"):
                        lToken[i] = sToken[1:-1]
                        iGroup += 1
                        dPos[iGroup] = i + 1    # we add 1, for we count tokens from 1 to n (not from 0)
                    # check lemmas
                    if sToken.startswith(">") and sToken != ">" and sToken[1:] not in self.dLemmas:
                        self.dLemmas[sToken[1:]] = iLine
                # Parse actions
                for iAction, (iActionLine, sAction) in enumerate(lActions, 1):
                    sAction = sAction.strip()
                    if sAction:
                        sActionId = f"{self.sGraphCode}__{sRuleName}__b{iActionBlock}_a{iAction}"
                        aAction = self.createAction(sActionId, sAction, nPriority, len(lToken), dPos, iActionLine, nFirstNullable, nLastNullable)
                        if aAction:
                            sActionName = self.storeAction(sActionId, aAction)
                            lResult = list(lToken)
                            lResult.extend(["##"+str(iLine), sActionName])
                            #if iLine == 13341:
                            #    print("  ".join(lToken))
                            #    print(sActionId, aAction)
                            yield lResult
                        else:
                            print("# Error on action at line:", iLine)
                            print(sTokenLine, "\n", lActions)
                            exit()
                    else:
                        print("No action found for ", iActionLine)
                        exit()

    def createAction (self, sActionId, sAction, nPriority, nToken, dPos, iActionLine, nFirstNullable, nLastNullable):
        "create action rule as a list"
        sLineId = "#" + str(iActionLine)

        # Option
        sOption = False
        m = re.match("/(\\w+)/", sAction)
        if m:
            sOption = m.group(1)
            sAction = sAction[m.end():].strip()
        if nPriority == -1:
            nPriority = self.dOptPriority.get(sOption, 4)

        # valid action?
        m = re.search(r"(?P<action>[-=~/!>&])(?P<start>-?\d+\.?|)(?P<end>:\.?-?\d+|)(?P<casing>:|)>>", sAction)
        if not m:
            print("\n# Error. No action found at: ", sLineId, sActionId)
            exit()

        # Condition
        sCondition = sAction[:m.start()].strip()
        if sCondition:
            sCondition = changeReferenceToken(sCondition, dPos)
            sCondition = self.createFunction("cond", sCondition)
        else:
            sCondition = ""

        # Case sensitivity
        bCaseSensitivity = not bool(m.group("casing"))

        # Action
        cAction = m.group("action")
        sAction = sAction[m.end():].strip()
        sAction = changeReferenceToken(sAction, dPos)

        # target
        cStartLimit = "<"
        cEndLimit = ">"
        if not m.group("start"):
            iStartAction = 1
            iEndAction = 0
        else:
            if cAction != "-" and (m.group("start").endswith(".") or m.group("end").startswith(":.")):
                print("\n# Error. Wrong selection on tokens at: ", sLineId ,sActionId)
                return None
            if m.group("start").endswith("."):
                cStartLimit = ">"
            iStartAction = int(m.group("start").rstrip("."))
            if not m.group("end"):
                iEndAction = iStartAction
            else:
                if m.group("end").startswith(":."):
                    cEndLimit = "<"
                iEndAction = int(m.group("end").lstrip(":."))
        if dPos and m.group("start"):
            iStartAction = dPos.get(iStartAction, iStartAction)
            if iEndAction:
                iEndAction = dPos.get(iEndAction, iEndAction)
        if iStartAction < 0:
            iStartAction += 1
        if iEndAction < 0:
            iEndAction += 1

        # check target
        if nFirstNullable > -1:
            if nFirstNullable > 0 and iStartAction > 0 and iEndAction != 0 and (iStartAction > nFirstNullable or iStartAction == nFirstNullable == iEndAction):
                print(f"# Error. At {sLineId}, {sActionId}, target start is bigger than first nullable token.")
            if nFirstNullable > 0 and iEndAction > 0 and iStartAction != 1 and (iEndAction > nFirstNullable or iStartAction == nFirstNullable == iEndAction):
                print(f"# Error. At {sLineId}, {sActionId}, target end is bigger than first nullable token.")
            if nLastNullable < 0 and iStartAction < 0 and iEndAction != 0 and ((iStartAction-1) < nLastNullable or (iStartAction-1) == nFirstNullable == (iEndAction-1)):
                print(f"# Error. At {sLineId}, {sActionId}, target start is lower than last nullable token.")
            if nLastNullable < 0 and iEndAction < 0 and iStartAction != 1 and ((iEndAction-1) < nLastNullable or (iStartAction-1) == nFirstNullable == (iEndAction-1)):
                print(f"# Error. At {sLineId}, {sActionId}, target end is lower than last nullable token.")

        if cAction == "-":
            ## error
            iMsg = sAction.find(" && ")
            if iMsg == -1:
                print("\n# Error. No message at: ", sLineId, sActionId)
                exit()
            else:
                sMsg = sAction[iMsg+4:].strip()
                sAction = sAction[:iMsg].strip()
                sURL = ""
                mURL = re.search("[|] *(https?://.*)", sMsg)
                if mURL:
                    sURL = mURL.group(1).strip()
                    sMsg = sMsg[:mURL.start(0)].strip()
                checkTokenNumbers(sMsg, sActionId, nToken)      # check tokens in message
                if sMsg[0:1] == "=":
                    sMsg = self.createFunction("msg", sMsg, True)
                else:
                    checkIfThereIsCode(sMsg, sActionId)

        # checking token consistancy
        checkTokenNumbers(sCondition, sActionId, nToken)    # check tokens in condition
        checkTokenNumbers(sAction, sActionId, nToken)       # check tokens in action

        if cAction == ">":
            ## no action, break loop if condition is False
            return [sLineId, sOption, sCondition, cAction, ""]

        if not sAction and cAction not in "!#":
            print(f"\n# Error in action at line <{sLineId}/{sActionId}>:  This action is empty.")
            exit()

        if sAction[0:1] != "=" and cAction != "=":
            checkIfThereIsCode(sAction, sActionId)

        if cAction == "-":
            ## error detected --> suggestion
            if sAction[0:1] == "=":
                sAction = self.createFunction("sugg", sAction, True)
            elif sAction.startswith('"') and sAction.endswith('"'):
                sAction = sAction[1:-1]
            if not sMsg:
                print(f"\n# Error in action at line <{sLineId}/{sActionId}>:  The message is empty.")
                exit()
            return [sLineId, sOption, sCondition, cAction, sAction, iStartAction, iEndAction, cStartLimit, cEndLimit, bCaseSensitivity, nPriority, sMsg, sURL]
        if cAction == "~":
            ## text processor
            if sAction[0:1] == "=":
                sAction = self.createFunction("tp", sAction, True)
            elif sAction.startswith('"') and sAction.endswith('"'):
                sAction = sAction[1:-1]
            elif sAction not in "␣*_":
                nToken = sAction.count("|") + 1
                if iStartAction > 0 and iEndAction > 0:
                    if (iEndAction - iStartAction + 1) != nToken:
                        print(f"\n# Error in action at line <{sLineId}/{sActionId}>: numbers of modified tokens modified.")
                elif iStartAction < 0 or iEndAction < 0 and iStartAction != iEndAction:
                    print(f"\n# Warning in action at line <{sLineId}/{sActionId}>: rewriting with possible token position modified.")
            return [sLineId, sOption, sCondition, cAction, sAction, iStartAction, iEndAction, bCaseSensitivity]
        if cAction in "!/&":
            ## tags
            return [sLineId, sOption, sCondition, cAction, sAction, iStartAction, iEndAction]
        if cAction == "=":
            ## disambiguator
            sAction = self.createFunction("da", sAction)
            return [sLineId, sOption, sCondition, cAction, sAction]
        print("\n# Unknown action at ", sLineId, sActionId)
        return None

    def storeAction (self, sActionId, aAction):
        "store <aAction> in <self.dActions> avoiding duplicates and return action name"
        nVar = 1
        while True:
            sActionName = sActionId + "_" + str(nVar)
            if sActionName not in self.dActions:
                self.dActions[sActionName] = aAction
                return sActionName
            if aAction == self.dActions[sActionName]:
                return sActionName
            nVar += 1

    def showActions (self):
        "debugging function"
        print("\nActions:")
        for sActionName, aAction in oFunctionManager.dActions.items():
            print(sActionName, aAction)

    def createFunction (self, sType, sCode, bStartWithEqual=False):
        "create a function (stored in <self.dFunctions>) and return function name"
        sCode = rewriteCode(sCode)
        sFuncName = self._getNameForCode(sType, sCode)
        self.dFunctions[sFuncName] = sCode
        return sFuncName  if not bStartWithEqual  else "="+sFuncName

    def _getNameForCode (self, sType, sCode):
        "create and get a name for a code"
        if sType not in self.dFuncName:
            self.dFuncName[sType] = {}
        if sCode not in self.dFuncName[sType]:
            self.dFuncName[sType][sCode] = len(self.dFuncName[sType])+1
        return "_g_" + sType + "_" + self.sGraphCode + "_" + str(self.dFuncName[sType][sCode])

    def createCallables (self):
        "return callables for Python and JavaScript"
        sPyCallables = ""
        sJSCallables = ""
        for sFuncName, sReturn in self.dFunctions.items():
            if sFuncName.startswith("_g_cond_"): # condition
                sParams = "lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, dTags, sSentence, sSentence0"
            elif sFuncName.startswith("_g_msg_"): # message
                sParams = "lToken, nTokenOffset, nLastToken"
            elif sFuncName.startswith("_g_sugg_"): # suggestion
                sParams = "lToken, nTokenOffset, nLastToken"
            elif sFuncName.startswith("_g_tp_"): # text preprocessor
                sParams = "lToken, nTokenOffset, nLastToken"
            elif sFuncName.startswith("_g_da_"): # disambiguator
                sParams = "lToken, nTokenOffset, nLastToken"
            else:
                print("# Unknown function type in [" + sFuncName + "]")
                continue
            # Python
            sPyCallables += f"def {sFuncName} ({sParams}):\n"
            sPyCallables += f"    return {sReturn}\n"
            # JavaScript
            sJSCallables += f"    {sFuncName}: function ({sParams}) {{\n"
            sJSCallables += "        return " + jsconv.py2js(sReturn) + ";\n"
            sJSCallables += "    },\n"
        return sPyCallables, sJSCallables


def processing (sGraphName, sGraphCode, sLang, lRuleLine, dDef, dDecl, dOptPriority):
    "to be run in a separate process"
    oGraphBuilder = GraphBuilder(sGraphName, sGraphCode, sLang, dDef, dDecl, dOptPriority)
    dGraph, dActions, sPy, sJS, dLemmas = oGraphBuilder.createGraphAndActions(lRuleLine)
    return (sGraphName, dGraph, dActions, sPy, sJS, dLemmas)


def make (lRule, sLang, dDef, dDecl, dOptPriority):
    "compile rules, returns a dictionary of values"
    # for clarity purpose, don’t create any file here

    # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
    print("  parsing graph rules...")
    lTokenLine = []
    lActions = []
    bActionBlock = False
    nPriority = -1
    dAllGraph = {}
    dGraphCode = {}
    sGraphName = ""
    iActionBlock = 0
    aRuleName = set()
    oDictionary = graphspell.SpellChecker("fr")

    for iLine, sLine in lRule:
        sLine = sLine.rstrip()
        if "\t" in sLine:
            # tabulation not allowed
            print("# Error. Tabulation at line: ", iLine)
            exit()
        elif sLine.startswith("@@@@GRAPH: "):
            # rules graph call
            m = re.match(r"@@@@GRAPH: *(\w+) *[|] *(\w+)", sLine.strip())
            if m:
                sGraphName = m.group(1)
                sGraphCode = m.group(2)
                if sGraphName in dAllGraph or sGraphCode in dGraphCode:
                    print(f"# Error at line {iLine}. Graph name <{sGraphName}> or graph code <{sGraphCode}> already exists.")
                    exit()
                dAllGraph[sGraphName] = []
                dGraphCode[sGraphName] = sGraphCode
            else:
                print("# Error. Graph name not found at line", iLine)
                exit()
        elif sLine.startswith("__") and sLine.endswith("__"):
            # new rule group
            m = re.match("__(\\w+)(!\\d|)__", sLine)
            if m:
                sRuleName = m.group(1)
                if sRuleName in aRuleName:
                    print(f"# Error at line {iLine}. Rule name <{sRuleName}> already exists.")
                    exit()
                aRuleName.add(sRuleName)
                iActionBlock = 1
                nPriority = int(m.group(2)[1:]) if m.group(2)  else -1
            else:
                print("# Syntax error in rule group: ", sLine, " -- line:", iLine)
                exit()
        elif re.match("    \\S", sLine):
            # tokens line
            lTokenLine.append([iLine, sLine.strip()])
        elif sLine.startswith("        ||"):
            # tokens line continuation
            iPrevLine, sPrevLine = lTokenLine[-1]
            lTokenLine[-1] = [iPrevLine, sPrevLine + " " + sLine.strip()[2:]]
        elif sLine.startswith("        <<- "):
            # actions
            lActions.append([iLine, sLine[12:].strip()])
            if not re.search(r"[-=~/!>&](?:-?\d\.?(?::\.?-?\d+|)|):?>>", sLine):
                bActionBlock = True
        elif sLine.startswith("        && "):
            # action message
            iPrevLine, sPrevLine = lActions[-1]
            lActions[-1] = [iPrevLine, sPrevLine + sLine]
        elif sLine.startswith("        ") and bActionBlock:
            # action line continuation
            iPrevLine, sPrevLine = lActions[-1]
            lActions[-1] = [iPrevLine, sPrevLine + " " + sLine.strip()]
            if re.search(r"[-=~/!>&](?:-?\d\.?(?::\.?-?\d+|)|):?>>", sLine):
                bActionBlock = False
        elif re.match("[  ]*$", sLine):
            # empty line to end merging
            if not lTokenLine:
                continue
            if bActionBlock or not lActions:
                print("# Error. No action found at line:", iLine)
                print(bActionBlock, lActions)
                exit()
            if not sGraphName:
                print("# Error. All rules must belong to a named graph. Line: ", iLine)
                exit()
            for j, sTokenLine in lTokenLine:
                dAllGraph[sGraphName].append((j, sRuleName, sTokenLine, iActionBlock, list(lActions), nPriority))
            lTokenLine.clear()
            lActions.clear()
            iActionBlock += 1
        else:
            print("# Unknown line at:", iLine)
            print(sLine)
            exit()

    # processing rules
    print("  processing graph rules...")
    initProcessPoolExecutor(len(dAllGraph))
    fStartTimer = time.time()
    # build graph
    lResult = []
    nRule = 0
    for sGraphName, lRuleLine in dAllGraph.items():
        nRule += len(lRuleLine)
        try:
            xFuture = xProcessPoolExecutor.submit(processing, sGraphName, dGraphCode[sGraphName], sLang, lRuleLine, dDef, dDecl, dOptPriority)
            lResult.append(xFuture)
        except (concurrent.futures.TimeoutError, concurrent.futures.CancelledError):
            return "Analysis aborted (time out or cancelled)"
        except concurrent.futures.BrokenExecutor:
            return "Executor broken. The server failed."
    # merging results
    xProcessPoolExecutor.shutdown(wait=True) # waiting that everything is finished
    dAllActions = {}
    sPyCallables = ""
    sJSCallables = ""
    for xFuture in lResult:
        sGraphName, dGraph, dActions, sPy, sJS, dLemmas = xFuture.result()
        dAllGraph[sGraphName] = dGraph
        dAllActions.update(dActions)
        sPyCallables += sPy
        sJSCallables += sJS
        # check lemmas
        for sLemma, iLine in dLemmas.items():
            if sLemma not in oDictionary.getLemma(sLemma):
                print(f"  # Error at line {iLine}: <{sLemma}> is not a known lemma")
    # create a dictionary of URL
    dTempURL = { "": 0 }
    i = 1
    for sKey, lValue in dAllActions.items():
        if lValue[3] == "-":
            if lValue[-1]:
                if lValue[-1] not in dTempURL:
                    dTempURL[lValue[-1]] = i
                    i += 1
                lValue[-1] = dTempURL[lValue[-1]]
            else:
                lValue[-1] = 0
    dURL = { v: k  for k, v in dTempURL.items() } # reversing key and values
    # end
    print("  Total: ", nRule, "rules, ", len(dAllActions), "actions")
    print("  Build time: {:.2f} s".format(time.time() - fStartTimer))

    return {
        # the graphs describe paths of tokens to actions which eventually execute callables
        "rules_graphs": str(dAllGraph), # helpers.convertDictToString(dAllGraph)
        "rules_actions": helpers.convertDictToString(dAllActions), # str(dAllActions)
        "rules_graph_URL": helpers.convertDictToString(dURL), # str(dURL)
        "rules_graphsJS": str(dAllGraph),
        "rules_actionsJS": jsconv.pyActionsToString(dAllActions),
        "rules_graph_URLJS": str(dURL),
        "graph_callables": sPyCallables,
        "graph_callablesJS": sJSCallables
    }
Grammalecte compile_rules_graph.py at [18e5cd0473]

File compile_rules_graph.py artifact 523cbe02bb part of check-in 18e5cd0473