Index: compile_rules_graph.py ================================================================== --- compile_rules_graph.py +++ compile_rules_graph.py @@ -4,17 +4,35 @@ """ import re import os import time +import concurrent.futures import darg import compile_rules_js_convert as jsconv + +#### PROCESS POOL EXECUTOR #### +xProcessPoolExecutor = None + +def initProcessPoolExecutor (nMultiCPU=None): + "process pool executor initialisation" + global xProcessPoolExecutor + if xProcessPoolExecutor: + # we shutdown the ProcessPoolExecutor which may have been launched previously + print(" ProcessPoolExecutor shutdown.") + xProcessPoolExecutor.shutdown(wait=False) + nMaxCPU = max(os.cpu_count()-1, 1) + if nMultiCPU is None or not (1 <= nMultiCPU <= nMaxCPU): + nMultiCPU = nMaxCPU + print(" CPU processes used for workers: ", nMultiCPU) + xProcessPoolExecutor = concurrent.futures.ProcessPoolExecutor(max_workers=nMultiCPU) + def rewriteCode (sCode): - "convert simple rule syntax to a string of Python code" + "convert simple code syntax to a string of Python code" if sCode[0:1] == "=": sCode = sCode[1:] sCode = sCode.replace("__also__", "bCondMemo") sCode = sCode.replace("__else__", "not bCondMemo") sCode = sCode.replace("sContext", "_sAppContext") @@ -70,11 +88,14 @@ class GraphBuilder: - def __init__ (self, dDef, dDecl, dOptPriority): + def __init__ (self, sGraphName, sGraphCode, sLang, dDef, dDecl, dOptPriority): + self.sGraphName = sGraphName + self.sGraphCode = sGraphCode + self.sLang = sLang self.dDef = dDef self.dDecl = dDecl self.dOptPriority = dOptPriority self.dAntiPatterns = {} self.dActions = {} @@ -156,14 +177,14 @@ break else: lToken.append(sToken) return lToken - def createGraphAndActions (self, sGraphName, lRuleLine, sLang): + def createGraphAndActions (self, lRuleLine): "create a graph as a dictionary with " fStartTimer = time.time() - print("{:>8,} rules in {:<24} ".format(len(lRuleLine), "<"+sGraphName+">"), end="") + print("{:>8,} rules in {:<30} ".format(len(lRuleLine), "<"+self.sGraphName+"|"+self.sGraphCode+">"), end="") lPreparedRule = [] for i, sRuleName, sTokenLine, iActionBlock, lActions, nPriority in lRuleLine: for aRule in self.createRule(i, sRuleName, sTokenLine, iActionBlock, lActions, nPriority): lPreparedRule.append(aRule) # Debugging @@ -171,20 +192,21 @@ print("\nRULES:") for e in lPreparedRule: if e[-2] == "##2211": print(e) # Graph creation - oDARG = darg.DARG(lPreparedRule, sLang) + oDARG = darg.DARG(lPreparedRule, self.sLang) dGraph = oDARG.createGraph() print(oDARG, end="") # debugging if False: - print("\nGRAPH:", sGraphName) + print("\nGRAPH:", self.sGraphName) for k, v in dGraph.items(): print(k, "\t", v) print("\tin {:>8.2f} s".format(time.time()-fStartTimer)) - return dGraph + sPyCallables, sJSCallables = self.createCallables() + return dGraph, self.dActions, sPyCallables, sJSCallables def createRule (self, iLine, sRuleName, sTokenLine, iActionBlock, lActions, nPriority): "generator: create rule as list" # print(iLine, "//", sRuleName, "//", sTokenLine, "//", lActions, "//", nPriority) if sTokenLine.startswith("!!") and sTokenLine.endswith("¡¡"): @@ -213,11 +235,11 @@ # Parse actions for iAction, (iActionLine, sAction) in enumerate(lActions): sAction = sAction.strip() if sAction: - sActionId = sRuleName + "__b" + str(iActionBlock) + "_a" + str(iAction) + sActionId = self.sGraphCode + "__" + sRuleName + "__b" + str(iActionBlock) + "_a" + str(iAction) aAction = self.createAction(sActionId, sAction, nPriority, len(lToken), dPos, iActionLine) if aAction: sActionName = self.storeAction(sActionId, aAction) lResult = list(lToken) lResult.extend(["##"+str(iLine), sActionName]) @@ -391,15 +413,14 @@ "create and get a name for a code" if sType not in self.dFuncName: self.dFuncName[sType] = {} if sCode not in self.dFuncName[sType]: self.dFuncName[sType][sCode] = len(self.dFuncName[sType])+1 - return "_g_" + sType + "_" + str(self.dFuncName[sType][sCode]) + return "_g_" + sType + "_" + self.sGraphCode + "_" + str(self.dFuncName[sType][sCode]) def createCallables (self): "return callables for Python and JavaScript" - print(" creating callables for graph rules...") sPyCallables = "" sJSCallables = "" for sFuncName, sReturn in self.dFunctions.items(): if sFuncName.startswith("_g_cond_"): # condition sParams = "lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, dTags, sSentence, sSentence0" @@ -421,10 +442,17 @@ sJSCallables += " {}: function ({})".format(sFuncName, sParams) + " {\n" sJSCallables += " return " + jsconv.py2js(sReturn) + ";\n" sJSCallables += " },\n" return sPyCallables, sJSCallables + +def processing (sGraphName, sGraphCode, sLang, lRuleLine, dDef, dDecl, dOptPriority): + "to be run in a separate process" + oGraphBuilder = GraphBuilder(sGraphName, sGraphCode, sLang, dDef, dDecl, dOptPriority) + dGraph, dActions, sPy, sJS = oGraphBuilder.createGraphAndActions(lRuleLine) + return (sGraphName, dGraph, dActions, sPy, sJS) + def make (lRule, sLang, dDef, dDecl, dOptPriority): "compile rules, returns a dictionary of values" # for clarity purpose, don’t create any file here @@ -433,10 +461,11 @@ lTokenLine = [] lActions = [] bActionBlock = False nPriority = -1 dAllGraph = {} + dGraphCode = {} sGraphName = "" iActionBlock = 0 aRuleName = set() for iLine, sLine in lRule: @@ -445,17 +474,19 @@ # tabulation not allowed print("Error. Tabulation at line: ", iLine) exit() elif sLine.startswith("@@@@GRAPH: "): # rules graph call - m = re.match(r"@@@@GRAPH: *(\w+)", sLine.strip()) + m = re.match(r"@@@@GRAPH: *(\w+) *[|] *(\w+)", sLine.strip()) if m: sGraphName = m.group(1) - if sGraphName in dAllGraph: - print("Error at line " + iLine + ". Graph name <" + sGraphName + "> already exists.") + sGraphCode = m.group(2) + if sGraphName in dAllGraph or sGraphCode in dGraphCode: + print("Error at line " + iLine + ". Graph name <" + sGraphName + "> or graph code <" + sGraphCode + "> already exists.") exit() dAllGraph[sGraphName] = [] + dGraphCode[sGraphName] = sGraphCode else: print("Error. Graph name not found at line", iLine) exit() elif sLine.startswith("__") and sLine.endswith("__"): # new rule group @@ -513,27 +544,47 @@ print("Unknown line at:", iLine) print(sLine) # processing rules print(" processing rules...") + initProcessPoolExecutor() fStartTimer = time.time() nRule = 0 - oGraphBuilder = GraphBuilder(dDef, dDecl, dOptPriority) + dAllActions = {} + sPyCallables = "" + sJSCallables = "" + lResult = [] + # buid graph for sGraphName, lRuleLine in dAllGraph.items(): + #dGraph, dActions, sPy, sJS = processing(sGraphName, dGraphCode[sGraphName], sLang, lRuleLine, dDef, dDecl, dOptPriority) nRule += len(lRuleLine) - dGraph = oGraphBuilder.createGraphAndActions(sGraphName, lRuleLine, sLang) + try: + xFuture = xProcessPoolExecutor.submit(processing, sGraphName, dGraphCode[sGraphName], sLang, lRuleLine, dDef, dDecl, dOptPriority) + lResult.append(xFuture) + except (concurrent.futures.TimeoutError, concurrent.futures.CancelledError): + return "Analysis aborted (time out or cancelled)" + except concurrent.futures.BrokenExecutor: + return "Executor broken. The server failed." + # merging results + xProcessPoolExecutor.shutdown(wait=True) + for xFuture in lResult: + sGraphName, dGraph, dActions, sPy, sJS = xFuture.result() + #print(dGraph) + #for k, v in dActions.items(): + # print(k, ":", v) + #input() dAllGraph[sGraphName] = dGraph - print(" Total: ", nRule, "rules, ", len(oGraphBuilder.dActions), "actions") + dAllActions.update(dActions) + sPyCallables += sPy + sJSCallables += sJS + print(" Total: ", nRule, "rules, ", len(dAllActions), "actions") print(" Build time: {:.2f} s".format(time.time() - fStartTimer)) - sPyCallables, sJSCallables = oGraphBuilder.createCallables() - #print(sPyCallables) - return { # the graphs describe paths of tokens to actions which eventually execute callables "rules_graphs": str(dAllGraph), "rules_graphsJS": str(dAllGraph), - "rules_actions": str(oGraphBuilder.dActions), - "rules_actionsJS": jsconv.pyActionsToString(oGraphBuilder.dActions), + "rules_actions": str(dAllActions), + "rules_actionsJS": jsconv.pyActionsToString(dAllActions), "graph_callables": sPyCallables, "graph_callablesJS": sJSCallables } Index: gc_lang/fr/rules.grx ================================================================== --- gc_lang/fr/rules.grx +++ gc_lang/fr/rules.grx @@ -1705,11 +1705,11 @@ @@@@ @@@@ @@@@ @@@@ -@@@@GRAPH: purge_tag_disambiguate _ +@@@@GRAPH: purge_tag_disambiguate|ptd _ @@@@ @@@@ @@@@ @@@@ @@ -2460,11 +2460,11 @@ @@@@ @@@@ @@@@ @@@@ -@@@@GRAPH: ocr _ +@@@@GRAPH: ocr|ocr _ @@@@ @@@@ @@@@ @@@@ @@ -3370,11 +3370,11 @@ @@@@ @@@@ @@@@ @@@@ -@@@@GRAPH: graphe1 _ +@@@@GRAPH: graphe1|g1 _ @@@@ @@@@ @@@@ @@@@ @@ -14207,11 +14207,11 @@ @@@@ @@@@ @@@@ @@@@ -@@@@GRAPH: purge_passe2 _ +@@@@GRAPH: purge_passe2|pp2 _ @@@@ @@@@ @@@@ @@@@ @@ -14479,11 +14479,11 @@ @@@@ @@@@ @@@@ @@@@ -@@@@GRAPH: purge_passe3 _ +@@@@GRAPH: purge_passe3|pp3 _ @@@@ @@@@ @@@@ @@@@ @@ -14961,11 +14961,11 @@ @@@@ @@@@ @@@@ @@@@ -@@@@GRAPH: groupes_nominaux _ +@@@@GRAPH: groupes_nominaux|gn _ @@@@ @@@@ @@@@ @@@@ @@ -20249,11 +20249,11 @@ @@@@ @@@@ @@@@ @@@@ -@@@@GRAPH: purge_ponctuations2 _ +@@@@GRAPH: purge_ponctuations2|ppc2 _ @@@@ @@@@ @@@@ @@@@ @@ -20304,11 +20304,11 @@ @@@@ @@@@ @@@@ @@@@ -@@@@GRAPH: verbes1 _ +@@@@GRAPH: verbes1|gv1 _ @@@@ @@@@ @@@@ @@@@ @@ -23634,11 +23634,11 @@ @@@@ @@@@ @@@@ @@@@ -@@@@GRAPH: verbes2 _ +@@@@GRAPH: verbes2|gv2 _ @@@@ @@@@ @@@@ @@@@