Overview
Comment: | [build][fr] graphcode for action id and use multiprocess features for faster building |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | fr | build |
Files: | files | file ages | folders |
SHA3-256: | 5cdb3649d7376a1acd975c188e69d554 |
User & Date: | olr on 2020-03-30 12:01:47 |
Other Links: | manifest | tags |
Context
2020-03-30
| ||
12:39 | [build] code cleaning check-in: c1ecdfead1 user: olr tags: trunk, build | |
12:01 | [build][fr] graphcode for action id and use multiprocess features for faster building check-in: 5cdb3649d7 user: olr tags: trunk, fr, build | |
01:34 | [build] graph builder: code clarification check-in: 4e32dddcf2 user: olr tags: trunk, build | |
Changes
Modified compile_rules_graph.py from [675bb102d9] to [cd58fd7450].
1 2 3 4 5 6 7 8 9 10 11 12 13 14 | """ Grammalecte: compile rules Create a Direct Acyclic Rule Graphs (DARGs) """ import re import os import time import darg import compile_rules_js_convert as jsconv def rewriteCode (sCode): | > > > > > > > > > > > > > > > > > > | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 | """ Grammalecte: compile rules Create a Direct Acyclic Rule Graphs (DARGs) """ import re import os import time import concurrent.futures import darg import compile_rules_js_convert as jsconv #### PROCESS POOL EXECUTOR #### xProcessPoolExecutor = None def initProcessPoolExecutor (nMultiCPU=None): "process pool executor initialisation" global xProcessPoolExecutor if xProcessPoolExecutor: # we shutdown the ProcessPoolExecutor which may have been launched previously print(" ProcessPoolExecutor shutdown.") xProcessPoolExecutor.shutdown(wait=False) nMaxCPU = max(os.cpu_count()-1, 1) if nMultiCPU is None or not (1 <= nMultiCPU <= nMaxCPU): nMultiCPU = nMaxCPU print(" CPU processes used for workers: ", nMultiCPU) xProcessPoolExecutor = concurrent.futures.ProcessPoolExecutor(max_workers=nMultiCPU) def rewriteCode (sCode): "convert simple code syntax to a string of Python code" if sCode[0:1] == "=": sCode = sCode[1:] sCode = sCode.replace("__also__", "bCondMemo") sCode = sCode.replace("__else__", "not bCondMemo") sCode = sCode.replace("sContext", "_sAppContext") sCode = re.sub(r"\b(morph|morphVC|analyse|value|tag|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[nTokenOffset+\\2]', sCode) sCode = re.sub(r"\b(morph|morphVC|analyse|value|tag|displayInfo)[(]\\-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1]', sCode) |
︙ | ︙ | |||
68 69 70 71 72 73 74 | print("# Warning at line " + sActionId + ": This message looks like code. Line should probably begin with =") print(sText) class GraphBuilder: | | > > > | 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 | print("# Warning at line " + sActionId + ": This message looks like code. Line should probably begin with =") print(sText) class GraphBuilder: def __init__ (self, sGraphName, sGraphCode, sLang, dDef, dDecl, dOptPriority): self.sGraphName = sGraphName self.sGraphCode = sGraphCode self.sLang = sLang self.dDef = dDef self.dDecl = dDecl self.dOptPriority = dOptPriority self.dAntiPatterns = {} self.dActions = {} self.dFuncName = {} self.dFunctions = {} |
︙ | ︙ | |||
154 155 156 157 158 159 160 | for sSuffix in self.dDecl[sCode]: lToken.append(sToken+sSuffix) break else: lToken.append(sToken) return lToken | | | | | > | | 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 | for sSuffix in self.dDecl[sCode]: lToken.append(sToken+sSuffix) break else: lToken.append(sToken) return lToken def createGraphAndActions (self, lRuleLine): "create a graph as a dictionary with <lRuleLine>" fStartTimer = time.time() print("{:>8,} rules in {:<30} ".format(len(lRuleLine), "<"+self.sGraphName+"|"+self.sGraphCode+">"), end="") lPreparedRule = [] for i, sRuleName, sTokenLine, iActionBlock, lActions, nPriority in lRuleLine: for aRule in self.createRule(i, sRuleName, sTokenLine, iActionBlock, lActions, nPriority): lPreparedRule.append(aRule) # Debugging if False: print("\nRULES:") for e in lPreparedRule: if e[-2] == "##2211": print(e) # Graph creation oDARG = darg.DARG(lPreparedRule, self.sLang) dGraph = oDARG.createGraph() print(oDARG, end="") # debugging if False: print("\nGRAPH:", self.sGraphName) for k, v in dGraph.items(): print(k, "\t", v) print("\tin {:>8.2f} s".format(time.time()-fStartTimer)) sPyCallables, sJSCallables = self.createCallables() return dGraph, self.dActions, sPyCallables, sJSCallables def createRule (self, iLine, sRuleName, sTokenLine, iActionBlock, lActions, nPriority): "generator: create rule as list" # print(iLine, "//", sRuleName, "//", sTokenLine, "//", lActions, "//", nPriority) if sTokenLine.startswith("!!") and sTokenLine.endswith("¡¡"): # antipattern sTokenLine = sTokenLine[2:-2].strip() |
︙ | ︙ | |||
211 212 213 214 215 216 217 | iGroup += 1 dPos[iGroup] = i + 1 # we add 1, for we count tokens from 1 to n (not from 0) # Parse actions for iAction, (iActionLine, sAction) in enumerate(lActions): sAction = sAction.strip() if sAction: | | | 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 | iGroup += 1 dPos[iGroup] = i + 1 # we add 1, for we count tokens from 1 to n (not from 0) # Parse actions for iAction, (iActionLine, sAction) in enumerate(lActions): sAction = sAction.strip() if sAction: sActionId = self.sGraphCode + "__" + sRuleName + "__b" + str(iActionBlock) + "_a" + str(iAction) aAction = self.createAction(sActionId, sAction, nPriority, len(lToken), dPos, iActionLine) if aAction: sActionName = self.storeAction(sActionId, aAction) lResult = list(lToken) lResult.extend(["##"+str(iLine), sActionName]) #if iLine == 13341: # print(" ".join(lToken)) |
︙ | ︙ | |||
389 390 391 392 393 394 395 | def _getNameForCode (self, sType, sCode): "create and get a name for a code" if sType not in self.dFuncName: self.dFuncName[sType] = {} if sCode not in self.dFuncName[sType]: self.dFuncName[sType][sCode] = len(self.dFuncName[sType])+1 | | < | 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 | def _getNameForCode (self, sType, sCode): "create and get a name for a code" if sType not in self.dFuncName: self.dFuncName[sType] = {} if sCode not in self.dFuncName[sType]: self.dFuncName[sType][sCode] = len(self.dFuncName[sType])+1 return "_g_" + sType + "_" + self.sGraphCode + "_" + str(self.dFuncName[sType][sCode]) def createCallables (self): "return callables for Python and JavaScript" sPyCallables = "" sJSCallables = "" for sFuncName, sReturn in self.dFunctions.items(): if sFuncName.startswith("_g_cond_"): # condition sParams = "lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, dTags, sSentence, sSentence0" elif sFuncName.startswith("_g_msg_"): # message sParams = "lToken, nTokenOffset, nLastToken" |
︙ | ︙ | |||
419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 | sPyCallables += " return " + sReturn + "\n" # JavaScript sJSCallables += " {}: function ({})".format(sFuncName, sParams) + " {\n" sJSCallables += " return " + jsconv.py2js(sReturn) + ";\n" sJSCallables += " },\n" return sPyCallables, sJSCallables def make (lRule, sLang, dDef, dDecl, dOptPriority): "compile rules, returns a dictionary of values" # for clarity purpose, don’t create any file here # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines print(" parsing rules...") lTokenLine = [] lActions = [] bActionBlock = False nPriority = -1 dAllGraph = {} sGraphName = "" iActionBlock = 0 aRuleName = set() for iLine, sLine in lRule: sLine = sLine.rstrip() if "\t" in sLine: # tabulation not allowed print("Error. Tabulation at line: ", iLine) exit() elif sLine.startswith("@@@@GRAPH: "): # rules graph call | > > > > > > > > | > | | > | 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 | sPyCallables += " return " + sReturn + "\n" # JavaScript sJSCallables += " {}: function ({})".format(sFuncName, sParams) + " {\n" sJSCallables += " return " + jsconv.py2js(sReturn) + ";\n" sJSCallables += " },\n" return sPyCallables, sJSCallables def processing (sGraphName, sGraphCode, sLang, lRuleLine, dDef, dDecl, dOptPriority): "to be run in a separate process" oGraphBuilder = GraphBuilder(sGraphName, sGraphCode, sLang, dDef, dDecl, dOptPriority) dGraph, dActions, sPy, sJS = oGraphBuilder.createGraphAndActions(lRuleLine) return (sGraphName, dGraph, dActions, sPy, sJS) def make (lRule, sLang, dDef, dDecl, dOptPriority): "compile rules, returns a dictionary of values" # for clarity purpose, don’t create any file here # removing comments, zeroing empty 
lines, creating definitions, storing tests, merging rule lines print(" parsing rules...") lTokenLine = [] lActions = [] bActionBlock = False nPriority = -1 dAllGraph = {} dGraphCode = {} sGraphName = "" iActionBlock = 0 aRuleName = set() for iLine, sLine in lRule: sLine = sLine.rstrip() if "\t" in sLine: # tabulation not allowed print("Error. Tabulation at line: ", iLine) exit() elif sLine.startswith("@@@@GRAPH: "): # rules graph call m = re.match(r"@@@@GRAPH: *(\w+) *[|] *(\w+)", sLine.strip()) if m: sGraphName = m.group(1) sGraphCode = m.group(2) if sGraphName in dAllGraph or sGraphCode in dGraphCode: print("Error at line " + iLine + ". Graph name <" + sGraphName + "> or graph code <" + sGraphCode + "> already exists.") exit() dAllGraph[sGraphName] = [] dGraphCode[sGraphName] = sGraphCode else: print("Error. Graph name not found at line", iLine) exit() elif sLine.startswith("__") and sLine.endswith("__"): # new rule group m = re.match("__(\\w+)(!\\d|)__", sLine) if m: |
︙ | ︙ | |||
511 512 513 514 515 516 517 518 519 | iActionBlock += 1 else: print("Unknown line at:", iLine) print(sLine) # processing rules print(" processing rules...") fStartTimer = time.time() nRule = 0 | > > > > > | > > > > > > > > > > > | > > > > > > > | < < < | | | 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 | iActionBlock += 1 else: print("Unknown line at:", iLine) print(sLine) # processing rules print(" processing rules...") initProcessPoolExecutor() fStartTimer = time.time() nRule = 0 dAllActions = {} sPyCallables = "" sJSCallables = "" lResult = [] # buid graph for sGraphName, lRuleLine in dAllGraph.items(): #dGraph, dActions, sPy, sJS = processing(sGraphName, dGraphCode[sGraphName], sLang, lRuleLine, dDef, dDecl, dOptPriority) nRule += len(lRuleLine) try: xFuture = xProcessPoolExecutor.submit(processing, sGraphName, dGraphCode[sGraphName], sLang, lRuleLine, dDef, dDecl, dOptPriority) lResult.append(xFuture) except (concurrent.futures.TimeoutError, concurrent.futures.CancelledError): return "Analysis aborted (time out or cancelled)" except concurrent.futures.BrokenExecutor: return "Executor broken. The server failed." 
# merging results xProcessPoolExecutor.shutdown(wait=True) for xFuture in lResult: sGraphName, dGraph, dActions, sPy, sJS = xFuture.result() #print(dGraph) #for k, v in dActions.items(): # print(k, ":", v) #input() dAllGraph[sGraphName] = dGraph dAllActions.update(dActions) sPyCallables += sPy sJSCallables += sJS print(" Total: ", nRule, "rules, ", len(dAllActions), "actions") print(" Build time: {:.2f} s".format(time.time() - fStartTimer)) return { # the graphs describe paths of tokens to actions which eventually execute callables "rules_graphs": str(dAllGraph), "rules_graphsJS": str(dAllGraph), "rules_actions": str(dAllActions), "rules_actionsJS": jsconv.pyActionsToString(dAllActions), "graph_callables": sPyCallables, "graph_callablesJS": sJSCallables } |
Modified gc_lang/fr/rules.grx from [34e2885199] to [346c58fef3].
︙ | ︙ | |||
1703 1704 1705 1706 1707 1708 1709 | @@@@ @@@@ @@@@ @@@@ | | | 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 | @@@@ @@@@ @@@@ @@@@ @@@@GRAPH: purge_tag_disambiguate|ptd _ @@@@ @@@@ @@@@ @@@@ !!! !!! |
︙ | ︙ | |||
2458 2459 2460 2461 2462 2463 2464 | @@@@ @@@@ @@@@ @@@@ | | | 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 | @@@@ @@@@ @@@@ @@@@ @@@@GRAPH: ocr|ocr _ @@@@ @@@@ @@@@ @@@@ # This graph is parsed only if option <ocr> is activated. |
︙ | ︙ | |||
3368 3369 3370 3371 3372 3373 3374 | @@@@ @@@@ @@@@ @@@@ | | | 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 | @@@@ @@@@ @@@@ @@@@ @@@@GRAPH: graphe1|g1 _ @@@@ @@@@ @@@@ @@@@ !!!! Doublons (casse identique) !! |
︙ | ︙ | |||
14205 14206 14207 14208 14209 14210 14211 | @@@@ @@@@ @@@@ @@@@ | | | 14205 14206 14207 14208 14209 14210 14211 14212 14213 14214 14215 14216 14217 14218 14219 | @@@@ @@@@ @@@@ @@@@ @@@@GRAPH: purge_passe2|pp2 _ @@@@ @@@@ @@@@ @@@@ ## Seconde passe (il faut réorganiser tout le cycle de simplification) |
︙ | ︙ | |||
14477 14478 14479 14480 14481 14482 14483 | @@@@ @@@@ @@@@ @@@@ | | | 14477 14478 14479 14480 14481 14482 14483 14484 14485 14486 14487 14488 14489 14490 14491 | @@@@ @@@@ @@@@ @@@@ @@@@GRAPH: purge_passe3|pp3 _ @@@@ @@@@ @@@@ @@@@ __da_le_la_les_leur2__ [<start>|,|(] c’ ?[ne|n’]¿ >être [le|la|l’|les] @:[NA]¬:G |
︙ | ︙ | |||
14959 14960 14961 14962 14963 14964 14965 | @@@@ @@@@ @@@@ @@@@ | | | 14959 14960 14961 14962 14963 14964 14965 14966 14967 14968 14969 14970 14971 14972 14973 | @@@@ @@@@ @@@@ @@@@ @@@@GRAPH: groupes_nominaux|gn _ @@@@ @@@@ @@@@ @@@@ !! !! |
︙ | ︙ | |||
20247 20248 20249 20250 20251 20252 20253 | @@@@ @@@@ @@@@ @@@@ | | | 20247 20248 20249 20250 20251 20252 20253 20254 20255 20256 20257 20258 20259 20260 20261 | @@@@ @@@@ @@@@ @@@@ @@@@GRAPH: purge_ponctuations2|ppc2 _ @@@@ @@@@ @@@@ @@@@ __da_été2__ >avoir été |
︙ | ︙ | |||
20302 20303 20304 20305 20306 20307 20308 | TEST: En outre, les grosses institutions comme l’Éducation Nationale ne développent pas forcément en interne @@@@ @@@@ @@@@ @@@@ | | | 20302 20303 20304 20305 20306 20307 20308 20309 20310 20311 20312 20313 20314 20315 20316 | TEST: En outre, les grosses institutions comme l’Éducation Nationale ne développent pas forcément en interne @@@@ @@@@ @@@@ @@@@ @@@@GRAPH: verbes1|gv1 _ @@@@ @@@@ @@@@ @@@@ !!!! OCR !! |
︙ | ︙ | |||
23632 23633 23634 23635 23636 23637 23638 | <<- morph(\1, ":[NAM]") and morph(\2, ":[NAM]") />> enum @@@@ @@@@ @@@@ @@@@ | | | 23632 23633 23634 23635 23636 23637 23638 23639 23640 23641 23642 23643 23644 23645 23646 | <<- morph(\1, ":[NAM]") and morph(\2, ":[NAM]") />> enum @@@@ @@@@ @@@@ @@@@ @@@@GRAPH: verbes2|gv2 _ @@@@ @@@@ @@@@ @@@@ !! !! |
︙ | ︙ |