Overview
Comment: | [build] graph builder: code clarification |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | build |
Files: | files | file ages | folders |
SHA3-256: |
4e32dddcf2cc261aa2778cdba05b3aa0 |
User & Date: | olr on 2020-03-30 01:34:52 |
Other Links: | manifest | tags |
Context
2020-03-30
| ||
12:01 | [build][fr] graphcode for action id and use multiprocess features for faster building check-in: 5cdb3649d7 user: olr tags: trunk, fr, build | |
01:34 | [build] graph builder: code clarification check-in: 4e32dddcf2 user: olr tags: trunk, build | |
2020-03-29
| ||
23:13 | [fr] ajustements, nr: majuscules pour ministères (de dominiko) check-in: 7aee30d740 user: olr tags: trunk, fr | |
Changes
Modified compile_rules_graph.py from [163c3038b0] to [675bb102d9].
1 2 3 4 5 6 7 8 9 10 11 | """ Grammalecte: compile rules Create a Direct Acyclic Rule Graphs (DARGs) """ import re import darg import compile_rules_js_convert as jsconv | > > < < < < < | < < < < < < < < < < < < < < < < < < < < < < < < < < < | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 | """ Grammalecte: compile rules Create a Direct Acyclic Rule Graphs (DARGs) """ import re import os import time import darg import compile_rules_js_convert as jsconv def rewriteCode (sCode): "convert simple rule syntax to a string of Python code" if sCode[0:1] == "=": sCode = sCode[1:] sCode = sCode.replace("__also__", "bCondMemo") sCode = sCode.replace("__else__", "not bCondMemo") sCode = sCode.replace("sContext", "_sAppContext") sCode = re.sub(r"\b(morph|morphVC|analyse|value|tag|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[nTokenOffset+\\2]', sCode) |
︙ | ︙ | |||
70 71 72 73 74 75 76 | sCode = re.sub(r"\banalyseWord[(]", 'analyse(', sCode) sCode = re.sub(r"[\\](\d+)", 'lToken[nTokenOffset+\\1]["sValue"]', sCode) sCode = re.sub(r"[\\]-(\d+)", 'lToken[nLastToken-\\1+1]["sValue"]', sCode) sCode = re.sub(r">1", 'lToken[nLastToken+1]["sValue"]', sCode) sCode = re.sub(r"<1", 'lToken[nTokenOffset]["sValue"]', sCode) return sCode | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 40 41 42 43 44 45 46 47 48 49 50 51 52 53 | sCode = re.sub(r"\banalyseWord[(]", 'analyse(', sCode) sCode = re.sub(r"[\\](\d+)", 'lToken[nTokenOffset+\\1]["sValue"]', sCode) sCode = re.sub(r"[\\]-(\d+)", 'lToken[nLastToken-\\1+1]["sValue"]', sCode) sCode = re.sub(r">1", 'lToken[nLastToken+1]["sValue"]', sCode) sCode = re.sub(r"<1", 'lToken[nTokenOffset]["sValue"]', sCode) return sCode def changeReferenceToken (sText, dPos): "change group reference in <sText> with values in <dPos>" if "\\" not in sText: return sText for i in range(len(dPos), 0, -1): sText = re.sub("\\\\"+str(i)+"(?![0-9])", "\\\\"+str(dPos[i]), sText) |
︙ | ︙ | |||
222 223 224 225 226 227 228 | def checkIfThereIsCode (sText, sActionId): "check if there is code in <sText> (debugging)" if re.search(r"[.]\w+[(]|sugg\w+[(]|\(\\[0-9]|\[[0-9]", sText): print("# Warning at line " + sActionId + ": This message looks like code. Line should probably begin with =") print(sText) | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 
def checkIfThereIsCode (sText, sActionId):
    """Print a warning if <sText> looks like code (debugging helper).

    <sText> is searched for a method call (".name("), a "sugg…(" call,
    "(\\<digit>" or "[<digit>". On a match, a warning naming <sActionId> and
    the text itself are printed. Nothing is returned.
    """
    if re.search(r"[.]\w+[(]|sugg\w+[(]|\(\\[0-9]|\[[0-9]", sText):
        print("# Warning at line " + sActionId + ": This message looks like code. Line should probably begin with =")
        print(sText)


class GraphBuilder:
    """Build rule graphs and collect the actions and functions they refer to.

    State shared by all graphs built with the same instance:
        dDef            token definitions, looked up by "{name}" keys
        dDecl           suffix code -> list of suffixes, used to expand tokens
        dOptPriority    option name -> default priority
        dAntiPatterns   rule name -> list of token lines to discard
        dActions        action name -> action (a list, see <createAction>)
        dFuncName       function type -> {code: index}, used to name functions
        dFunctions      function name -> rewritten code
    """

    def __init__ (self, dDef, dDecl, dOptPriority):
        self.dDef = dDef
        self.dDecl = dDecl
        self.dOptPriority = dOptPriority
        self.dAntiPatterns = {}
        self.dActions = {}
        self.dFuncName = {}
        self.dFunctions = {}

    def _genTokenLines (self, sTokenLine):
        """Generator: tokenize <sTokenLine> and yield every resulting line of tokens.

        <sTokenLine> is split on whitespace. Each block may be:
          - an optional block "?…¿" (lines are produced with and without it),
          - a definition "{name}" or "({name})" replaced by its value in <self.dDef>,
          - a multiple-token block "[a|b]" or "([a|b])" (one line per alternative),
          - a simple token.
        Each yielded item is a list of token strings; selected groups keep their
        surrounding parentheses.
        """
        lTokenLines = []
        for sTokBlock in sTokenLine.split():
            # replace merger characters by spaces
            if "␣" in sTokBlock:
                sTokBlock = sTokBlock.replace("␣", " ")
            # optional token?
            bNullPossible = sTokBlock.startswith("?") and sTokBlock.endswith("¿")
            if bNullPossible:
                sTokBlock = sTokBlock[1:-1]
            # token with definition?
            if sTokBlock.startswith("({") and sTokBlock.endswith("})") and sTokBlock[1:-1] in self.dDef:
                sTokBlock = "(" + self.dDef[sTokBlock[1:-1]] + ")"
            elif sTokBlock.startswith("{") and sTokBlock.endswith("}") and sTokBlock in self.dDef:
                sTokBlock = self.dDef[sTokBlock]
            if ( (sTokBlock.startswith("[") and sTokBlock.endswith("]")) or (sTokBlock.startswith("([") and sTokBlock.endswith("])")) ):
                # multiple token
                bSelectedGroup = sTokBlock.startswith("(") and sTokBlock.endswith(")")
                if bSelectedGroup:
                    sTokBlock = sTokBlock[1:-1]
                lToken = self._createTokenList(sTokBlock)
                if not lTokenLines:
                    lTokenLines = [ ["("+s+")"] for s in lToken ] if bSelectedGroup else [ [s] for s in lToken ]
                    if bNullPossible:
                        lTokenLines.extend([ [] for i in range(len(lToken)+1) ])
                else:
                    lNewTemp = []
                    if bNullPossible:
                        # existing lines are kept as is (block absent), new lines get one alternative each
                        for aRule in lTokenLines:
                            for sElem in lToken:
                                aNewRule = list(aRule)
                                aNewRule.append(sElem)
                                lNewTemp.append(aNewRule)
                    else:
                        # the first alternative extends the existing lines in place,
                        # the other alternatives extend copies of them
                        sElem1 = lToken.pop(0)
                        for aRule in lTokenLines:
                            for sElem in lToken:
                                aNewRule = list(aRule)
                                aNewRule.append("(" + sElem + ")" if bSelectedGroup else sElem)
                                lNewTemp.append(aNewRule)
                            aRule.append("(" + sElem1 + ")" if bSelectedGroup else sElem1)
                    lTokenLines.extend(lNewTemp)
            else:
                # simple token
                if not lTokenLines:
                    lTokenLines = [[sTokBlock], []] if bNullPossible else [[sTokBlock]]
                else:
                    if bNullPossible:
                        lNewTemp = []
                        for aRule in lTokenLines:
                            lNew = list(aRule)
                            lNew.append(sTokBlock)
                            lNewTemp.append(lNew)
                        lTokenLines.extend(lNewTemp)
                    else:
                        for aRule in lTokenLines:
                            aRule.append(sTokBlock)
        yield from lTokenLines

    def _createTokenList (self, sTokBlock):
        """Return the list of tokens found in a block of tokens such as "[a|b|c]".

        The first and last characters of <sTokBlock> are dropped and the rest is
        split on "|". A token containing "+" (not in first position) and ending
        with a code of <self.dDecl> is replaced by its stem followed by the stem
        concatenated with each suffix declared for that code.
        """
        lToken = []
        for sToken in sTokBlock[1:-1].split("|"):
            if "+" in sToken and not sToken.startswith("+"):
                # NOTE(review): a token containing "+" that matches no code of <dDecl>
                # is not added to the list at all -- confirm this is intended.
                for sCode in self.dDecl:
                    if sToken.endswith(sCode):
                        sToken = sToken[:-len(sCode)]
                        lToken.append(sToken)
                        for sSuffix in self.dDecl[sCode]:
                            lToken.append(sToken+sSuffix)
                        break
            else:
                lToken.append(sToken)
        return lToken

    def createGraphAndActions (self, sGraphName, lRuleLine, sLang):
        """Create a graph as a dictionary with <lRuleLine>.

        Each item of <lRuleLine> is unpacked as
        (iLine, sRuleName, sTokenLine, iActionBlock, lActions, nPriority) and
        expanded by <createRule>; the prepared rules are given to <darg.DARG>.
        Progress and timing information is printed. Returns the dictionary
        produced by <DARG.createGraph>.
        """
        fStartTimer = time.time()
        print("{:>8,} rules in {:<24} ".format(len(lRuleLine), "<"+sGraphName+">"), end="")
        lPreparedRule = []
        for i, sRuleName, sTokenLine, iActionBlock, lActions, nPriority in lRuleLine:
            for aRule in self.createRule(i, sRuleName, sTokenLine, iActionBlock, lActions, nPriority):
                lPreparedRule.append(aRule)
        # Debugging
        if False:
            print("\nRULES:")
            for e in lPreparedRule:
                if e[-2] == "##2211":
                    print(e)
        # Graph creation
        oDARG = darg.DARG(lPreparedRule, sLang)
        dGraph = oDARG.createGraph()
        print(oDARG, end="")
        # debugging
        if False:
            print("\nGRAPH:", sGraphName)
            for k, v in dGraph.items():
                print(k, "\t", v)
        print("\tin {:>8.2f} s".format(time.time()-fStartTimer))
        return dGraph

    def createRule (self, iLine, sRuleName, sTokenLine, iActionBlock, lActions, nPriority):
        """Generator: create rules as lists.

        If <sTokenLine> is an antipattern ("!! … ¡¡"), its token lines are stored
        in <self.dAntiPatterns> under <sRuleName> and nothing is yielded.
        Otherwise, for each token line which is not a stored antipattern and for
        each non-empty action of <lActions> (pairs of (iActionLine, sAction)),
        yields the tokens followed by "##<iLine>" and the name of the stored action.
        """
        if sTokenLine.startswith("!!") and sTokenLine.endswith("¡¡"):
            # antipattern
            sTokenLine = sTokenLine[2:-2].strip()
            if sRuleName not in self.dAntiPatterns:
                self.dAntiPatterns[sRuleName]= []
            for lToken in self._genTokenLines(sTokenLine):
                self.dAntiPatterns[sRuleName].append(lToken)
        else:
            # pattern
            for lToken in self._genTokenLines(sTokenLine):
                if sRuleName in self.dAntiPatterns and lToken in self.dAntiPatterns[sRuleName]:
                    # <lToken> matches an antipattern -> discard
                    continue
                # Calculate positions
                dPos = {}   # key: iGroup, value: iToken
                iGroup = 0
                for i, sToken in enumerate(lToken):
                    if sToken.startswith("(") and sToken.endswith(")"):
                        lToken[i] = sToken[1:-1]
                        iGroup += 1
                        dPos[iGroup] = i + 1    # we add 1, for we count tokens from 1 to n (not from 0)
                # Parse actions
                for iAction, (iActionLine, sAction) in enumerate(lActions):
                    sAction = sAction.strip()
                    if sAction:
                        sActionId = sRuleName + "__b" + str(iActionBlock) + "_a" + str(iAction)
                        aAction = self.createAction(sActionId, sAction, nPriority, len(lToken), dPos, iActionLine)
                        if aAction:
                            sActionName = self.storeAction(sActionId, aAction)
                            lResult = list(lToken)
                            lResult.extend(["##"+str(iLine), sActionName])
                            yield lResult
                        else:
                            print(" # Error on action at line:", iLine)
                            print(sTokenLine, "\n", lActions)
                    else:
                        print("No action found for ", iActionLine)

    def createAction (self, sActionId, sAction, nPriority, nToken, dPos, iActionLine):
        """Create an action as a list, or return None if <sAction> is not valid.

        <sAction> is read as: [/option/] [condition] <action marker> [content].
        The kind of action is the character found by the marker regex:
            ">"         [sLineId, sOption, sCondition, cAction, ""]
            "-"         [sLineId, sOption, sCondition, cAction, sAction, iStartAction, iEndAction,
                         cStartLimit, cEndLimit, bCaseSensitivity, nPriority, sMsg, sURL]
            "~"         [sLineId, sOption, sCondition, cAction, sAction, iStartAction, iEndAction,
                         bCaseSensitivity]
            "!" or "/"  [sLineId, sOption, sCondition, cAction, sAction, iStartAction, iEndAction]
            "="         [sLineId, sOption, sCondition, cAction, sAction]
        Group references are converted to token positions with <dPos>.
        Problems are reported with print().
        """
        # Option
        sOption = False
        m = re.match("/(\\w+)/", sAction)
        if m:
            sOption = m.group(1)
            sAction = sAction[m.end():].strip()
        if nPriority == -1:
            nPriority = self.dOptPriority.get(sOption, 4)

        # valid action?
        m = re.search(r"(?P<action>[-=~/!>])(?P<start>-?\d+\.?|)(?P<end>:\.?-?\d+|)(?P<casing>:|)>>", sAction)
        if not m:
            print("\n# Error. No action found at: ", sActionId)
            return None

        # Condition
        sCondition = sAction[:m.start()].strip()
        if sCondition:
            sCondition = changeReferenceToken(sCondition, dPos)
            sCondition = self.createFunction("cond", sCondition)
        else:
            sCondition = ""

        # Case sensitivity
        bCaseSensitivity = not bool(m.group("casing"))

        # Action
        cAction = m.group("action")
        sAction = sAction[m.end():].strip()
        sAction = changeReferenceToken(sAction, dPos)
        # target
        cStartLimit = "<"
        cEndLimit = ">"
        if not m.group("start"):
            iStartAction = 1
            iEndAction = 0
        else:
            if cAction != "-" and (m.group("start").endswith(".") or m.group("end").startswith(":.")):
                print("\n# Error. Wrong selection on tokens.", sActionId)
                return None
            if m.group("start").endswith("."):
                cStartLimit = ">"
            iStartAction = int(m.group("start").rstrip("."))
            if not m.group("end"):
                iEndAction = iStartAction
            else:
                if m.group("end").startswith(":."):
                    cEndLimit = "<"
                iEndAction = int(m.group("end").lstrip(":."))
        if dPos and m.group("start"):
            # positions given as group numbers are converted to token numbers
            iStartAction = dPos.get(iStartAction, iStartAction)
            if iEndAction:
                iEndAction = dPos.get(iEndAction, iEndAction)
        if iStartAction < 0:
            iStartAction += 1
        if iEndAction < 0:
            iEndAction += 1

        if cAction == "-":
            ## error
            iMsg = sAction.find(" # ")
            if iMsg == -1:
                sMsg = "# Error. Error message not found."
                sURL = ""
                print("\n" + sMsg + " Action id: " + sActionId)
            else:
                sMsg = sAction[iMsg+3:].strip()
                sAction = sAction[:iMsg].strip()
                sURL = ""
                mURL = re.search("[|] *(https?://.*)", sMsg)
                if mURL:
                    sURL = mURL.group(1).strip()
                    sMsg = sMsg[:mURL.start(0)].strip()
                checkTokenNumbers(sMsg, sActionId, nToken)
                if sMsg[0:1] == "=":
                    sMsg = self.createFunction("msg", sMsg, True)
                else:
                    checkIfThereIsCode(sMsg, sActionId)

        # checking consistency
        checkTokenNumbers(sAction, sActionId, nToken)

        sLineId = "#" + str(iActionLine)

        if cAction == ">":
            ## no action, break loop if condition is False
            return [sLineId, sOption, sCondition, cAction, ""]

        if not sAction and cAction != "!":
            print("\n# Error in action at line <" + sActionId + ">: This action is empty.")

        if sAction[0:1] != "=" and cAction != "=":
            checkIfThereIsCode(sAction, sActionId)

        if cAction == "-":
            ## error detected --> suggestion
            if sAction[0:1] == "=":
                sAction = self.createFunction("sugg", sAction, True)
            elif sAction.startswith('"') and sAction.endswith('"'):
                sAction = sAction[1:-1]
            if not sMsg:
                print("\n# Error in action at line <" + sActionId + ">: The message is empty.")
            return [sLineId, sOption, sCondition, cAction, sAction, iStartAction, iEndAction, cStartLimit, cEndLimit, bCaseSensitivity, nPriority, sMsg, sURL]
        if cAction == "~":
            ## text processor
            if sAction[0:1] == "=":
                sAction = self.createFunction("tp", sAction, True)
            elif sAction.startswith('"') and sAction.endswith('"'):
                sAction = sAction[1:-1]
            elif sAction not in "␣*_":
                nToken = sAction.count("|") + 1
                if iStartAction > 0 and iEndAction > 0:
                    if (iEndAction - iStartAction + 1) != nToken:
                        print("\n# Error in action at line <" + sActionId + ">: numbers of modified tokens modified.")
                # NOTE(review): <and> binds tighter than <or>, so this condition reads
                # iStartAction < 0 or (iEndAction < 0 and iStartAction != iEndAction) -- confirm the intended grouping.
                elif iStartAction < 0 or iEndAction < 0 and iStartAction != iEndAction:
                    # <sActionId> is the only action identifier defined in this method
                    print("\n# Warning in action at line <" + sActionId + ">: rewriting with possible token position modified.")
            return [sLineId, sOption, sCondition, cAction, sAction, iStartAction, iEndAction, bCaseSensitivity]
        if cAction in "!/":
            ## tags
            return [sLineId, sOption, sCondition, cAction, sAction, iStartAction, iEndAction]
        if cAction == "=":
            ## disambiguator
            if "define(" in sAction and not re.search(r"define\(\\-?\d+ *, *\[.*\] *\)", sAction):
                print("\n# Error in action at line <" + sActionId + ">: second argument for <define> must be a list of strings")
            sAction = self.createFunction("da", sAction)
            return [sLineId, sOption, sCondition, cAction, sAction]
        print("\n# Unknown action.", sActionId)
        return None

    def storeAction (self, sActionId, aAction):
        """Store <aAction> in <self.dActions> avoiding duplicates and return the action name.

        Names are <sActionId> + "_" + n, with n counting from 0: the first name
        which is free, or already bound to an action equal to <aAction>, is used.
        """
        nVar = 0
        while True:
            sActionName = sActionId + "_" + str(nVar)
            if sActionName not in self.dActions:
                self.dActions[sActionName] = aAction
                return sActionName
            if aAction == self.dActions[sActionName]:
                return sActionName
            nVar += 1

    def showActions (self):
        "debugging function: print every action stored in <self.dActions>"
        print("\nActions:")
        for sActionName, aAction in self.dActions.items():
            print(sActionName, aAction)

    def createFunction (self, sType, sCode, bStartWithEqual=False):
        """Create a function (stored in <self.dFunctions>) and return the function name.

        <sCode> is converted with <rewriteCode> before being stored.
        If <bStartWithEqual> is True, the returned name is prefixed with "=".
        """
        sCode = rewriteCode(sCode)
        sFuncName = self._getNameForCode(sType, sCode)
        self.dFunctions[sFuncName] = sCode
        return sFuncName if not bStartWithEqual else "="+sFuncName

    def _getNameForCode (self, sType, sCode):
        """Create and return a name for a code: "_g_" + <sType> + "_" + index.

        The index is assigned in order of first appearance of <sCode> for
        <sType>, starting at 1; the same code always gets the same name.
        """
        if sType not in self.dFuncName:
            self.dFuncName[sType] = {}
        if sCode not in self.dFuncName[sType]:
            self.dFuncName[sType][sCode] = len(self.dFuncName[sType])+1
        return "_g_" + sType + "_" + str(self.dFuncName[sType][sCode])

    def createCallables (self):
        """Return callables for Python and JavaScript as a tuple of two strings.

        One function is generated for each entry of <self.dFunctions>; the
        parameters depend on the prefix of the function name. Entries with an
        unknown prefix are reported and skipped.
        """
        print(" creating callables for graph rules...")
        lPyCallables = []
        lJSCallables = []
        for sFuncName, sReturn in self.dFunctions.items():
            if sFuncName.startswith("_g_cond_"): # condition
                sParams = "lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, dTags, sSentence, sSentence0"
            elif sFuncName.startswith("_g_msg_"): # message
                sParams = "lToken, nTokenOffset, nLastToken"
            elif sFuncName.startswith("_g_sugg_"): # suggestion
                sParams = "lToken, nTokenOffset, nLastToken"
            elif sFuncName.startswith("_g_tp_"): # text preprocessor
                sParams = "lToken, nTokenOffset, nLastToken"
            elif sFuncName.startswith("_g_da_"): # disambiguator
                sParams = "lToken, nTokenOffset, nLastToken"
            else:
                print("# Unknown function type in [" + sFuncName + "]")
                continue
            # Python
            lPyCallables.append("def {} ({}):\n".format(sFuncName, sParams))
            lPyCallables.append(" return " + sReturn + "\n")
            # JavaScript
            lJSCallables.append(" {}: function ({})".format(sFuncName, sParams) + " {\n")
            lJSCallables.append(" return " + jsconv.py2js(sReturn) + ";\n")
            lJSCallables.append(" },\n")
        return "".join(lPyCallables), "".join(lJSCallables)


def make (lRule, sLang, dDef, dDecl, dOptPriority):
    "compile rules, returns a dictionary of values"
    # for clarity purpose, don’t create any file here

    # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
︙ | ︙ | |||
444 445 446 447 448 449 450 | lActions.clear() iActionBlock += 1 else: print("Unknown line at:", iLine) print(sLine) # processing rules | | > > < < < < < | < | < < < < < < < < < < | > < < | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | < < < < < > | | > > | 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 | lActions.clear() iActionBlock += 1 else: print("Unknown line at:", iLine) print(sLine) # processing rules print(" processing rules...") fStartTimer = time.time() nRule = 0 oGraphBuilder = GraphBuilder(dDef, dDecl, dOptPriority) for sGraphName, lRuleLine in dAllGraph.items(): nRule += len(lRuleLine) dGraph = oGraphBuilder.createGraphAndActions(sGraphName, lRuleLine, sLang) dAllGraph[sGraphName] = dGraph print(" Total: ", nRule, "rules, ", len(oGraphBuilder.dActions), "actions") print(" Build time: {:.2f} s".format(time.time() - fStartTimer)) sPyCallables, sJSCallables = oGraphBuilder.createCallables() #print(sPyCallables) return { # the graphs describe paths of tokens to actions which eventually execute callables "rules_graphs": str(dAllGraph), "rules_graphsJS": str(dAllGraph), "rules_actions": str(oGraphBuilder.dActions), "rules_actionsJS": jsconv.pyActionsToString(oGraphBuilder.dActions), "graph_callables": sPyCallables, "graph_callablesJS": sJSCallables } |
Modified darg.py from [49bc8d2039] to [9b17f8d6af].
|
| < < < < < | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 | """ RULE GRAPH BUILDER """ # by Olivier R. # License: MPL 2 import re class DARG: """DIRECT ACYCLIC RULE GRAPH""" # This code is inspired from Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115) def __init__ (self, lRule, sLangCode): # Preparing DARG self.sLangCode = sLangCode self.nRule = len(lRule) self.aPreviousRule = [] Node.resetNextId() self.oRoot = Node() self.lUncheckedNodes = [] # list of nodes that have not been checked for duplication. self.lMinimizedNodes = {} # list of unique nodes that have been checked for duplication. self.nNode = 0 self.nArc = 0 # build lRule.sort() for aRule in lRule: self.insert(aRule) self.finish() self.countNodes() self.countArcs() # BUILD DARG def insert (self, aRule): "insert a new rule (tokens must be inserted in order)" if aRule < self.aPreviousRule: exit("# Error: tokens must be inserted in order.") |
︙ | ︙ | |||
94 95 96 97 98 99 100 | def countArcs (self): "count arcs within the whole graph" self.nArc = len(self.oRoot.dArcs) for oNode in self.lMinimizedNodes: self.nArc += len(oNode.dArcs) | | | | 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 | def countArcs (self): "count arcs within the whole graph" self.nArc = len(self.oRoot.dArcs) for oNode in self.lMinimizedNodes: self.nArc += len(oNode.dArcs) def __str__ (self): "display informations about the rule graph" return " > DARG: {:>10,} rules, {:>10,} nodes, {:>10,} arcs".format(self.nRule, self.nNode, self.nArc) def createGraph (self): "create the graph as a dictionary" dGraph = { 0: self.oRoot.getNodeAsDict() } for oNode in self.lMinimizedNodes: sHashId = oNode.__hash__() if sHashId not in dGraph: |
︙ | ︙ |