Overview
Comment: | [build] check lemmas |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | build |
Files: | files | file ages | folders |
SHA3-256: | b6e8d1bea57ddafc44ed66af87618ee0 |
User & Date: | olr on 2020-11-16 21:57:29 |
Other Links: | manifest | tags |
Context
2020-11-16
| ||
22:51 | [fr] mise à jour du dictionnaire check-in: c4896b5dd8 user: olr tags: fr, trunk | |
21:57 | [build] check lemmas check-in: b6e8d1bea5 user: olr tags: build, trunk | |
21:55 | [fr] ajustements check-in: 38a9f843dd user: olr tags: fr, trunk | |
Changes
Modified compile_rules_graph.py from [aa84907702] to [38bc0eb9da].
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 | """ Grammalecte: compile rules Create a Direct Acyclic Rule Graphs (DARGs) """ import re import os import time import concurrent.futures import darg import compile_rules_js_convert as jsconv import helpers #### PROCESS POOL EXECUTOR #### xProcessPoolExecutor = None def initProcessPoolExecutor (nMultiCPU=None): "process pool executor initialisation" | > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 | """ Grammalecte: compile rules Create a Direct Acyclic Rule Graphs (DARGs) """ import re import os import time import concurrent.futures import darg import compile_rules_js_convert as jsconv import helpers import graphspell #### PROCESS POOL EXECUTOR #### xProcessPoolExecutor = None def initProcessPoolExecutor (nMultiCPU=None): "process pool executor initialisation" |
︙ | ︙ | |||
100 101 102 103 104 105 106 107 108 109 110 111 112 113 | self.dDef = dDef self.dDecl = dDecl self.dOptPriority = dOptPriority self.dAntiPatterns = {} self.dActions = {} self.dFuncName = {} self.dFunctions = {} def _genTokenLines (self, sTokenLine): "tokenize a string and return a list of lines of tokens" lTokenLines = [] for sTokBlock in sTokenLine.split(): # replace merger characters by spaces if "␣" in sTokBlock: | > | 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 | self.dDef = dDef self.dDecl = dDecl self.dOptPriority = dOptPriority self.dAntiPatterns = {} self.dActions = {} self.dFuncName = {} self.dFunctions = {} self.dLemmas = {} def _genTokenLines (self, sTokenLine): "tokenize a string and return a list of lines of tokens" lTokenLines = [] for sTokBlock in sTokenLine.split(): # replace merger characters by spaces if "␣" in sTokBlock: |
︙ | ︙ | |||
203 204 205 206 207 208 209 | # debugging if False: print("\nGRAPH:", self.sGraphName) for k, v in dGraph.items(): print(k, "\t", v) print("\tin {:>8.2f} s".format(time.time()-fStartTimer)) sPyCallables, sJSCallables = self.createCallables() | | | 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 | # debugging if False: print("\nGRAPH:", self.sGraphName) for k, v in dGraph.items(): print(k, "\t", v) print("\tin {:>8.2f} s".format(time.time()-fStartTimer)) sPyCallables, sJSCallables = self.createCallables() return dGraph, self.dActions, sPyCallables, sJSCallables, self.dLemmas def createRule (self, iLine, sRuleName, sTokenLine, iActionBlock, lActions, nPriority): "generator: create rule as list" # print(iLine, "//", sRuleName, "//", sTokenLine, "//", lActions, "//", nPriority) if sTokenLine.startswith("!!") and sTokenLine.endswith("¡¡"): # antipattern sTokenLine = sTokenLine[2:-2].strip() |
︙ | ︙ | |||
231 232 233 234 235 236 237 | #if iLine == 15818: # debug # print(" ".join(lToken)) for i, sToken in enumerate(lToken): if sToken.startswith("(") and sToken.endswith(")"): lToken[i] = sToken[1:-1] iGroup += 1 dPos[iGroup] = i + 1 # we add 1, for we count tokens from 1 to n (not from 0) | | > > | 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 | #if iLine == 15818: # debug # print(" ".join(lToken)) for i, sToken in enumerate(lToken): if sToken.startswith("(") and sToken.endswith(")"): lToken[i] = sToken[1:-1] iGroup += 1 dPos[iGroup] = i + 1 # we add 1, for we count tokens from 1 to n (not from 0) # check lemmas if sToken.startswith(">") and sToken != ">" and sToken[1:] not in self.dLemmas: self.dLemmas[sToken[1:]] = iLine # Parse actions for iAction, (iActionLine, sAction) in enumerate(lActions, 1): sAction = sAction.strip() if sAction: sActionId = f"{self.sGraphCode}__{sRuleName}__b{iActionBlock}_a{iAction}" aAction = self.createAction(sActionId, sAction, nPriority, len(lToken), dPos, iActionLine) if aAction: |
︙ | ︙ | |||
450 451 452 453 454 455 456 | sJSCallables += " },\n" return sPyCallables, sJSCallables def processing (sGraphName, sGraphCode, sLang, lRuleLine, dDef, dDecl, dOptPriority): "to be run in a separate process" oGraphBuilder = GraphBuilder(sGraphName, sGraphCode, sLang, dDef, dDecl, dOptPriority) | | | > | 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 | sJSCallables += " },\n" return sPyCallables, sJSCallables def processing (sGraphName, sGraphCode, sLang, lRuleLine, dDef, dDecl, dOptPriority): "to be run in a separate process" oGraphBuilder = GraphBuilder(sGraphName, sGraphCode, sLang, dDef, dDecl, dOptPriority) dGraph, dActions, sPy, sJS, dLemmas = oGraphBuilder.createGraphAndActions(lRuleLine) return (sGraphName, dGraph, dActions, sPy, sJS, dLemmas) def make (lRule, sLang, dDef, dDecl, dOptPriority): "compile rules, returns a dictionary of values" # for clarity purpose, don’t create any file here # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines print(" parsing graph rules...") lTokenLine = [] lActions = [] bActionBlock = False nPriority = -1 dAllGraph = {} dGraphCode = {} sGraphName = "" iActionBlock = 0 aRuleName = set() oDictionary = graphspell.SpellChecker("fr") for iLine, sLine in lRule: sLine = sLine.rstrip() if "\t" in sLine: # tabulation not allowed print("# Error. Tabulation at line: ", iLine) exit() |
︙ | ︙ | |||
569 570 571 572 573 574 575 | return "Executor broken. The server failed." # merging results xProcessPoolExecutor.shutdown(wait=True) # waiting that everything is finished dAllActions = {} sPyCallables = "" sJSCallables = "" for xFuture in lResult: | | > > > > | 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 | return "Executor broken. The server failed." # merging results xProcessPoolExecutor.shutdown(wait=True) # waiting that everything is finished dAllActions = {} sPyCallables = "" sJSCallables = "" for xFuture in lResult: sGraphName, dGraph, dActions, sPy, sJS, dLemmas = xFuture.result() dAllGraph[sGraphName] = dGraph dAllActions.update(dActions) sPyCallables += sPy sJSCallables += sJS # check lemmas for sLemma, iLine in dLemmas.items(): if sLemma not in oDictionary.getLemma(sLemma): print(f" # Error at line {iLine}: <{sLemma}> is not a known lemma") # create a dictionary of URL dTempURL = { "": 0 } i = 1 for sKey, lValue in dAllActions.items(): if lValue[3] == "-": if lValue[-1]: if lValue[-1] not in dTempURL: |
︙ | ︙ |
Modified lex_build.py from [5bdf726eee] to [d4a3c39602].
1 2 3 4 5 6 7 8 9 10 | #!python3 """ Lexicon builder """ import argparse from distutils import dir_util import graphspell.dawg as fsa | < | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 | #!python3 """ Lexicon builder """ import argparse from distutils import dir_util import graphspell.dawg as fsa def build (spfSrc, sLangCode, sLangName, sfDict, bJavaScript=False, sDicName="", sDescription="", sFilter="", cStemmingMethod="S", nCompressMethod=1): "transform a text lexicon as a binary indexable dictionary" oDAWG = fsa.DAWG(spfSrc, cStemmingMethod, sLangCode, sLangName, sDicName, sDescription, sFilter) dir_util.mkpath("graphspell/_dictionaries") oDAWG.writeAsJSObject("graphspell/_dictionaries/" + sfDict + ".json") |
︙ | ︙ |