Index: compile_rules_graph.py ================================================================== --- compile_rules_graph.py +++ compile_rules_graph.py @@ -9,10 +9,11 @@ import concurrent.futures import darg import compile_rules_js_convert as jsconv import helpers +import graphspell #### PROCESS POOL EXECUTOR #### xProcessPoolExecutor = None @@ -102,10 +103,11 @@ self.dOptPriority = dOptPriority self.dAntiPatterns = {} self.dActions = {} self.dFuncName = {} self.dFunctions = {} + self.dLemmas = {} def _genTokenLines (self, sTokenLine): "tokenize a string and return a list of lines of tokens" lTokenLines = [] for sTokBlock in sTokenLine.split(): @@ -205,11 +207,11 @@ print("\nGRAPH:", self.sGraphName) for k, v in dGraph.items(): print(k, "\t", v) print("\tin {:>8.2f} s".format(time.time()-fStartTimer)) sPyCallables, sJSCallables = self.createCallables() - return dGraph, self.dActions, sPyCallables, sJSCallables + return dGraph, self.dActions, sPyCallables, sJSCallables, self.dLemmas def createRule (self, iLine, sRuleName, sTokenLine, iActionBlock, lActions, nPriority): "generator: create rule as list" # print(iLine, "//", sRuleName, "//", sTokenLine, "//", lActions, "//", nPriority) if sTokenLine.startswith("!!") and sTokenLine.endswith("¡¡"): @@ -233,11 +235,13 @@ for i, sToken in enumerate(lToken): if sToken.startswith("(") and sToken.endswith(")"): lToken[i] = sToken[1:-1] iGroup += 1 dPos[iGroup] = i + 1 # we add 1, for we count tokens from 1 to n (not from 0) - + # check lemmas + if sToken.startswith(">") and sToken != ">" and sToken[1:] not in self.dLemmas: + self.dLemmas[sToken[1:]] = iLine # Parse actions for iAction, (iActionLine, sAction) in enumerate(lActions, 1): sAction = sAction.strip() if sAction: sActionId = f"{self.sGraphCode}__{sRuleName}__b{iActionBlock}_a{iAction}" @@ -452,12 +456,12 @@ def processing (sGraphName, sGraphCode, sLang, lRuleLine, dDef, dDecl, dOptPriority): "to be run in a separate process" oGraphBuilder = GraphBuilder(sGraphName, sGraphCode, sLang, dDef, dDecl, dOptPriority) - dGraph, dActions, sPy, sJS = oGraphBuilder.createGraphAndActions(lRuleLine) - return (sGraphName, dGraph, dActions, sPy, sJS) + dGraph, dActions, sPy, sJS, dLemmas = oGraphBuilder.createGraphAndActions(lRuleLine) + return (sGraphName, dGraph, dActions, sPy, sJS, dLemmas) def make (lRule, sLang, dDef, dDecl, dOptPriority): "compile rules, returns a dictionary of values" # for clarity purpose, don’t create any file here @@ -471,10 +475,11 @@ dAllGraph = {} dGraphCode = {} sGraphName = "" iActionBlock = 0 aRuleName = set() + oDictionary = graphspell.SpellChecker("fr") for iLine, sLine in lRule: sLine = sLine.rstrip() if "\t" in sLine: # tabulation not allowed @@ -571,15 +576,19 @@ xProcessPoolExecutor.shutdown(wait=True) # waiting that everything is finished dAllActions = {} sPyCallables = "" sJSCallables = "" for xFuture in lResult: - sGraphName, dGraph, dActions, sPy, sJS = xFuture.result() + sGraphName, dGraph, dActions, sPy, sJS, dLemmas = xFuture.result() dAllGraph[sGraphName] = dGraph dAllActions.update(dActions) sPyCallables += sPy sJSCallables += sJS + # check lemmas + for sLemma, iLine in dLemmas.items(): + if sLemma not in oDictionary.getLemma(sLemma): + print(f" # Error at line {iLine}: <{sLemma}> is not a known lemma") # create a dictionary of URL dTempURL = { "": 0 } i = 1 for sKey, lValue in dAllActions.items(): if lValue[3] == "-": Index: lex_build.py ================================================================== --- lex_build.py +++ lex_build.py @@ -6,11 +6,10 @@ import argparse from distutils import dir_util import graphspell.dawg as fsa -from graphspell.ibdawg import IBDAWG def build (spfSrc, sLangCode, sLangName, sfDict, bJavaScript=False, sDicName="", sDescription="", sFilter="", cStemmingMethod="S", nCompressMethod=1): "transform a text lexicon as a binary indexable dictionary" oDAWG = fsa.DAWG(spfSrc, cStemmingMethod, sLangCode, sLangName, sDicName, sDescription, sFilter)