Overview
| Comment: | [build] check lemmas |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk | build |
| Files: | files | file ages | folders |
| SHA3-256: |
b6e8d1bea57ddafc44ed66af87618ee0 |
| User & Date: | olr on 2020-11-16 21:57:29 |
| Other Links: | manifest | tags |
Context
|
2020-11-16
| ||
| 22:51 | [fr] mise à jour du dictionnaire check-in: c4896b5dd8 user: olr tags: trunk, fr | |
| 21:57 | [build] check lemmas check-in: b6e8d1bea5 user: olr tags: trunk, build | |
| 21:55 | [fr] ajustements check-in: 38a9f843dd user: olr tags: trunk, fr | |
Changes
Modified compile_rules_graph.py from [aa84907702] to [38bc0eb9da].
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
"""
Grammalecte: compile rules
Create a Direct Acyclic Rule Graphs (DARGs)
"""
import re
import os
import time
import concurrent.futures
import darg
import compile_rules_js_convert as jsconv
import helpers
#### PROCESS POOL EXECUTOR ####
xProcessPoolExecutor = None
def initProcessPoolExecutor (nMultiCPU=None):
"process pool executor initialisation"
| > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
"""
Grammalecte: compile rules
Create a Direct Acyclic Rule Graphs (DARGs)
"""
import re
import os
import time
import concurrent.futures
import darg
import compile_rules_js_convert as jsconv
import helpers
import graphspell
#### PROCESS POOL EXECUTOR ####
xProcessPoolExecutor = None
def initProcessPoolExecutor (nMultiCPU=None):
"process pool executor initialisation"
|
| ︙ | ︙ | |||
100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
self.dDef = dDef
self.dDecl = dDecl
self.dOptPriority = dOptPriority
self.dAntiPatterns = {}
self.dActions = {}
self.dFuncName = {}
self.dFunctions = {}
def _genTokenLines (self, sTokenLine):
"tokenize a string and return a list of lines of tokens"
lTokenLines = []
for sTokBlock in sTokenLine.split():
# replace merger characters by spaces
if "␣" in sTokBlock:
| > | 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
self.dDef = dDef
self.dDecl = dDecl
self.dOptPriority = dOptPriority
self.dAntiPatterns = {}
self.dActions = {}
self.dFuncName = {}
self.dFunctions = {}
self.dLemmas = {}
def _genTokenLines (self, sTokenLine):
"tokenize a string and return a list of lines of tokens"
lTokenLines = []
for sTokBlock in sTokenLine.split():
# replace merger characters by spaces
if "␣" in sTokBlock:
|
| ︙ | ︙ | |||
203 204 205 206 207 208 209 |
# debugging
if False:
print("\nGRAPH:", self.sGraphName)
for k, v in dGraph.items():
print(k, "\t", v)
print("\tin {:>8.2f} s".format(time.time()-fStartTimer))
sPyCallables, sJSCallables = self.createCallables()
| | | 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 |
# debugging
if False:
print("\nGRAPH:", self.sGraphName)
for k, v in dGraph.items():
print(k, "\t", v)
print("\tin {:>8.2f} s".format(time.time()-fStartTimer))
sPyCallables, sJSCallables = self.createCallables()
return dGraph, self.dActions, sPyCallables, sJSCallables, self.dLemmas
def createRule (self, iLine, sRuleName, sTokenLine, iActionBlock, lActions, nPriority):
"generator: create rule as list"
# print(iLine, "//", sRuleName, "//", sTokenLine, "//", lActions, "//", nPriority)
if sTokenLine.startswith("!!") and sTokenLine.endswith("¡¡"):
# antipattern
sTokenLine = sTokenLine[2:-2].strip()
|
| ︙ | ︙ | |||
231 232 233 234 235 236 237 |
#if iLine == 15818: # debug
# print(" ".join(lToken))
for i, sToken in enumerate(lToken):
if sToken.startswith("(") and sToken.endswith(")"):
lToken[i] = sToken[1:-1]
iGroup += 1
dPos[iGroup] = i + 1 # we add 1, for we count tokens from 1 to n (not from 0)
| | > > | 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 |
#if iLine == 15818: # debug
# print(" ".join(lToken))
for i, sToken in enumerate(lToken):
if sToken.startswith("(") and sToken.endswith(")"):
lToken[i] = sToken[1:-1]
iGroup += 1
dPos[iGroup] = i + 1 # we add 1, for we count tokens from 1 to n (not from 0)
# check lemmas
if sToken.startswith(">") and sToken != ">" and sToken[1:] not in self.dLemmas:
self.dLemmas[sToken[1:]] = iLine
# Parse actions
for iAction, (iActionLine, sAction) in enumerate(lActions, 1):
sAction = sAction.strip()
if sAction:
sActionId = f"{self.sGraphCode}__{sRuleName}__b{iActionBlock}_a{iAction}"
aAction = self.createAction(sActionId, sAction, nPriority, len(lToken), dPos, iActionLine)
if aAction:
|
| ︙ | ︙ | |||
450 451 452 453 454 455 456 |
sJSCallables += " },\n"
return sPyCallables, sJSCallables
def processing (sGraphName, sGraphCode, sLang, lRuleLine, dDef, dDecl, dOptPriority):
"to be run in a separate process"
oGraphBuilder = GraphBuilder(sGraphName, sGraphCode, sLang, dDef, dDecl, dOptPriority)
| | | > | 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 |
sJSCallables += " },\n"
return sPyCallables, sJSCallables
def processing (sGraphName, sGraphCode, sLang, lRuleLine, dDef, dDecl, dOptPriority):
    "to be run in a separate process"
    # Worker-process entry point: everything it needs is received as arguments
    # and the outcome is handed back as a plain tuple, so both directions stay
    # picklable for the process pool.
    oBuilder = GraphBuilder(sGraphName, sGraphCode, sLang, dDef, dDecl, dOptPriority)
    tBuilt = oBuilder.createGraphAndActions(lRuleLine)
    dGraph, dActions, sPy, sJS, dLemmas = tBuilt
    # prepend the graph name so the parent process can tell results apart
    return (sGraphName, dGraph, dActions, sPy, sJS, dLemmas)
def make (lRule, sLang, dDef, dDecl, dOptPriority):
"compile rules, returns a dictionary of values"
# for clarity purpose, don’t create any file here
# removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
print(" parsing graph rules...")
lTokenLine = []
lActions = []
bActionBlock = False
nPriority = -1
dAllGraph = {}
dGraphCode = {}
sGraphName = ""
iActionBlock = 0
aRuleName = set()
oDictionary = graphspell.SpellChecker("fr")
for iLine, sLine in lRule:
sLine = sLine.rstrip()
if "\t" in sLine:
# tabulation not allowed
print("# Error. Tabulation at line: ", iLine)
exit()
|
| ︙ | ︙ | |||
569 570 571 572 573 574 575 |
return "Executor broken. The server failed."
# merging results
xProcessPoolExecutor.shutdown(wait=True) # waiting that everything is finished
dAllActions = {}
sPyCallables = ""
sJSCallables = ""
for xFuture in lResult:
| | > > > > | 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 |
return "Executor broken. The server failed."
# merging results
xProcessPoolExecutor.shutdown(wait=True) # waiting that everything is finished
dAllActions = {}
sPyCallables = ""
sJSCallables = ""
for xFuture in lResult:
sGraphName, dGraph, dActions, sPy, sJS, dLemmas = xFuture.result()
dAllGraph[sGraphName] = dGraph
dAllActions.update(dActions)
sPyCallables += sPy
sJSCallables += sJS
# check lemmas
for sLemma, iLine in dLemmas.items():
if sLemma not in oDictionary.getLemma(sLemma):
print(f" # Error at line {iLine}: <{sLemma}> is not a known lemma")
# create a dictionary of URL
dTempURL = { "": 0 }
i = 1
for sKey, lValue in dAllActions.items():
if lValue[3] == "-":
if lValue[-1]:
if lValue[-1] not in dTempURL:
|
| ︙ | ︙ |
Modified lex_build.py from [5bdf726eee] to [d4a3c39602].
1 2 3 4 5 6 7 8 9 10 | #!python3 """ Lexicon builder """ import argparse from distutils import dir_util import graphspell.dawg as fsa | < | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
#!python3
"""
Lexicon builder
"""
import argparse
from distutils import dir_util
import graphspell.dawg as fsa
def build (spfSrc, sLangCode, sLangName, sfDict, bJavaScript=False, sDicName="", sDescription="", sFilter="", cStemmingMethod="S", nCompressMethod=1):
"transform a text lexicon as a binary indexable dictionary"
oDAWG = fsa.DAWG(spfSrc, cStemmingMethod, sLangCode, sLangName, sDicName, sDescription, sFilter)
dir_util.mkpath("graphspell/_dictionaries")
oDAWG.writeAsJSObject("graphspell/_dictionaries/" + sfDict + ".json")
|
| ︙ | ︙ |