Grammalecte  Check-in [b6e8d1bea5]

Overview
Comment:[build] check lemmas
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | build
Files: files | file ages | folders
SHA3-256: b6e8d1bea57ddafc44ed66af87618ee0ebdf510d1cefc845a719739d760d2e5e
User & Date: olr on 2020-11-16 21:57:29
Other Links: manifest | tags
Context
2020-11-16
22:51
[fr] mise à jour du dictionnaire check-in: c4896b5dd8 user: olr tags: fr, trunk
21:57
[build] check lemmas check-in: b6e8d1bea5 user: olr tags: build, trunk
21:55
[fr] ajustements check-in: 38a9f843dd user: olr tags: fr, trunk
Changes

Modified compile_rules_graph.py from [aa84907702] to [38bc0eb9da].

1
2
3
4
5
6
7
8
9
10
11
12
13

14
15
16
17
18
19
20
"""
Grammalecte: compile rules
Create Directed Acyclic Rule Graphs (DARGs)
"""

import re
import os
import time
import concurrent.futures

import darg
import compile_rules_js_convert as jsconv
import helpers



#### PROCESS POOL EXECUTOR ####
xProcessPoolExecutor = None

def initProcessPoolExecutor (nMultiCPU=None):
    "process pool executor initialisation"













>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
"""
Grammalecte: compile rules
Create Directed Acyclic Rule Graphs (DARGs)
"""

import re
import os
import time
import concurrent.futures

import darg
import compile_rules_js_convert as jsconv
import helpers
import graphspell


#### PROCESS POOL EXECUTOR ####
xProcessPoolExecutor = None

def initProcessPoolExecutor (nMultiCPU=None):
    "process pool executor initialisation"
100
101
102
103
104
105
106

107
108
109
110
111
112
113
        self.dDef = dDef
        self.dDecl = dDecl
        self.dOptPriority = dOptPriority
        self.dAntiPatterns = {}
        self.dActions = {}
        self.dFuncName = {}
        self.dFunctions = {}


    def _genTokenLines (self, sTokenLine):
        "tokenize a string and return a list of lines of tokens"
        lTokenLines = []
        for sTokBlock in sTokenLine.split():
            # replace merger characters by spaces
            if "␣" in sTokBlock:







>







101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
        self.dDef = dDef
        self.dDecl = dDecl
        self.dOptPriority = dOptPriority
        self.dAntiPatterns = {}
        self.dActions = {}
        self.dFuncName = {}
        self.dFunctions = {}
        self.dLemmas = {}

    def _genTokenLines (self, sTokenLine):
        "tokenize a string and return a list of lines of tokens"
        lTokenLines = []
        for sTokBlock in sTokenLine.split():
            # replace merger characters by spaces
            if "␣" in sTokBlock:
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
        # debugging
        if False:
            print("\nGRAPH:", self.sGraphName)
            for k, v in dGraph.items():
                print(k, "\t", v)
        print("\tin {:>8.2f} s".format(time.time()-fStartTimer))
        sPyCallables, sJSCallables = self.createCallables()
        return dGraph, self.dActions, sPyCallables, sJSCallables

    def createRule (self, iLine, sRuleName, sTokenLine, iActionBlock, lActions, nPriority):
        "generator: create rule as list"
        # print(iLine, "//", sRuleName, "//", sTokenLine, "//", lActions, "//", nPriority)
        if sTokenLine.startswith("!!") and sTokenLine.endswith("¡¡"):
            # antipattern
            sTokenLine = sTokenLine[2:-2].strip()







|







205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
        # debugging
        if False:
            print("\nGRAPH:", self.sGraphName)
            for k, v in dGraph.items():
                print(k, "\t", v)
        print("\tin {:>8.2f} s".format(time.time()-fStartTimer))
        sPyCallables, sJSCallables = self.createCallables()
        return dGraph, self.dActions, sPyCallables, sJSCallables, self.dLemmas

    def createRule (self, iLine, sRuleName, sTokenLine, iActionBlock, lActions, nPriority):
        "generator: create rule as list"
        # print(iLine, "//", sRuleName, "//", sTokenLine, "//", lActions, "//", nPriority)
        if sTokenLine.startswith("!!") and sTokenLine.endswith("¡¡"):
            # antipattern
            sTokenLine = sTokenLine[2:-2].strip()
231
232
233
234
235
236
237
238


239
240
241
242
243
244
245
                #if iLine == 15818: # debug
                #    print(" ".join(lToken))
                for i, sToken in enumerate(lToken):
                    if sToken.startswith("(") and sToken.endswith(")"):
                        lToken[i] = sToken[1:-1]
                        iGroup += 1
                        dPos[iGroup] = i + 1    # we add 1, for we count tokens from 1 to n (not from 0)



                # Parse actions
                for iAction, (iActionLine, sAction) in enumerate(lActions, 1):
                    sAction = sAction.strip()
                    if sAction:
                        sActionId = f"{self.sGraphCode}__{sRuleName}__b{iActionBlock}_a{iAction}"
                        aAction = self.createAction(sActionId, sAction, nPriority, len(lToken), dPos, iActionLine)
                        if aAction:







|
>
>







233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
                #if iLine == 15818: # debug
                #    print(" ".join(lToken))
                for i, sToken in enumerate(lToken):
                    if sToken.startswith("(") and sToken.endswith(")"):
                        lToken[i] = sToken[1:-1]
                        iGroup += 1
                        dPos[iGroup] = i + 1    # we add 1, for we count tokens from 1 to n (not from 0)
                    # check lemmas
                    if sToken.startswith(">") and sToken != ">" and sToken[1:] not in self.dLemmas:
                        self.dLemmas[sToken[1:]] = iLine
                # Parse actions
                for iAction, (iActionLine, sAction) in enumerate(lActions, 1):
                    sAction = sAction.strip()
                    if sAction:
                        sActionId = f"{self.sGraphCode}__{sRuleName}__b{iActionBlock}_a{iAction}"
                        aAction = self.createAction(sActionId, sAction, nPriority, len(lToken), dPos, iActionLine)
                        if aAction:
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475

476
477
478
479
480
481
482
            sJSCallables += "    },\n"
        return sPyCallables, sJSCallables


def processing (sGraphName, sGraphCode, sLang, lRuleLine, dDef, dDecl, dOptPriority):
    "build one rule graph and its actions (meant to be run in a separate process)"
    # The builder result is unpacked explicitly, then repacked with the graph
    # name in front so the caller can tell which graph the result belongs to.
    dRuleGraph, dRuleActions, sPyCallables, sJSCallables = GraphBuilder(
        sGraphName, sGraphCode, sLang, dDef, dDecl, dOptPriority
    ).createGraphAndActions(lRuleLine)
    return (sGraphName, dRuleGraph, dRuleActions, sPyCallables, sJSCallables)


def make (lRule, sLang, dDef, dDecl, dOptPriority):
    "compile rules, returns a dictionary of values"
    # for clarity purpose, don’t create any file here

    # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
    print("  parsing graph rules...")
    lTokenLine = []
    lActions = []
    bActionBlock = False
    nPriority = -1
    dAllGraph = {}
    dGraphCode = {}
    sGraphName = ""
    iActionBlock = 0
    aRuleName = set()


    for iLine, sLine in lRule:
        sLine = sLine.rstrip()
        if "\t" in sLine:
            # tabulation not allowed
            print("# Error. Tabulation at line: ", iLine)
            exit()







|
|

















>







454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
            sJSCallables += "    },\n"
        return sPyCallables, sJSCallables


def processing (sGraphName, sGraphCode, sLang, lRuleLine, dDef, dDecl, dOptPriority):
    "build one rule graph, its actions and its lemmas (meant to be run in a separate process)"
    # The builder result is unpacked explicitly, then repacked with the graph
    # name in front so the caller can tell which graph the result belongs to.
    dRuleGraph, dRuleActions, sPyCallables, sJSCallables, dFoundLemmas = GraphBuilder(
        sGraphName, sGraphCode, sLang, dDef, dDecl, dOptPriority
    ).createGraphAndActions(lRuleLine)
    return (sGraphName, dRuleGraph, dRuleActions, sPyCallables, sJSCallables, dFoundLemmas)


def make (lRule, sLang, dDef, dDecl, dOptPriority):
    "compile rules, returns a dictionary of values"
    # for clarity purpose, don’t create any file here

    # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
    print("  parsing graph rules...")
    lTokenLine = []
    lActions = []
    bActionBlock = False
    nPriority = -1
    dAllGraph = {}
    dGraphCode = {}
    sGraphName = ""
    iActionBlock = 0
    aRuleName = set()
    oDictionary = graphspell.SpellChecker("fr")

    for iLine, sLine in lRule:
        sLine = sLine.rstrip()
        if "\t" in sLine:
            # tabulation not allowed
            print("# Error. Tabulation at line: ", iLine)
            exit()
569
570
571
572
573
574
575
576
577
578
579
580




581
582
583
584
585
586
587
            return "Executor broken. The server failed."
    # merging results
    xProcessPoolExecutor.shutdown(wait=True) # wait until everything is finished
    dAllActions = {}
    sPyCallables = ""
    sJSCallables = ""
    for xFuture in lResult:
        sGraphName, dGraph, dActions, sPy, sJS = xFuture.result()
        dAllGraph[sGraphName] = dGraph
        dAllActions.update(dActions)
        sPyCallables += sPy
        sJSCallables += sJS




    # create a dictionary of URL
    dTempURL = { "": 0 }
    i = 1
    for sKey, lValue in dAllActions.items():
        if lValue[3] == "-":
            if lValue[-1]:
                if lValue[-1] not in dTempURL:







|




>
>
>
>







574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
            return "Executor broken. The server failed."
    # merging results
    xProcessPoolExecutor.shutdown(wait=True) # wait until everything is finished
    dAllActions = {}
    sPyCallables = ""
    sJSCallables = ""
    for xFuture in lResult:
        sGraphName, dGraph, dActions, sPy, sJS, dLemmas = xFuture.result()
        dAllGraph[sGraphName] = dGraph
        dAllActions.update(dActions)
        sPyCallables += sPy
        sJSCallables += sJS
        # check lemmas
        for sLemma, iLine in dLemmas.items():
            if sLemma not in oDictionary.getLemma(sLemma):
                print(f"  # Error at line {iLine}: <{sLemma}> is not a known lemma")
    # create a dictionary of URL
    dTempURL = { "": 0 }
    i = 1
    for sKey, lValue in dAllActions.items():
        if lValue[3] == "-":
            if lValue[-1]:
                if lValue[-1] not in dTempURL:

Modified lex_build.py from [5bdf726eee] to [d4a3c39602].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
#!python3

"""
Lexicon builder
"""

import argparse
from distutils import dir_util

import graphspell.dawg as fsa
from graphspell.ibdawg import IBDAWG


def build (spfSrc, sLangCode, sLangName, sfDict, bJavaScript=False, sDicName="", sDescription="", sFilter="", cStemmingMethod="S", nCompressMethod=1):
    "transform a text lexicon as a binary indexable dictionary"
    oDAWG = fsa.DAWG(spfSrc, cStemmingMethod, sLangCode, sLangName, sDicName, sDescription, sFilter)
    dir_util.mkpath("graphspell/_dictionaries")
    oDAWG.writeAsJSObject("graphspell/_dictionaries/" + sfDict + ".json")










<







1
2
3
4
5
6
7
8
9
10

11
12
13
14
15
16
17
#!python3

"""
Lexicon builder
"""

import argparse
from distutils import dir_util

import graphspell.dawg as fsa



def build (spfSrc, sLangCode, sLangName, sfDict, bJavaScript=False, sDicName="", sDescription="", sFilter="", cStemmingMethod="S", nCompressMethod=1):
    "transform a text lexicon as a binary indexable dictionary"
    oDAWG = fsa.DAWG(spfSrc, cStemmingMethod, sLangCode, sLangName, sDicName, sDescription, sFilter)
    dir_util.mkpath("graphspell/_dictionaries")
    oDAWG.writeAsJSObject("graphspell/_dictionaries/" + sfDict + ".json")