Grammalecte  Check-in [0f6ac8c5a7]

Overview
Comment:[build][core] graph generation update
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | build | rg
Files: files | file ages | folders
SHA3-256: 0f6ac8c5a79a323a681bb3e616f3635c4959af9a9d2db72625345ddda38806ac
User & Date: olr on 2018-05-20 10:07:41
Other Links: branch diff | manifest | tags
Context
2018-05-21
13:08
[build][core] small code cleaning check-in: 29461e1888 user: olr tags: core, build, rg
2018-05-20
10:07
[build][core] graph generation update check-in: 0f6ac8c5a7 user: olr tags: core, build, rg
2018-05-19
14:28
[build][core] graph rule: condition is moved in action check-in: 5d1e6b3f8b user: olr tags: core, build, rg
Changes

Modified compile_rules_graph.py from [ecde868c3b] to [7c9c436423].

1
2
3
4
5
6

7
8
9
10
11
12
13
1
2
3
4
5

6
7
8
9
10
11
12
13





-
+







# Create a Direct Acyclic Rule Graph (DARG)

import re
import traceback
import json
import datg
import darg


dDEF = {}
dACTIONS = {}
lFUNCTIONS = []


79
80
81
82
83
84
85
86
87


88
89
90
91
92
93
94
79
80
81
82
83
84
85


86
87
88
89
90
91
92
93
94







-
-
+
+







        print("   ==", sAction, "==")
        return None
    # Condition
    sCondition = sAction[:m.start()].strip()
    if sCondition:
        sCondition = prepareFunction(sCondition)
        sCondition = changeReferenceToken(sCondition, dPos)    
        lFUNCTIONS.append(("gc_"+sIdAction, sCondition))
        sCondition = "gc_"+sIdAction
        lFUNCTIONS.append(("g_c_"+sIdAction, sCondition))
        sCondition = "g_c_"+sIdAction
    else:
        sCondition = ""
    # Action
    cAction = m.group(1)
    sAction = sAction[m.end():].strip()
    sAction = changeReferenceToken(sAction, dPos)
    iStartAction = int(m.group(2))  if m.group(2)  else 0
110
111
112
113
114
115
116
117

118
119
120
121

122
123
124
125
126
127
128
110
111
112
113
114
115
116

117
118
119
120

121
122
123
124
125
126
127
128







-
+



-
+







            sURL = ""
            mURL = re.search("[|] *(https?://.*)", sMsg)
            if mURL:
                sURL = mURL.group(1).strip()
                sMsg = sMsg[:mURL.start(0)].strip()
            if sMsg[0:1] == "=":
                sMsg = prepareFunction(sMsg[1:])
                lFUNCTIONS.append(("gm_"+sIdAction, sMsg))
                lFUNCTIONS.append(("g_m_"+sIdAction, sMsg))
                for x in re.finditer("group[(](\d+)[)]", sMsg):
                    if int(x.group(1)) > nGroup:
                        print("# Error in groups in message at line " + sIdAction + " ("+str(nGroup)+" groups only)")
                sMsg = "=m_"+sIdAction
                sMsg = "=g_m_"+sIdAction
            else:
                for x in re.finditer(r"\\(\d+)", sMsg):
                    if int(x.group(1)) > nGroup:
                        print("# Error in groups in message at line " + sIdAction + " ("+str(nGroup)+" groups only)")
                if re.search("[.]\\w+[(]", sMsg):
                    print("# Error in message at line " + sIdAction + ":  This message looks like code. Line should begin with =")
            
141
142
143
144
145
146
147
148
149


150
151
152
153
154
155
156
157
158
159
160
161


162
163
164
165
166
167
168
169
170
171
172


173
174
175
176
177
178
179
141
142
143
144
145
146
147


148
149
150
151
152
153
154
155
156
157
158
159


160
161
162
163
164
165
166
167
168
169
170


171
172
173
174
175
176
177
178
179







-
-
+
+










-
-
+
+









-
-
+
+







            print("# Error in action at line " + sIdAction + ":  This action looks like code. Line should begin with =")

    if cAction == "-":
        ## error detected --> suggestion
        if not sAction:
            print("# Error in action at line " + sIdAction + ":  This action is empty.")
        if sAction[0:1] == "=":
            lFUNCTIONS.append(("gs_"+sIdAction, sAction[1:]))
            sAction = "=gs_"+sIdAction
            lFUNCTIONS.append(("g_s_"+sIdAction, sAction[1:]))
            sAction = "=g_s_"+sIdAction
        elif sAction.startswith('"') and sAction.endswith('"'):
            sAction = sAction[1:-1]
        if not sMsg:
            print("# Error in action at line " + sIdAction + ":  The message is empty.")
        return [sCondition, cAction, sAction, iStartAction, iEndAction, nPriority, sMsg, sURL]
    elif cAction == "~":
        ## text processor
        if not sAction:
            print("# Error in action at line " + sIdAction + ":  This action is empty.")
        if sAction[0:1] == "=":
            lFUNCTIONS.append(("gp_"+sIdAction, sAction[1:]))
            sAction = "=gp_"+sIdAction
            lFUNCTIONS.append(("g_p_"+sIdAction, sAction[1:]))
            sAction = "=g_p_"+sIdAction
        elif sAction.startswith('"') and sAction.endswith('"'):
            sAction = sAction[1:-1]
        return [sCondition, cAction, sAction, iStartAction, iEndAction]
    elif cAction == "=":
        ## disambiguator
        if sAction[0:1] == "=":
            sAction = sAction[1:]
        if not sAction:
            print("# Error in action at line " + sIdAction + ":  This action is empty.")
        lFUNCTIONS.append(("gd_"+sIdAction, sAction))
        sAction = "gd_"+sIdAction
        lFUNCTIONS.append(("g_d_"+sIdAction, sAction))
        sAction = "g_d_"+sIdAction
        return [sCondition, cAction, sAction]
    elif cAction == ">":
        ## no action, break loop if condition is False
        return [sCondition, cAction, ""]
    else:
        print("# Unknown action at line " + sIdAction)
        return None
262
263
264
265
266
267
268
269
270


271
272
273
274
275
276




277
278
279
280
281
262
263
264
265
266
267
268


269
270
271
272
273



274
275
276
277
278
279
280
281
282







-
-
+
+



-
-
-
+
+
+
+





        for lRule in createRule(i, sRuleGroup, sTokenLine, sActions, nPriority):
            lPreparedRule.append(lRule)

    # Graph creation
    for e in lPreparedRule:
        print(e)

    oDATG = datg.DATG(lPreparedRule, sLang)
    oRuleGraph = oDATG.createGraph()
    oDARG = darg.DARG(lPreparedRule, sLang)
    oRuleGraph = oDARG.createGraph()

    # Result
    d = {
        "g_callables": None,
        "g_gctests": None,
        "graph_rules": None,
        "graph_callables": None,
        "graph_gctests": None,
        "rules_graph": oRuleGraph,
        "rules_actions": dACTIONS
    }

    return d


Renamed and modified datg.py [c288dbf7f8] to darg.py [06ce413e8b].

10
11
12
13
14
15
16
17
18


19
20
21
22
23
24

25
26
27
28
29
30
31
10
11
12
13
14
15
16


17
18
19
20
21
22
23

24
25
26
27
28
29
30
31







-
-
+
+





-
+







import time
import traceback

from graphspell.progressbar import ProgressBar



class DATG:
    """DIRECT ACYCLIC TOKEN GRAPH"""
class DARG:
    """DIRECT ACYCLIC RULE GRAPH"""
    # This code is inspired from Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115)

    def __init__ (self, lRule, sLangCode):
        print("===== Direct Acyclic Token Graph - Minimal Acyclic Finite State Automaton =====")

        # Preparing DATG
        # Preparing DARG
        print(" > Preparing list of tokens")
        self.sLangCode = sLangCode
        self.nRule = len(lRule)
        self.aPreviousRule = []
        Node.resetNextId()
        self.oRoot = Node()
        self.lUncheckedNodes = []  # list of nodes that have not been checked for duplication.
41
42
43
44
45
46
47
48

49
50
51
52
53
54
55
41
42
43
44
45
46
47

48
49
50
51
52
53
54
55







-
+







            oProgBar.increment(1)
        oProgBar.done()
        self.finish()
        self.countNodes()
        self.countArcs()
        self.displayInfo()

    # BUILD DATG
    # BUILD DARG
    def insert (self, aRule):
        if aRule < self.aPreviousRule:
            sys.exit("# Error: tokens must be inserted in order.")
    
        # find common prefix between word and previous word
        nCommonPrefix = 0
        for i in range(min(len(aRule), len(self.aPreviousRule))):

Added gc_core/py/lang_core/gc_rules_graph.py version [e9a58f5498].






1
2
3
4
5
+
+
+
+
+
# generated code, do not edit

dGraph = ${rules_graph}

dRule = ${rules_actions}

Added gc_core/py/lang_core/gc_sentence.py version [5e66d88647].


























































































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
# Sentence checker

from ..graphspell.tokenizer import Tokenizer
from .gc_graph import dGraph


oTokenizer = Tokenizer("${lang}")


class Sentence:

    def __init__ (self, sSentence, sSentence0, nOffset):
        self.sSentence = sSentence
        self.sSentence0 = sSentence0
        self.nOffset = nOffset
        self.lToken = list(oTokenizer.genTokens())

    def parse (self):
        dErr = {}
        lPointer = []
        for dToken in self.lToken:
            for i, dPointer in enumerate(lPointer):
                bValid = False
                for dNode in self._getNextMatchingNodes(dToken, dPointer["dNode"]):
                    dPointer["nOffset"] += 1
                    dPointer["dNode"] = dNode
                    bValid = True
                if not bValid:
                    del lPointer[i]
            for dNode in self._getNextMatchingNodes(dToken, dGraph):
                lPointer.append({"nOffset": 0, "dNode": dNode})
            for dPointer in lPointer:
                if "<rules>" in dPointer["dNode"]:
                    for dNode in dGraph[dPointer["dNode"]["<rules>"]]:
                        dErr = self._executeActions(dNode)
        return dErr

    def _getNextMatchingNodes (self, dToken, dNode):
        if dToken["sValue"] in dNode:
            yield dGraph[dNode[dToken["sValue"]]]
        for sLemma in dToken["sLemma"]:
            if sLemma in dNode:
                yield dGraph[dNode[dToken["sValue"]]]
        if "~" in dNode:
            for sRegex in dNode["~"]:
                for sMorph in dToken["lMorph"]:
                    if re.search(sRegex, sMorph):
                        yield dGraph[dNode["~"][sRegex]]

    def _executeActions (self, dNode):
        for sLineId, nextNodeKey in dNode.items():
            for sArc in dGraph[nextNodeKey]:
                bCondMemo = None
                sFuncCond, cActionType, sWhat, *eAct = dRule[sArc]
                # action in lActions: [ condition, action type, replacement/suggestion/action[, iGroupStart, iGroupEnd[, message, URL]] ]
                try:
                    bCondMemo = not sFuncCond or globals()[sFuncCond](s, sx, m, dDA, sCountry, bCondMemo)
                    if bCondMemo:
                        if cActionType == "-":
                            # grammar error
                            nErrorStart = nOffset + m.start(eAct[0])
                            nErrorEnd = nOffset + m.start(eAct[1])
                            if nErrorStart not in dErrs or nPriority > dPriority[nErrorStart]:
                                dErrs[nErrorStart] = _createError(self.lToken, self.sSentence0, sWhat, nOffset, m, nErrorStart, nErrorEnd, sLineId, bUppercase, eAct[2], eAct[3], bIdRule, sOption, bContext)
                                dPriority[nErrorStart] = nPriority
                        elif cActionType == "~":
                            # text processor
                            self.lToken = _rewrite(self.lToken, sWhat, bUppercase)
                            bChange = True
                        elif cActionType == "=":
                            # disambiguation
                            globals()[sWhat](self.lToken, dDA)
                        elif cActionType == ">":
                            # we do nothing, this test is just a condition to apply all following actions
                            pass
                        else:
                            echo("# error: unknown action at " + sLineId)
                    elif cActionType == ">":
                        break
                except Exception as e:
                    raise Exception(str(e), "# " + sLineId + " # " + sRuleId)

    def _createWriterError (self):
        d = {}
        return d

    def _createDictError (self):
        d = {}
        return d

Modified make.py from [08ab9e62c7] to [ff9ae5f2b3].

194
195
196
197
198
199
200

201
202
203
204
205
206
207
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208







+







    dVars['loc'] = str(dict([ [s, [s[0:2], s[3:5], ""]] for s in dVars["locales"].split(" ") ]))

    ## COMPILE RULES
    dResultRegex = compile_rules.make(spLang, dVars['lang'], bJavaScript)
    dVars.update(dResultRegex)

    dResultGraph = compile_rules_graph.make(spLang, dVars['lang'], bJavaScript)
    dVars.update(dResultGraph)

    ## READ GRAMMAR CHECKER PLUGINS
    print("PYTHON:")
    print("+ Plugins: ", end="")
    sCodePlugins = ""
    for sf in os.listdir(spLang+"/modules"):
        if re.match(r"gce_\w+[.]py$", sf):