Grammalecte  Diff

Differences From Artifact [ef1f63d249]:

To Artifact [c4702b3e5a]:




1

2
3
4
5
6
7
8
9
10
11
12
13
14

15
16
17
18
19
20
21


# Create a Direct Acyclic Rule Graph (DARG)


import re
import traceback
import json

import darg


dACTIONS = {}
dFUNCTIONS = {}


def prepareFunction (s, bTokenValue=False):

    s = s.replace("__also__", "bCondMemo")
    s = s.replace("__else__", "not bCondMemo")
    s = re.sub(r"(morph|analyse|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset], dTags', s)
    s = re.sub(r"(tag_before|tag_after)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset], dTags', s)
    s = re.sub(r"(switchGender|has(?:Mas|Fem)Form)[(]\\(\d+)", '\\1(lToken[\\2+nTokenOffset]["sValue"]', s)
    s = re.sub(r"(morph|analyse)\(>1", 'g_\\1(lToken[nLastToken+1]', s)                     # next token
>
>
|
>













>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
"""
Grammalecte: compile rules
Create a Direct Acyclic Rule Graphs (DARGs)
"""

import re
import traceback
import json

import darg


dACTIONS = {}
dFUNCTIONS = {}


def prepareFunction (s, bTokenValue=False):
    "convert simple rule syntax to a string of Python code"
    s = s.replace("__also__", "bCondMemo")
    s = s.replace("__else__", "not bCondMemo")
    s = re.sub(r"(morph|analyse|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset], dTags', s)
    s = re.sub(r"(tag_before|tag_after)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset], dTags', s)
    s = re.sub(r"(switchGender|has(?:Mas|Fem)Form)[(]\\(\d+)", '\\1(lToken[\\2+nTokenOffset]["sValue"]', s)
    s = re.sub(r"(morph|analyse)\(>1", 'g_\\1(lToken[nLastToken+1]', s)                     # next token
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
    return s


def genTokenLines (sTokenLine, dDef):
    "tokenize a string and return a list of lines of tokens"
    lToken = sTokenLine.split()
    lTokenLines = None
    for i, sToken in enumerate(lToken):
        # optional token?
        bNullPossible = sToken.startswith("?") and sToken.endswith("¿")
        if bNullPossible:
            sToken = sToken[1:-1]
        # token with definition?
        if sToken.startswith("({") and sToken.endswith("})") and sToken[1:-1] in dDef:
            sToken = "(" + dDef[sToken[1:-1]] + ")"







|







39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
    return s


def genTokenLines (sTokenLine, dDef):
    "tokenize a string and return a list of lines of tokens"
    lToken = sTokenLine.split()
    lTokenLines = None
    for sToken in lToken:
        # optional token?
        bNullPossible = sToken.startswith("?") and sToken.endswith("¿")
        if bNullPossible:
            sToken = sToken[1:-1]
        # token with definition?
        if sToken.startswith("({") and sToken.endswith("})") and sToken[1:-1] in dDef:
            sToken = "(" + dDef[sToken[1:-1]] + ")"
92
93
94
95
96
97
98

99
100
101
102
103
104
105
                    for aRule in lTokenLines:
                        aRule.append(sToken)
    for aRule in lTokenLines:
        yield aRule


def createRule (iLine, sRuleName, sTokenLine, iActionBlock, sActions, nPriority, dDef):

    # print(iLine, "//", sRuleName, "//", sTokenLine, "//", sActions, "//", nPriority)
    for lToken in genTokenLines(sTokenLine, dDef):
        # Calculate positions
        dPos = {}   # key: iGroup, value: iToken
        iGroup = 0
        for i, sToken in enumerate(lToken):
            if sToken.startswith("(") and sToken.endswith(")"):







>







96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
                    for aRule in lTokenLines:
                        aRule.append(sToken)
    for aRule in lTokenLines:
        yield aRule


def createRule (iLine, sRuleName, sTokenLine, iActionBlock, sActions, nPriority, dDef):
    "generator: create rule as list"
    # print(iLine, "//", sRuleName, "//", sTokenLine, "//", sActions, "//", nPriority)
    for lToken in genTokenLines(sTokenLine, dDef):
        # Calculate positions
        dPos = {}   # key: iGroup, value: iToken
        iGroup = 0
        for i, sToken in enumerate(lToken):
            if sToken.startswith("(") and sToken.endswith(")"):
117
118
119
120
121
122
123

124
125
126
127
128
129

130
131
132
133
134
135
136

137
138
139
140
141
142

143
144
145
146
147
148
149
                    dACTIONS[sActionId] = aAction
                    lResult = list(lToken)
                    lResult.extend(["##"+str(iLine), sActionId])
                    yield lResult


def changeReferenceToken (sText, dPos):

    for i in range(len(dPos), 0, -1):
        sText = sText.replace("\\"+str(i), "\\"+str(dPos[i]))
    return sText


def checkTokenNumbers (sText, sActionId, nToken):

    for x in re.finditer(r"\\(\d+)", sText):
        if int(x.group(1)) > nToken:
            print("# Error in token index at line " + sActionId + " ("+str(nToken)+" tokens only)")
            print(sText)


def checkIfThereIsCode (sText, sActionId):

    if re.search("[.]\\w+[(]|sugg\\w+[(]|\\([0-9]|\\[[0-9]", sText):
        print("# Warning at line " + sActionId + ":  This message looks like code. Line should probably begin with =")
        print(sText)


def createAction (sActionId, sAction, nPriority, nToken, dPos):

    # Option
    sOption = False
    m = re.match("/(\\w+)/", sAction)
    if m:
        sOption = m.group(1)
        sAction = sAction[m.end():].strip()
    # valid action?







>






>







>






>







122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
                    dACTIONS[sActionId] = aAction
                    lResult = list(lToken)
                    lResult.extend(["##"+str(iLine), sActionId])
                    yield lResult


def changeReferenceToken (sText, dPos):
    "change group reference in <sText> with values in <dPos>"
    for i in range(len(dPos), 0, -1):
        sText = sText.replace("\\"+str(i), "\\"+str(dPos[i]))
    return sText


def checkTokenNumbers (sText, sActionId, nToken):
    "check if token references in <sText> greater than <nToken> (debugging)"
    for x in re.finditer(r"\\(\d+)", sText):
        if int(x.group(1)) > nToken:
            print("# Error in token index at line " + sActionId + " ("+str(nToken)+" tokens only)")
            print(sText)


def checkIfThereIsCode (sText, sActionId):
    "check if there is code in <sText> (debugging)"
    if re.search("[.]\\w+[(]|sugg\\w+[(]|\\([0-9]|\\[[0-9]", sText):
        print("# Warning at line " + sActionId + ":  This message looks like code. Line should probably begin with =")
        print(sText)


def createAction (sActionId, sAction, nPriority, nToken, dPos):
    "create action rule as a list"
    # Option
    sOption = False
    m = re.match("/(\\w+)/", sAction)
    if m:
        sOption = m.group(1)
        sAction = sAction[m.end():].strip()
    # valid action?
365
366
367
368
369
370
371
372
373
374
375
376
377
        print("\nActions:")
        for sActionName, aAction in dACTIONS.items():
            print(sActionName, aAction)
        print("\nFunctions:")
        print(sPyCallables)

    # Result
    d = {
        "graph_callables": sPyCallables,
        "rules_graphs": dAllGraph,
        "rules_actions": dACTIONS
    }
    return d







|




<
374
375
376
377
378
379
380
381
382
383
384
385

        print("\nActions:")
        for sActionName, aAction in dACTIONS.items():
            print(sActionName, aAction)
        print("\nFunctions:")
        print(sPyCallables)

    # Result
    return {
        "graph_callables": sPyCallables,
        "rules_graphs": dAllGraph,
        "rules_actions": dACTIONS
    }