14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
|
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
|
-
-
-
-
-
+
+
+
+
+
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
dFUNCTIONS = {}
dFUNCNAME = {}
def createFunction (sType, sActionId, sCode, bStartWithEqual=False):
"create a function (stored in dFUNCTIONS) and return function name"
sCode = prepareFunction(sCode)
if sActionId not in dFUNCNAME:
dFUNCNAME[sActionId] = {}
if sCode not in dFUNCNAME[sActionId]:
dFUNCNAME[sActionId][sCode] = len(dFUNCNAME[sActionId])+1
sFuncName = "_g_" + sType + "_" + sActionId + "_" + str(dFUNCNAME[sActionId][sCode])
if sType not in dFUNCNAME:
dFUNCNAME[sType] = {}
if sCode not in dFUNCNAME[sType]:
dFUNCNAME[sType][sCode] = len(dFUNCNAME[sType])+1
sFuncName = "_g_" + sType + "_" + str(dFUNCNAME[sType][sCode])
dFUNCTIONS[sFuncName] = sCode
return sFuncName if not bStartWithEqual else "="+sFuncName
def prepareFunction (s):
def prepareFunction (sCode):
"convert simple rule syntax to a string of Python code"
if s[0:1] == "=":
s = s[1:]
s = s.replace("__also__", "bCondMemo")
s = s.replace("__else__", "not bCondMemo")
s = s.replace("sContext", "_sAppContext")
s = re.sub(r"(morph|morphVC|analyse|value|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[nTokenOffset+\\2]', s)
s = re.sub(r"(morph|morphVC|analyse|value|displayInfo)[(]\\-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1]', s)
s = re.sub(r"(select|exclude|define|define_from)[(][\\](\d+)", 'g_\\1(lToken[nTokenOffset+\\2]', s)
s = re.sub(r"(select|exclude|define|define_from)[(][\\]-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1]', s)
s = re.sub(r"(tag_before|tag_after)[(][\\](\d+)", 'g_\\1(lToken[nTokenOffset+\\2], dTags', s)
s = re.sub(r"(tag_before|tag_after)[(][\\]-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1], dTags', s)
s = re.sub(r"space_after[(][\\](\d+)", 'g_space_between_tokens(lToken[nTokenOffset+\\1], lToken[nTokenOffset+\\1+1]', s)
s = re.sub(r"space_after[(][\\]-(\d+)", 'g_space_between_tokens(lToken[nLastToken-\\1+1], lToken[nLastToken-\\1+2]', s)
s = re.sub(r"analyse_with_next[(][\\](\d+)", 'g_merged_analyse(lToken[nTokenOffset+\\1], lToken[nTokenOffset+\\1+1]', s)
s = re.sub(r"analyse_with_next[(][\\]-(\d+)", 'g_merged_analyse(lToken[nLastToken-\\1+1], lToken[nLastToken-\\1+2]', s)
s = re.sub(r"(morph|analyse|value)\(>1", 'g_\\1(lToken[nLastToken+1]', s) # next token
s = re.sub(r"(morph|analyse|value)\(<1", 'g_\\1(lToken[nTokenOffset]', s) # previous token
s = re.sub(r"(morph|analyse|value)\(>(\d+)", 'g_\\1(g_token(lToken, nLastToken+\\2)', s) # next token
s = re.sub(r"(morph|analyse|value)\(<(\d+)", 'g_\\1(g_token(lToken, nTokenOffset+1-\\2)', s) # previous token
s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s)
s = re.sub(r"\bbefore\(\s*", 'look(sSentence[:lToken[1+nTokenOffset]["nStart"]], ', s) # before(s)
s = re.sub(r"\bafter\(\s*", 'look(sSentence[lToken[nLastToken]["nEnd"]:], ', s) # after(s)
s = re.sub(r"\bbefore0\(\s*", 'look(sSentence0[:lToken[1+nTokenOffset]["nStart"]], ', s) # before0(s)
s = re.sub(r"\bafter0\(\s*", 'look(sSentence[lToken[nLastToken]["nEnd"]:], ', s) # after0(s)
s = re.sub(r"[\\](\d+)", 'lToken[nTokenOffset+\\1]["sValue"]', s)
s = re.sub(r"[\\]-(\d+)", 'lToken[nLastToken-\\1+1]["sValue"]', s)
return s
if sCode[0:1] == "=":
sCode = sCode[1:]
sCode = sCode.replace("__also__", "bCondMemo")
sCode = sCode.replace("__else__", "not bCondMemo")
sCode = sCode.replace("sContext", "_sAppContext")
sCode = re.sub(r"(morph|morphVC|analyse|value|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[nTokenOffset+\\2]', sCode)
sCode = re.sub(r"(morph|morphVC|analyse|value|displayInfo)[(]\\-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1]', sCode)
sCode = re.sub(r"(select|exclude|define|define_from)[(][\\](\d+)", 'g_\\1(lToken[nTokenOffset+\\2]', sCode)
sCode = re.sub(r"(select|exclude|define|define_from)[(][\\]-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1]', sCode)
sCode = re.sub(r"(tag_before|tag_after)[(][\\](\d+)", 'g_\\1(lToken[nTokenOffset+\\2], dTags', sCode)
sCode = re.sub(r"(tag_before|tag_after)[(][\\]-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1], dTags', sCode)
sCode = re.sub(r"space_after[(][\\](\d+)", 'g_space_between_tokens(lToken[nTokenOffset+\\1], lToken[nTokenOffset+\\1+1]', sCode)
sCode = re.sub(r"space_after[(][\\]-(\d+)", 'g_space_between_tokens(lToken[nLastToken-\\1+1], lToken[nLastToken-\\1+2]', sCode)
sCode = re.sub(r"analyse_with_next[(][\\](\d+)", 'g_merged_analyse(lToken[nTokenOffset+\\1], lToken[nTokenOffset+\\1+1]', sCode)
sCode = re.sub(r"analyse_with_next[(][\\]-(\d+)", 'g_merged_analyse(lToken[nLastToken-\\1+1], lToken[nLastToken-\\1+2]', sCode)
sCode = re.sub(r"(morph|analyse|value)\(>1", 'g_\\1(lToken[nLastToken+1]', sCode) # next token
sCode = re.sub(r"(morph|analyse|value)\(<1", 'g_\\1(lToken[nTokenOffset]', sCode) # previous token
sCode = re.sub(r"(morph|analyse|value)\(>(\d+)", 'g_\\1(g_token(lToken, nLastToken+\\2)', sCode) # next token
sCode = re.sub(r"(morph|analyse|value)\(<(\d+)", 'g_\\1(g_token(lToken, nTokenOffset+1-\\2)', sCode) # previous token
sCode = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', sCode)
sCode = re.sub(r"\bbefore\(\s*", 'look(sSentence[:lToken[1+nTokenOffset]["nStart"]], ', sCode) # before(sCode)
sCode = re.sub(r"\bafter\(\s*", 'look(sSentence[lToken[nLastToken]["nEnd"]:], ', sCode) # after(sCode)
sCode = re.sub(r"\bbefore0\(\s*", 'look(sSentence0[:lToken[1+nTokenOffset]["nStart"]], ', sCode) # before0(sCode)
sCode = re.sub(r"\bafter0\(\s*", 'look(sSentence[lToken[nLastToken]["nEnd"]:], ', sCode) # after0(sCode)
sCode = re.sub(r"[\\](\d+)", 'lToken[nTokenOffset+\\1]["sValue"]', sCode)
sCode = re.sub(r"[\\]-(\d+)", 'lToken[nLastToken-\\1+1]["sValue"]', sCode)
return sCode
def genTokenLines (sTokenLine, dDef):
"tokenize a string and return a list of lines of tokens"
lToken = sTokenLine.split()
lTokenLines = None
for sToken in lToken:
|
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
|
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
|
-
+
-
+
+
+
+
|
"generator: create rule as list"
# print(iLine, "//", sRuleName, "//", sTokenLine, "//", sActions, "//", nPriority)
for lToken in genTokenLines(sTokenLine, dDef):
# Calculate positions
dPos = {} # key: iGroup, value: iToken
iGroup = 0
#if iLine == 3971: # debug
# print(lToken)
# print(lToken.join(" "))
for i, sToken in enumerate(lToken):
if sToken.startswith("(") and sToken.endswith(")"):
lToken[i] = sToken[1:-1]
iGroup += 1
dPos[iGroup] = i + 1 # we add 1, for we count tokens from 1 to n (not from 0)
# Parse actions
for iAction, sAction in enumerate(sActions.split(" <<- ")):
sAction = sAction.strip()
if sAction:
sActionId = sRuleName + "__b" + str(iActionBlock) + "_a" + str(iAction) + "_" + str(len(lToken))
sActionId = sRuleName + "__b" + str(iActionBlock) + "_l" + str(iLine) + "_a" + str(iAction) + "_" + str(len(lToken))
aAction = createAction(sActionId, sAction, nPriority, dOptPriority, len(lToken), dPos)
if aAction:
dACTIONS[sActionId] = aAction
lResult = list(lToken)
lResult.extend(["##"+str(iLine), sActionId])
if iLine == 13341:
print(" ".join(lToken))
print(sActionId, aAction)
yield lResult
else:
print(" # Error on action at line:", iLine)
print(sTokenLine, "\n", sActions)
def changeReferenceToken (sText, dPos):
|