︙ | | | ︙ | |
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
|
# Word-limit guards, written as look-arounds rather than with \b.
# Left guard: the match must not be preceded by a word character, ".", ",",
# an en dash or a hyphen.  (r"(?<![-.,—])\b" seems slower.)
sWORDLIMITLEFT = r"(?<![\w.,–-])"
# Right guard: the match must not be followed by a word character, an en dash
# or a hyphen.  (r"\b(?!-—)" seems slower.)
sWORDLIMITRIGHT = r"(?![\w–-])"

# Rule ids already seen; used to detect several rules sharing the same id.
aRULESET = set()
# Counter of rules declared without an explicit id.
nRULEWITHOUTNAME = 0

# Module-level registries, all starting empty.
# NOTE(review): where these get filled is not visible in this part of the file.
dDEFINITIONS = {}
dDECLENSIONS = {}
lFUNCTIONS = []
dJSREGEXES = {}
|
<
|
14
15
16
17
18
19
20
21
22
23
24
25
26
27
|
# Word-limit guards, written as look-arounds rather than with \b.
# Left guard: the match must not be preceded by a word character, ".", ",",
# an en dash or a hyphen.  (r"(?<![-.,—])\b" seems slower.)
sWORDLIMITLEFT = r"(?<![\w.,–-])"
# Right guard: the match must not be followed by a word character, an en dash
# or a hyphen.  (r"\b(?!-—)" seems slower.)
sWORDLIMITRIGHT = r"(?![\w–-])"

# Rule ids already seen; used to detect several rules sharing the same id.
aRULESET = set()

# Module-level registries, all starting empty.
# NOTE(review): where these get filled is not visible in this part of the file.
dDEFINITIONS = {}
dDECLENSIONS = {}
lFUNCTIONS = []
dJSREGEXES = {}
|
︙ | | | ︙ | |
53
54
55
56
57
58
59
60
61
62
63
64
65
66
|
return dColorType
def prepareFunction (s):
"convert simple rule syntax to a string of Python code"
s = s.replace("__also__", "bCondMemo")
s = s.replace("__else__", "not bCondMemo")
s = re.sub(r"isStart *\(\)", 'before("^ *$|, *$")', s)
s = re.sub(r"isRealStart *\(\)", 'before("^ *$")', s)
s = re.sub(r"isStart0 *\(\)", 'before0("^ *$|, *$")', s)
s = re.sub(r"isRealStart0 *\(\)", 'before0("^ *$")', s)
s = re.sub(r"isEnd *\(\)", 'after("^ *$|^,")', s)
s = re.sub(r"isRealEnd *\(\)", 'after("^ *$")', s)
s = re.sub(r"isEnd0 *\(\)", 'after0("^ *$|^,")', s)
|
>
|
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
|
return dColorType
def prepareFunction (s):
"convert simple rule syntax to a string of Python code"
s = s.replace("__also__", "bCondMemo")
s = s.replace("__else__", "not bCondMemo")
s = s.replace("sContext", "_sAppContext")
s = re.sub(r"isStart *\(\)", 'before("^ *$|, *$")', s)
s = re.sub(r"isRealStart *\(\)", 'before("^ *$")', s)
s = re.sub(r"isStart0 *\(\)", 'before0("^ *$|, *$")', s)
s = re.sub(r"isRealStart0 *\(\)", 'before0("^ *$")', s)
s = re.sub(r"isEnd *\(\)", 'after("^ *$|^,")', s)
s = re.sub(r"isRealEnd *\(\)", 'after("^ *$")', s)
s = re.sub(r"isEnd0 *\(\)", 'after0("^ *$|^,")', s)
|
︙ | | | ︙ | |
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
|
print(sRegex)
return 0
def createRule (s, nIdLine, sLang, bParagraph, dOptPriority):
"returns rule as list [option name, regex, bCaseInsensitive, identifier, list of actions]"
global dJSREGEXES
global nRULEWITHOUTNAME
sLineId = "#" + str(nIdLine) + ("p" if bParagraph else "s")
sRuleId = sLineId
#### GRAPH CALL
if s.startswith("@@@@"):
if bParagraph:
print("Error. Graph call can be made only after the first pass (sentence by sentence)")
exit()
return ["@@@@", s[4:], sLineId]
#### OPTIONS
sOption = False # False or [a-z0-9]+ name
nPriority = 4 # Default is 4, value must be between 0 and 9
tGroups = None # code for groups positioning (only useful for JavaScript)
cCaseMode = 'i' # i: case insensitive, s: case sensitive, u: uppercasing allowed
cWordLimitLeft = '[' # [: word limit, <: no specific limit
cWordLimitRight = ']' # ]: word limit, >: no specific limit
m = re.match("^__(?P<borders_and_case>[\\[<]\\w[\\]>])(?P<option>/[a-zA-Z0-9]+|)(?P<ruleid>\\(\\w+\\)|)(?P<priority>![0-9]|)__ *", s)
if m:
cWordLimitLeft = m.group('borders_and_case')[0]
cCaseMode = m.group('borders_and_case')[1]
cWordLimitRight = m.group('borders_and_case')[2]
sOption = m.group('option')[1:] if m.group('option') else False
if m.group('ruleid'):
sRuleId = m.group('ruleid')[1:-1]
if sRuleId in aRULESET:
print("# Error. Several rules have the same id: " + sRuleId)
exit()
aRULESET.add(sRuleId)
else:
nRULEWITHOUTNAME += 1
nPriority = dOptPriority.get(sOption, 4)
if m.group('priority'):
nPriority = int(m.group('priority')[1:])
s = s[m.end(0):]
else:
print("# Warning. No option defined at line: " + sLineId)
#### REGEX TRIGGER
i = s.find(" <<-")
if i == -1:
print("# Error: no condition at line " + sLineId)
return None
sRegex = s[:i].strip()
|
<
|
<
|
|
|
|
|
<
<
|
>
|
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
|
print(sRegex)
return 0
def createRule (s, nIdLine, sLang, bParagraph, dOptPriority):
"returns rule as list [option name, regex, bCaseInsensitive, identifier, list of actions]"
global dJSREGEXES
sLineId = "#" + str(nIdLine) + ("p" if bParagraph else "s")
sRuleId = sLineId
#### GRAPH CALL
if s.startswith("@@@@"):
if bParagraph:
print("Error. Graph call can be made only after the first pass (sentence by sentence)")
exit()
return ["@@@@", s[4:], sLineId]
#### OPTIONS
sOption = False # False or [a-z0-9]+ name
nPriority = 4 # Default is 4, value must be between 0 and 9
tGroups = None # code for groups positioning (only useful for JavaScript)
cCaseMode = 'i' # i: case insensitive, s: case sensitive, u: uppercasing allowed
cWordLimitLeft = '[' # [: word limit, <: no specific limit
cWordLimitRight = ']' # ]: word limit, >: no specific limit
m = re.match("^__(?P<borders_and_case>[\\[<]\\w[\\]>])(?P<option>/[a-zA-Z0-9]+|)(?P<ruleid>\\(\\w+\\))(?P<priority>![0-9]|)__ *", s)
if m:
cWordLimitLeft = m.group('borders_and_case')[0]
cCaseMode = m.group('borders_and_case')[1]
cWordLimitRight = m.group('borders_and_case')[2]
sOption = m.group('option')[1:] if m.group('option') else False
sRuleId = m.group('ruleid')[1:-1]
if sRuleId in aRULESET:
print("# Error. Several rules have the same id: " + sRuleId)
exit()
aRULESET.add(sRuleId)
nPriority = dOptPriority.get(sOption, 4)
if m.group('priority'):
nPriority = int(m.group('priority')[1:])
s = s[m.end(0):]
else:
print("# Warning. Rule wrongly shaped at line: " + sLineId)
exit()
#### REGEX TRIGGER
i = s.find(" <<-")
if i == -1:
print("# Error: no condition at line " + sLineId)
return None
sRegex = s[:i].strip()
|
︙ | | | ︙ | |
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
|
pass
elif bGraph:
lGraphRule.append([i, sLine])
# Regex rules
elif re.match("[ \t]*$", sLine):
# empty line
pass
elif sLine.startswith((" ", "\t")):
# rule (continuation)
lRuleLine[-1][1] += " " + sLine.strip()
else:
# new rule
lRuleLine.append([i, sLine.strip()])
# generating options files
|
|
|
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
|
pass
elif bGraph:
lGraphRule.append([i, sLine])
# Regex rules
elif re.match("[ \t]*$", sLine):
# empty line
pass
elif sLine.startswith(" "):
# rule (continuation)
lRuleLine[-1][1] += " " + sLine.strip()
else:
# new rule
lRuleLine.append([i, sLine.strip()])
# generating options files
|
︙ | | | ︙ | |
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
|
# JavaScript
sJSCallables += " {}: function ({})".format(sFuncName, sParams) + " {\n"
sJSCallables += " return " + jsconv.py2js(sReturn) + ";\n"
sJSCallables += " },\n"
displayStats(lParagraphRules, lSentenceRules)
print("Unnamed rules: " + str(nRULEWITHOUTNAME))
dVars = {
"fBuildTime": fBuildTime,
"callables": sPyCallables,
"callablesJS": sJSCallables,
"gctests": sGCTests,
"gctestsJS": sGCTestsJS,
"paragraph_rules": mergeRulesByOption(lParagraphRules),
|
<
<
|
622
623
624
625
626
627
628
629
630
631
632
633
634
635
|
# JavaScript
sJSCallables += " {}: function ({})".format(sFuncName, sParams) + " {\n"
sJSCallables += " return " + jsconv.py2js(sReturn) + ";\n"
sJSCallables += " },\n"
displayStats(lParagraphRules, lSentenceRules)
dVars = {
"fBuildTime": fBuildTime,
"callables": sPyCallables,
"callablesJS": sJSCallables,
"gctests": sGCTests,
"gctestsJS": sGCTestsJS,
"paragraph_rules": mergeRulesByOption(lParagraphRules),
|
︙ | | | ︙ | |