Grammalecte  Check-in [a835df567d]

Overview
Comment:[build] rename global vars
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | build
Files: files | file ages | folders
SHA3-256: a835df567d36bd6d8260ae36db48b7e032aa0efa7128ba797df7fdb6bb0878b1
User & Date: olr on 2017-05-16 23:46:18
Other Links: manifest | tags
Context
2017-05-16
23:52
[build] count unnamed rules check-in: e6a7cd50a7 user: olr tags: trunk, build
23:46
[build] rename global vars check-in: a835df567d user: olr tags: trunk, build
22:30
[fr] tabulations check-in: b650eee6b7 user: olr tags: trunk, fr
Changes

Modified compile_rules.py from [e2df43feb0] to [e1c06fc5a9].

1
2
3
4
5
6
7
8
9
10
11


12
13

14
15

16
17
18


19
20
21
22
23
24
25
1
2
3
4
5
6
7
8
9


10
11
12

13
14

15
16


17
18
19
20
21
22
23
24
25









-
-
+
+

-
+

-
+

-
-
+
+








import re
import sys
import traceback
import copy
import json
from distutils import file_util


DEF = {}
FUNCTIONS = []
dDEF = {}
lFUNCTIONS = []

RULESET = set()     # set of rule-ids to check if there is several rules with the same id
aRULESET = set()     # set of rule-ids to check if there is several rules with the same id

JSREGEXES = {}
dJSREGEXES = {}

WORDLIMITLEFT  = r"(?<![\w.,–-])"   # r"(?<![-.,—])\b"  seems slower
WORDLIMITRIGHT = r"(?![\w–-])"      # r"\b(?!-—)"       seems slower
sWORDLIMITLEFT  = r"(?<![\w.,–-])"   # r"(?<![-.,—])\b"  seems slower
sWORDLIMITRIGHT = r"(?![\w–-])"      # r"\b(?!-—)"       seems slower


def prepareFunction (s):
    s = s.replace("__also__", "bCondMemo")
    s = s.replace("__else__", "not bCondMemo")
    s = re.sub(r"(select|exclude)[(][\\](\d+)", '\\1(dDA, m.start(\\2), m.group(\\2)', s)
    s = re.sub(r"define[(][\\](\d+)", 'define(dDA, m.start(\\1)', s)
160
161
162
163
164
165
166
167

168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186

187
188
189

190
191
192
193
194
195
196
160
161
162
163
164
165
166

167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185

186
187
188

189
190
191
192
193
194
195
196







-
+


















-
+


-
+







        traceback.print_exc()
        print(sRegex)
    return 0


def createRule (s, nIdLine, sLang, bParagraph, dOptPriority):
    "returns rule as list [option name, regex, bCaseInsensitive, identifier, list of actions]"
    global JSREGEXES
    global dJSREGEXES

    #### OPTIONS
    sLineId = str(nIdLine) + ("p" if bParagraph else "s")
    sRuleId = sLineId
    sOption = False         # False or [a-z0-9]+ name
    nPriority = 4           # Default is 4, value must be between 0 and 9
    tGroups = None          # code for groups positioning (only useful for JavaScript)
    cCaseMode = 'i'         # i: case insensitive,  s: case sensitive,  u: uppercasing allowed
    cWordLimitLeft = '['    # [: word limit, <: no specific limit
    cWordLimitRight = ']'   # ]: word limit, >: no specific limit
    m = re.match("^__(?P<borders_and_case>[[<]\\w[]>])(?P<option>/[a-zA-Z0-9]+|)(?P<ruleid>\\(\\w+\\)|)(?P<priority>![0-9]|)__ *", s)
    if m:
        cWordLimitLeft = m.group('borders_and_case')[0]
        cCaseMode = m.group('borders_and_case')[1]
        cWordLimitRight = m.group('borders_and_case')[2]
        sOption = m.group('option')[1:]  if m.group('option')  else False
        if m.group('ruleid'):
            sRuleId =  m.group('ruleid')[1:-1]
            if sRuleId in RULESET:
            if sRuleId in aRULESET:
                print("# Error. Several rules have the same id: " + sRuleId)
                exit()
            RULESET.add(sRuleId)
            aRULESET.add(sRuleId)
        nPriority = dOptPriority.get(sOption, 4)
        if m.group('priority'):
            nPriority = int(m.group('priority')[1:])
        s = s[m.end(0):]
    else:
        print("# Warning. No option defined at line: " + sLineId)

206
207
208
209
210
211
212
213

214
215
216
217
218
219
220
221
222
223
224

225
226
227
228
229
230
231
232
233
234
235
236
237
238

239
240

241
242
243
244
245
246
247
206
207
208
209
210
211
212

213
214
215
216
217
218
219
220
221
222
223

224
225
226
227
228
229
230
231
232
233
234
235
236
237

238
239

240
241
242
243
244
245
246
247







-
+










-
+













-
+

-
+







    m = re.search("@@\\S+", sRegex)
    if m:
        tGroups = groupsPositioningCodeToList(sRegex[m.start()+2:])
        sRegex = sRegex[:m.start()].strip()
    # JS regex
    m = re.search("<js>.+</js>i?", sRegex)
    if m:
        JSREGEXES[sLineId] = m.group(0)
        dJSREGEXES[sLineId] = m.group(0)
        sRegex = sRegex[:m.start()].strip()
    if "<js>" in sRegex or "</js>" in sRegex:
        print("# Error: JavaScript regex not delimited at line " + sLineId)
        return None

    # quotes ?
    if sRegex.startswith('"') and sRegex.endswith('"'):
        sRegex = sRegex[1:-1]

    ## definitions
    for sDef, sRepl in DEF.items():
    for sDef, sRepl in dDEF.items():
        sRegex = sRegex.replace(sDef, sRepl)

    ## count number of groups (must be done before modifying the regex)
    nGroup = countGroupInRegex(sRegex)
    if nGroup > 0:
        if not tGroups:
            print("# Warning: groups positioning code for JavaScript should be defined at line " + sLineId)
        else:
            if nGroup != len(tGroups):
                print("# Error: groups positioning code irrelevant at line " + sLineId)

    ## word limit
    if cWordLimitLeft == '[' and not sRegex.startswith(("^", '’', "'", ",")):
        sRegex = WORDLIMITLEFT + sRegex
        sRegex = sWORDLIMITLEFT + sRegex
    if cWordLimitRight == ']' and not sRegex.endswith(("$", '’', "'", ",")):
        sRegex = sRegex + WORDLIMITRIGHT
        sRegex = sRegex + sWORDLIMITRIGHT

    ## casing mode
    if cCaseMode == "i":
        bCaseInsensitive = True
        if not sRegex.startswith("(?i)"):
            sRegex = "(?i)" + sRegex
    elif cCaseMode == "s":
278
279
280
281
282
283
284
285

286
287
288
289
290
291
292
293
294
295
296

297
298
299
300
301
302
303
278
279
280
281
282
283
284

285
286
287
288
289
290
291
292
293
294
295

296
297
298
299
300
301
302
303







-
+










-
+







        return None

    return [sOption, sRegex, bCaseInsensitive, sLineId, sRuleId, nPriority, lActions, tGroups]


def createAction (sIdAction, sAction, nGroup):
    "returns an action to perform as a tuple (condition, action type, action[, iGroup [, message, URL ]])"
    global FUNCTIONS
    global lFUNCTIONS

    m = re.search(r"([-~=>])(\d*|)>>", sAction)
    if not m:
        print("# No action at line " + sIdAction)
        return None

    #### CONDITION
    sCondition = sAction[:m.start()].strip()
    if sCondition:
        sCondition = prepareFunction(sCondition)
        FUNCTIONS.append(("c_"+sIdAction, sCondition))
        lFUNCTIONS.append(("c_"+sIdAction, sCondition))
        for x in re.finditer("[.](?:group|start|end)[(](\d+)[)]", sCondition):
            if int(x.group(1)) > nGroup:
                print("# Error in groups in condition at line " + sIdAction + " ("+str(nGroup)+" groups only)")
        if ".match" in sCondition:
            print("# Error. JS compatibility. Don't use .match() in condition, use .search()")
        sCondition = "c_"+sIdAction
    else:
319
320
321
322
323
324
325
326

327
328
329
330
331
332
333
319
320
321
322
323
324
325

326
327
328
329
330
331
332
333







-
+







        sURL = ""
        mURL = re.search("[|] *(https?://.*)", sMsg)
        if mURL:
            sURL = mURL.group(1).strip()
            sMsg = sMsg[:mURL.start(0)].strip()
        if sMsg[0:1] == "=":
            sMsg = prepareFunction(sMsg[1:])
            FUNCTIONS.append(("m_"+sIdAction, sMsg))
            lFUNCTIONS.append(("m_"+sIdAction, sMsg))
            for x in re.finditer("group[(](\d+)[)]", sMsg):
                if int(x.group(1)) > nGroup:
                    print("# Error in groups in message at line " + sIdAction + " ("+str(nGroup)+" groups only)")
            sMsg = "=m_"+sIdAction
        else:
            for x in re.finditer(r"\\(\d+)", sMsg):
                if int(x.group(1)) > nGroup:
351
352
353
354
355
356
357
358

359
360
361
362
363
364
365
366
367
368
369
370

371
372
373
374
375
376
377
378
379
380
381

382
383
384
385
386
387
388
351
352
353
354
355
356
357

358
359
360
361
362
363
364
365
366
367
368
369

370
371
372
373
374
375
376
377
378
379
380

381
382
383
384
385
386
387
388







-
+











-
+










-
+







            print("# Error in action at line " + sIdAction + ":  This action looks like code. Line should begin with =")

    if cAction == "-":
        ## error detected --> suggestion
        if not sAction:
            print("# Error in action at line " + sIdAction + ":  This action is empty.")
        if sAction[0:1] == "=":
            FUNCTIONS.append(("s_"+sIdAction, sAction[1:]))
            lFUNCTIONS.append(("s_"+sIdAction, sAction[1:]))
            sAction = "=s_"+sIdAction
        elif sAction.startswith('"') and sAction.endswith('"'):
            sAction = sAction[1:-1]
        if not sMsg:
            print("# Error in action at line " + sIdAction + ":  the message is empty.")
        return [sCondition, cAction, sAction, iGroup, sMsg, sURL]
    elif cAction == "~":
        ## text processor
        if not sAction:
            print("# Error in action at line " + sIdAction + ":  This action is empty.")
        if sAction[0:1] == "=":
            FUNCTIONS.append(("p_"+sIdAction, sAction[1:]))
            lFUNCTIONS.append(("p_"+sIdAction, sAction[1:]))
            sAction = "=p_"+sIdAction
        elif sAction.startswith('"') and sAction.endswith('"'):
            sAction = sAction[1:-1]
        return [sCondition, cAction, sAction, iGroup]
    elif cAction == "=":
        ## disambiguator
        if sAction[0:1] == "=":
            sAction = sAction[1:]
        if not sAction:
            print("# Error in action at line " + sIdAction + ":  This action is empty.")
        FUNCTIONS.append(("d_"+sIdAction, sAction))
        lFUNCTIONS.append(("d_"+sIdAction, sAction))
        sAction = "d_"+sIdAction
        return [sCondition, cAction, sAction]
    elif cAction == ">":
        ## no action, break loop if condition is False
        return [sCondition, cAction, ""]
    else:
        print("# Unknown action at line " + sIdAction)
402
403
404
405
406
407
408
409
410


411
412
413
414
415
416
417
402
403
404
405
406
407
408


409
410
411
412
413
414
415
416
417







-
-
+
+







    #   Ā-ʯ     0100-02AF   (mixed)
    #   -> a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ
    bCaseInsensitive = False
    if "(?i)" in sRegex:
        sRegex = sRegex.replace("(?i)", "")
        bCaseInsensitive = True
    lNegLookBeforeRegex = []
    if WORDLIMITLEFT in sRegex:
        sRegex = sRegex.replace(WORDLIMITLEFT, "")
    if sWORDLIMITLEFT in sRegex:
        sRegex = sRegex.replace(sWORDLIMITLEFT, "")
        lNegLookBeforeRegex = ["[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ.,–-]$"]
    sRegex = sRegex.replace("[\\w", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ")
    sRegex = sRegex.replace("\\w", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ]")
    sRegex = sRegex.replace("[.]", r"\.")
    if not sRegex.startswith("<js>"):
        sRegex = sRegex.replace("/", r"\/")
    m = re.search(r"\(\?<!([^()]+)\)", sRegex)  # Negative lookbefore assertion should always be at the beginning of regex
436
437
438
439
440
441
442
443

444
445
446
447
448
449
450
436
437
438
439
440
441
442

443
444
445
446
447
448
449
450







-
+







    lRuleJS = copy.deepcopy(lRule)
    del lRule[-1] # tGroups positioning codes are useless for Python
    # error messages
    for aAction in lRuleJS[6]:
        if aAction[1] == "-":
            aAction[4] = aAction[4].replace("« ", "«&nbsp;").replace(" »", "&nbsp;»")
    # js regexes
    lRuleJS[1], lNegLookBehindRegex = regex2js( JSREGEXES.get(lRuleJS[3], lRuleJS[1]) )
    lRuleJS[1], lNegLookBehindRegex = regex2js( dJSREGEXES.get(lRuleJS[3], lRuleJS[1]) )
    lRuleJS.append(lNegLookBehindRegex)
    return lRuleJS


def writeRulesToJSArray (lRules):
    sArray = "[\n"
    for sOption, aRuleGroup in lRules:
542
543
544
545
546
547
548
549

550
551
552
553
554
555
556
557
558
559
560
561
562

563
564
565
566
567
568
569
542
543
544
545
546
547
548

549
550
551
552
553
554
555
556
557
558
559
560
561

562
563
564
565
566
567
568
569







-
+












-
+








def make (lRules, sLang, bJavaScript):
    "compile rules, returns a dictionary of values"
    # for clarity purpose, don’t create any file here

    # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
    print("  parsing rules...")
    global DEF
    global dDEF
    lLine = []
    lRuleLine = []
    lTest = []
    lOpt = []
    for i, sLine in enumerate(lRules, 1):
        if sLine.startswith('#END'):
            break
        elif sLine.startswith("#"):
            pass
        elif sLine.startswith("DEF:"):
            m = re.match("DEF: +([a-zA-Z_][a-zA-Z_0-9]*) +(.+)$", sLine.strip())
            if m:
                DEF["{"+m.group(1)+"}"] = m.group(2)
                dDEF["{"+m.group(1)+"}"] = m.group(2)
            else:
                print("Error in definition: ", end="")
                print(sLine.strip())
        elif sLine.startswith("TEST:"):
            lTest.append("{:<8}".format(i) + "  " + sLine[5:].strip())
        elif sLine.startswith("TODO:"):
            pass
611
612
613
614
615
616
617
618

619
620
621
622
623
624
625
611
612
613
614
615
616
617

618
619
620
621
622
623
624
625







-
+







                        lSentenceRules.append(aRule)
                        lSentenceRulesJS.append(pyRuleToJS(aRule))

    # creating file with all functions callable by rules
    print("  creating callables...")
    sPyCallables = "# generated code, do not edit\n"
    sJSCallables = "// generated code, do not edit\nconst oEvalFunc = {\n"
    for sFuncName, sReturn in FUNCTIONS:
    for sFuncName, sReturn in lFUNCTIONS:
        cType = sFuncName[0:1]
        if cType == "c": # condition
            sParams = "s, sx, m, dDA, sCountry, bCondMemo"
        elif cType == "m": # message
            sParams = "s, m"
        elif cType == "s": # suggestion
            sParams = "s, m"