Grammalecte  Diff

Differences From Artifact [94acd166e0]:

To Artifact [2d30706580]:


1
2
3
4
5
6
7
8
9
10
11
12


13
14
15
16
17
18
19
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21












+
+








import re
import sys
import traceback
import copy
import json
from distutils import file_util


# Module-level state shared by the rule-compilation code in this file.

# Maps a definition token to its replacement text; each token found in a
# rule's regex is substituted with its replacement before compilation.
DEF = {}
# Collected (function name, code) tuples appended while actions and
# messages are processed (names are prefixed "m", "s", "p" or "d").
FUNCTIONS = []

# Rule ids seen so far; used to warn when several rules share the same id.
RULESET = set()

# NOTE(review): not referenced in the visible part of the file; presumably
# holds JavaScript-specific regex data -- confirm against its users.
JSREGEXES = {}

# Word-boundary assertions added around a rule's regex when the rule's
# word-limit flags are '[' (left) and ']' (right).
# Left: negative lookbehind -- the match must not be preceded by a word
# character, a period, a comma, an en dash or a hyphen.
WORDLIMITLEFT  = r"(?<![\w.,–-])"   # r"(?<![-.,—])\b"  seems slower
# Right: negative lookahead -- the match must not be followed by a word
# character, an en dash or a hyphen.
WORDLIMITRIGHT = r"(?![\w–-])"      # r"\b(?!-—)"       seems slower


def prepareFunction (s):
177
178
179
180
181
182
183



184
185
186
187
188
189

190
191
192
193
194
195
196
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193

194
195
196
197
198
199
200
201







+
+
+





-
+







    if m:
        cWordLimitLeft = m.group(1)[0]
        cCaseMode = m.group(1)[1]
        cWordLimitRight = m.group(1)[2]
        sOption = m.group(2)[1:]  if m.group(2)  else False
        if m.group(3):
            sRuleId =  m.group(3)[1:-1]
            if sRuleId in RULESET:
                print("# Warning. Several rules have the same id: " + sRuleId)
            RULESET.add(sRuleId)
        nPriority = dOptPriority.get(sOption, 4)
        if m.group(4):
            nPriority = int(m.group(4)[1:])
        s = s[m.end(0):]
    else:
        print("Warning. No option defined at line: " + sLineId)
        print("# Warning. No option defined at line: " + sLineId)

    #### REGEX TRIGGER
    i = s.find(" <<-")
    if i == -1:
        print("# Error: no condition at line " + sLineId)
        return None
    sRegex = s[:i].strip()
218
219
220
221
222
223
224
225

226
227
228

229
230
231
232
233
234
235
223
224
225
226
227
228
229

230
231
232

233
234
235
236
237
238
239
240







-
+


-
+







    for sDef, sRepl in DEF.items():
        sRegex = sRegex.replace(sDef, sRepl)

    ## count number of groups (must be done before modifying the regex)
    nGroup = countGroupInRegex(sRegex)
    if nGroup > 0:
        if not tGroups:
            print("# warning: groups positioning code for JavaScript should be defined at line " + sLineId)
            print("# Warning: groups positioning code for JavaScript should be defined at line " + sLineId)
        else:
            if nGroup != len(tGroups):
                print("# error: groups positioning code irrelevant at line " + sLineId)
                print("# Error: groups positioning code irrelevant at line " + sLineId)

    ## word limit
    if cWordLimitLeft == '[' and not sRegex.startswith(("^", '’', "'", ",")):
        sRegex = WORDLIMITLEFT + sRegex
    if cWordLimitRight == ']' and not sRegex.endswith(("$", '’', "'", ",")):
        sRegex = sRegex + WORDLIMITRIGHT

316
317
318
319
320
321
322
323

324
325
326
327
328

329
330

331
332
333
334

335
336
337
338
339

340
341
342
343

344
345

346
347
348
349
350

351
352
353
354
355
356
357

358
359
360
361
362

363
364
365
366
367
368
369
370
371
372
373
374

375
376
377
378
379
380
381
321
322
323
324
325
326
327

328
329
330
331
332

333
334

335
336
337
338

339
340
341
342
343

344
345
346
347

348
349

350
351
352
353
354

355
356
357
358
359
360
361

362
363
364
365
366

367
368
369
370
371
372
373
374
375
376
377
378

379
380
381
382
383
384
385
386







-
+




-
+

-
+



-
+




-
+



-
+

-
+




-
+






-
+




-
+











-
+







            sURL = mURL.group(1).strip()
            sMsg = sMsg[:mURL.start(0)].strip()
        if sMsg[0:1] == "=":
            sMsg = prepareFunction(sMsg[1:])
            FUNCTIONS.append(("m"+sIdAction, sMsg))
            for x in re.finditer("group[(](\d+)[)]", sMsg):
                if int(x.group(1)) > nGroup:
                    print("# error in groups in message at line " + sIdAction + " ("+str(nGroup)+" groups only)")
                    print("# Error in groups in message at line " + sIdAction + " ("+str(nGroup)+" groups only)")
            sMsg = "=m"+sIdAction
        else:
            for x in re.finditer(r"\\(\d+)", sMsg):
                if int(x.group(1)) > nGroup:
                    print("# error in groups in message at line " + sIdAction + " ("+str(nGroup)+" groups only)")
                    print("# Error in groups in message at line " + sIdAction + " ("+str(nGroup)+" groups only)")
            if re.search("[.]\\w+[(]", sMsg):
                print("# error in message at line " + sIdAction + ":  This message looks like code. Line should begin with =")
                print("# Error in message at line " + sIdAction + ":  This message looks like code. Line should begin with =")
            
    if sAction[0:1] == "=" or cAction == "=":
        if "define" in sAction and not re.search(r"define\(\\\d+ *, *\[.*\] *\)", sAction):
            print("# error in action at line " + sIdAction + ": second argument for define must be a list of strings")
            print("# Error in action at line " + sIdAction + ": second argument for define must be a list of strings")
        sAction = prepareFunction(sAction)
        sAction = sAction.replace("m.group(i[4])", "m.group("+str(iGroup)+")")
        for x in re.finditer("group[(](\d+)[)]", sAction):
            if int(x.group(1)) > nGroup:
                print("# error in groups in replacement at line " + sIdAction + " ("+str(nGroup)+" groups only)")
                print("# Error in groups in replacement at line " + sIdAction + " ("+str(nGroup)+" groups only)")
    else:
        for x in re.finditer(r"\\(\d+)", sAction):
            if int(x.group(1)) > nGroup:
                print("# error in groups in replacement at line " + sIdAction + " ("+str(nGroup)+" groups only)")
                print("# Error in groups in replacement at line " + sIdAction + " ("+str(nGroup)+" groups only)")
        if re.search("[.]\\w+[(]", sAction):
            print("# error in action at line " + sIdAction + ":  This action looks like code. Line should begin with =")
            print("# Error in action at line " + sIdAction + ":  This action looks like code. Line should begin with =")

    if cAction == "-":
        ## error detected
        if not sAction:
            print("# error in action at line " + sIdAction + ":  This action is empty.")
            print("# Error in action at line " + sIdAction + ":  This action is empty.")
        if sAction[0:1] == "=":
            FUNCTIONS.append(("s"+sIdAction, sAction[1:]))
            sAction = "=s"+sIdAction
        elif sAction.startswith('"') and sAction.endswith('"'):
            sAction = sAction[1:-1]
        if not sMsg:
            print("# error in action at line " + sIdAction + ":  the message is empty.")
            print("# Error in action at line " + sIdAction + ":  the message is empty.")
        return [sCondition, cAction, sAction, iGroup, sMsg, sURL]
    elif cAction == "~":
        ## text preprocessor
        if not sAction:
            print("# error in action at line " + sIdAction + ":  This action is empty.")
            print("# Error in action at line " + sIdAction + ":  This action is empty.")
        if sAction[0:1] == "=":
            FUNCTIONS.append(("p"+sIdAction, sAction[1:]))
            sAction = "=p"+sIdAction
        elif sAction.startswith('"') and sAction.endswith('"'):
            sAction = sAction[1:-1]
        return [sCondition, cAction, sAction, iGroup]
    elif cAction == "=":
        ## disambiguator
        if sAction[0:1] == "=":
            sAction = sAction[1:]
        if not sAction:
            print("# error in action at line " + sIdAction + ":  This action is empty.")
            print("# Error in action at line " + sIdAction + ":  This action is empty.")
        FUNCTIONS.append(("d"+sIdAction, sAction))
        sAction = "d"+sIdAction
        return [sCondition, cAction, sAction]
    elif cAction == ">":
        ## no action, break loop if condition is False
        return [sCondition, cAction, ""]
    else: