Grammalecte  Check-in [cb88d3d2dc]

Overview
Comment:[build] merge graph rules building and regex rules building
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | build | rg
Files: files | file ages | folders
SHA3-256: cb88d3d2dc6889b8c530b5059d7cd161257d02fda44835afa75cfb4b97ab22a6
User & Date: olr on 2018-06-13 05:37:08
Other Links: branch diff | manifest | tags
Context
2018-06-13
05:39
[fr] merge rules in one file check-in: 6fb93b7388 user: olr tags: fr, rg
05:37
[build] merge graph rules building and regex rules building check-in: cb88d3d2dc user: olr tags: build, rg
2018-06-12
19:55
[fr] conversion: regex -> graph rules check-in: 3d50852f45 user: olr tags: fr, rg
Changes

Modified compile_rules.py from [8c11a24e22] to [22bdc8ba86].

1
2
3
4
5
6

7
8
9
10
11
12
13

import re
import traceback
import json

import compile_rules_js_convert as jsconv



dDEF = {}
lFUNCTIONS = []

aRULESET = set()     # set of rule-ids to check if there is several rules with the same id
nRULEWITHOUTNAME = 0






>







1
2
3
4
5
6
7
8
9
10
11
12
13
14

import re
import traceback
import json

import compile_rules_js_convert as jsconv
import compile_rules_graph as crg


dDEF = {}
lFUNCTIONS = []

aRULESET = set()     # set of rule-ids to check if there is several rules with the same id
nRULEWITHOUTNAME = 0
439
440
441
442
443
444
445
446
447
448
449


450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491



















492
493
494
495
496
497
498
    except:
        print("Error. Rules file in project [" + sLang + "] not found.")
        exit()

    # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
    print("  parsing rules...")
    global dDEF
    lLine = []
    lRuleLine = []
    lTest = []
    lOpt = []



    for i, sLine in enumerate(lRules, 1):
        if sLine.startswith('#END'):
            # arbitrary end
            printBookmark(0, "BREAK BY #END", i)
            break
        elif sLine.startswith("#"):
            # comment
            pass
        elif sLine.startswith("@@@@"):
            # rules graph call
            m = re.match(r"@@@@GRAPH: *(\w+)@@@@", sLine.strip())
            if m:
                #lRuleLine.append(["@GRAPHLINK", m.group(1)])
                printBookmark(1, "@GRAPH: " + m.group(1), i)
                lRuleLine.append([i, "@@@@"+m.group(1)])
        elif sLine.startswith("DEF:"):
            # definition
            m = re.match("DEF: +([a-zA-Z_][a-zA-Z_0-9]*) +(.+)$", sLine.strip())
            if m:
                dDEF["{"+m.group(1)+"}"] = m.group(2)
            else:
                print("Error in definition: ", end="")
                print(sLine.strip())
        elif sLine.startswith("TEST:"):
            # test
            lTest.append("r{:<7}".format(i) + "  " + sLine[5:].strip())
        elif sLine.startswith("TODO:"):
            # todo
            pass
        elif sLine.startswith(("OPTGROUP/", "OPTSOFTWARE:", "OPT/", "OPTLANG/", "OPTDEFAULTUILANG:", "OPTLABEL/", "OPTPRIORITY/")):
            # options
            lOpt.append(sLine)
        elif re.match("[  \t]*$", sLine):
            # empty line
            pass
        elif sLine.startswith("!!"):
            # bookmark
            m = re.match("!!+", sLine)
            nExMk = len(m.group(0))
            if sLine[nExMk:].strip():
                printBookmark(nExMk-2, sLine[nExMk:].strip(), i)



















        elif sLine.startswith(("    ", "\t")):
            # rule (continuation)
            lRuleLine[-1][1] += " " + sLine.strip()
        else:
            # new rule
            lRuleLine.append([i, sLine.strip()])








<



>
>









<
<
<
<
<
<
<










|






<
<
<






>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







440
441
442
443
444
445
446

447
448
449
450
451
452
453
454
455
456
457
458
459
460







461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477



478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
    except:
        print("Error. Rules file in project [" + sLang + "] not found.")
        exit()

    # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
    print("  parsing rules...")
    global dDEF

    lRuleLine = []
    lTest = []
    lOpt = []
    bGraph = False
    lGraphRule = []

    for i, sLine in enumerate(lRules, 1):
        if sLine.startswith('#END'):
            # arbitrary end
            printBookmark(0, "BREAK BY #END", i)
            break
        elif sLine.startswith("#"):
            # comment
            pass







        elif sLine.startswith("DEF:"):
            # definition
            m = re.match("DEF: +([a-zA-Z_][a-zA-Z_0-9]*) +(.+)$", sLine.strip())
            if m:
                dDEF["{"+m.group(1)+"}"] = m.group(2)
            else:
                print("Error in definition: ", end="")
                print(sLine.strip())
        elif sLine.startswith("TEST:"):
            # test
            lTest.append("{:<8}".format(i) + "  " + sLine[5:].strip())
        elif sLine.startswith("TODO:"):
            # todo
            pass
        elif sLine.startswith(("OPTGROUP/", "OPTSOFTWARE:", "OPT/", "OPTLANG/", "OPTDEFAULTUILANG:", "OPTLABEL/", "OPTPRIORITY/")):
            # options
            lOpt.append(sLine)



        elif sLine.startswith("!!"):
            # bookmark
            m = re.match("!!+", sLine)
            nExMk = len(m.group(0))
            if sLine[nExMk:].strip():
                printBookmark(nExMk-2, sLine[nExMk:].strip(), i)
        # Graph rules
        elif sLine.startswith("@@@@GRAPH:"):
            # rules graph call
            m = re.match(r"@@@@GRAPH: *(\w+)", sLine.strip())
            if m:
                printBookmark(1, "@GRAPH: " + m.group(1), i)
                lRuleLine.append([i, "@@@@"+m.group(1)])
                bGraph = True
            lGraphRule.append([i, sLine])
            bGraph = True
        elif sLine.startswith("@@@@END_GRAPH"):
            lGraphRule.append([i, sLine])
            bGraph = False
        elif bGraph:
            lGraphRule.append([i, sLine])
        # Regex rules
        elif re.match("[  \t]*$", sLine):
            # empty line
            pass
        elif sLine.startswith(("    ", "\t")):
            # rule (continuation)
            lRuleLine[-1][1] += " " + sLine.strip()
        else:
            # new rule
            lRuleLine.append([i, sLine.strip()])

559
560
561
562
563
564
565
566
567
568
569
570
571
572
573




574

    displayStats(lParagraphRules, lSentenceRules)

    print("Unnamed rules: " + str(nRULEWITHOUTNAME))

    d = { "callables": sPyCallables,
          "callablesJS": sJSCallables,
          "regex_gctests": sGCTests,
          "regex_gctestsJS": sGCTestsJS,
          "paragraph_rules": mergeRulesByOption(lParagraphRules),
          "sentence_rules": mergeRulesByOption(lSentenceRules),
          "paragraph_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lParagraphRulesJS)),
          "sentence_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lSentenceRulesJS)) }
    d.update(dOptions)





    return d







|
|






>
>
>
>

570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589

    displayStats(lParagraphRules, lSentenceRules)

    print("Unnamed rules: " + str(nRULEWITHOUTNAME))

    d = { "callables": sPyCallables,
          "callablesJS": sJSCallables,
          "gctests": sGCTests,
          "gctestsJS": sGCTestsJS,
          "paragraph_rules": mergeRulesByOption(lParagraphRules),
          "sentence_rules": mergeRulesByOption(lSentenceRules),
          "paragraph_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lParagraphRulesJS)),
          "sentence_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lSentenceRulesJS)) }
    d.update(dOptions)

    # compile graph rules
    d2 = crg.make(lGraphRule, sLang, bJavaScript)
    d.update(d2)

    return d

Modified compile_rules_graph.py from [a4f6103185] to [e76a1b508a].

1
2
3
4
5

6
7
8
9
10
11
12
13
14
15
16
# Create a Direct Acyclic Rule Graph (DARG)

import re
import traceback
import json

import darg


dDEF = {}
dACTIONS = {}
dFUNCTIONS = {}


def prepareFunction (s, bTokenValue=False):
    s = s.replace("__also__", "bCondMemo")
    s = s.replace("__else__", "not bCondMemo")





>



<







1
2
3
4
5
6
7
8
9

10
11
12
13
14
15
16
# Create a Direct Acyclic Rule Graph (DARG)

import re
import traceback
import json

import darg



dACTIONS = {}
dFUNCTIONS = {}


def prepareFunction (s, bTokenValue=False):
    s = s.replace("__also__", "bCondMemo")
    s = s.replace("__else__", "not bCondMemo")
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
        ## no action, break loop if condition is False
        return [sOption, sCondition, cAction, ""]
    else:
        print("# Unknown action at line " + sIdAction)
        return None


def printBookmark (nLevel, sComment, nLine):
    print("  {:>6}:  {}".format(nLine, "  " * nLevel + sComment))


def make (spLang, sLang, bJavaScript):
    "compile rules, returns a dictionary of values"
    # for clarity purpose, don’t create any file here

    print("> read graph rules file...")
    try:
        lRules = open(spLang + "/rules_graph.grx", 'r', encoding="utf-8").readlines()
    except:
        print("Error. Rules file in project [" + sLang + "] not found.")
        exit()

    # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
    print("  parsing rules...")
    global dDEF
    lTest = []
    lTokenLine = []
    sActions = ""
    nPriority = 4
    dAllGraph = {}
    sGraphName = ""

    for i, sLine in enumerate(lRules, 1):
        sLine = sLine.rstrip()
        if "\t" in sLine:
            # tabulation not allowed
            print("Error. Tabulation at line: ", i)
            exit()
        if sLine.startswith('#END'):
            # arbitrary end
            printBookmark(0, "BREAK BY #END", i)
            break
        elif sLine.startswith("#"):
            # comments
            pass
        elif sLine.startswith("GRAPH_NAME: "):
            # Graph name
            m = re.match("GRAPH_NAME: +([a-zA-Z_][a-zA-Z_0-9]*)+", sLine.strip())
            if m:
                sGraphName = m.group(1)
                if sGraphName in dAllGraph:
                    print("Error. Group name " + sGraphName + " already exists.")
                    exit()
                dAllGraph[sGraphName] = []
            else:
                print("Error. Graph name not found in", sLine.strip())
                exit()
        elif sLine.startswith("DEF:"):
            # definition
            m = re.match("DEF: +([a-zA-Z_][a-zA-Z_0-9]*) +(.+)$", sLine.strip())
            if m:
                dDEF["{"+m.group(1)+"}"] = m.group(2)
            else:
                print("Error in definition: ", end="")
                print(sLine.strip())
        elif sLine.startswith("TEST:"):
            # test
            lTest.append("g{:<7}".format(i) + "  " + sLine[5:].strip())
        elif sLine.startswith("TODO:"):
            # todo
            pass
        elif sLine.startswith("!!"):
            # bookmarks
            m = re.search("^!!+", sLine)
            nExMk = len(m.group(0))
            if sLine[nExMk:].strip():
                printBookmark(nExMk-2, sLine[nExMk:].strip(), i)
        elif sLine.startswith("__") and sLine.endswith("__"):
            # new rule group
            m = re.match("__(\\w+)(!\\d|)__", sLine)
            if m:
                sRuleName = m.group(1)
                nPriority = int(m.group(2)[1:]) if m.group(2)  else 4
            else:







<
<
<
<
|



<
<
<
<
<
<
<


<
<






|





<
<
<
<
|
<
<
<
|
|







|

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







221
222
223
224
225
226
227




228
229
230
231







232
233


234
235
236
237
238
239
240
241
242
243
244
245




246



247
248
249
250
251
252
253
254
255
256
257




















258
259
260
261
262
263
264
        ## no action, break loop if condition is False
        return [sOption, sCondition, cAction, ""]
    else:
        print("# Unknown action at line " + sIdAction)
        return None






def make (lRule, sLang, bJavaScript):
    "compile rules, returns a dictionary of values"
    # for clarity purpose, don’t create any file here








    # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
    print("  parsing rules...")


    lTokenLine = []
    sActions = ""
    nPriority = 4
    dAllGraph = {}
    sGraphName = ""

    for i, sLine in lRule:
        sLine = sLine.rstrip()
        if "\t" in sLine:
            # tabulation not allowed
            print("Error. Tabulation at line: ", i)
            exit()




        elif sLine.startswith("@@@@GRAPH: "):



            # rules graph call
            m = re.match(r"@@@@GRAPH: *(\w+)", sLine.strip())
            if m:
                sGraphName = m.group(1)
                if sGraphName in dAllGraph:
                    print("Error. Group name " + sGraphName + " already exists.")
                    exit()
                dAllGraph[sGraphName] = []
            else:
                print("Error. Graph name not found at line", i)
                exit()




















        elif sLine.startswith("__") and sLine.endswith("__"):
            # new rule group
            m = re.match("__(\\w+)(!\\d|)__", sLine)
            if m:
                sRuleName = m.group(1)
                nPriority = int(m.group(2)[1:]) if m.group(2)  else 4
            else:
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
                exit()
            for j, sTokenLine in lTokenLine:
                dAllGraph[sGraphName].append((j, sRuleName, sTokenLine, sActions, nPriority))
            lTokenLine.clear()
            sActions = ""
            sRuleName = ""
            nPriority = 4
        elif sLine.startswith(("        <<-", "    <<-")):
            # actions
            sActions += " " + sLine.strip()
        elif sLine.startswith(("    ")):
            lTokenLine.append([i, sLine.strip()])
        else:
            print("Unknown line:")
            print(sLine)

    # tests
    print("  list tests...")
    sGCTests = "\n".join(lTest)
    sGCTestsJS = '{ "aData2": ' + json.dumps(lTest, ensure_ascii=False) + " }\n"

    # processing rules
    print("  preparing rules...")
    for sGraphName, lRuleLine in dAllGraph.items():
        lPreparedRule = []
        for i, sRuleGroup, sTokenLine, sActions, nPriority in lRuleLine:
            for lRule in createRule(i, sRuleGroup, sTokenLine, sActions, nPriority):
                lPreparedRule.append(lRule)







|








<
<
<
<
<







276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291





292
293
294
295
296
297
298
                exit()
            for j, sTokenLine in lTokenLine:
                dAllGraph[sGraphName].append((j, sRuleName, sTokenLine, sActions, nPriority))
            lTokenLine.clear()
            sActions = ""
            sRuleName = ""
            nPriority = 4
        elif re.search("    +<<- ", sLine):
            # actions
            sActions += " " + sLine.strip()
        elif sLine.startswith(("    ")):
            lTokenLine.append([i, sLine.strip()])
        else:
            print("Unknown line:")
            print(sLine)






    # processing rules
    print("  preparing rules...")
    for sGraphName, lRuleLine in dAllGraph.items():
        lPreparedRule = []
        for i, sRuleGroup, sTokenLine, sActions, nPriority in lRuleLine:
            for lRule in createRule(i, sRuleGroup, sTokenLine, sActions, nPriority):
                lPreparedRule.append(lRule)
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390

    for sActionName, aAction in dACTIONS.items():
        print(sActionName, aAction)

    # Result
    d = {
        "graph_callables": sPyCallables,
        "graph_gctests": sGCTests,
        "rules_graphs": dAllGraph,
        "rules_actions": dACTIONS
    }

    return d









<



<

<
<
331
332
333
334
335
336
337

338
339
340

341



    for sActionName, aAction in dACTIONS.items():
        print(sActionName, aAction)

    # Result
    d = {
        "graph_callables": sPyCallables,

        "rules_graphs": dAllGraph,
        "rules_actions": dACTIONS
    }

    return d


Modified make.py from [5704755499] to [6b14360ed5].

15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import json
import platform

from distutils import dir_util, file_util

import dialog_bundled
import compile_rules
import compile_rules_graph
import helpers
import lex_build


sWarningMessage = "The content of this folder is generated by code and replaced at each build.\n"









<







15
16
17
18
19
20
21

22
23
24
25
26
27
28
import json
import platform

from distutils import dir_util, file_util

import dialog_bundled
import compile_rules

import helpers
import lex_build


sWarningMessage = "The content of this folder is generated by code and replaced at each build.\n"


190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
    spLang = "gc_lang/" + sLang

    dVars = xConfig._sections['args']
    dVars['locales'] = dVars["locales"].replace("_", "-")
    dVars['loc'] = str(dict([ [s, [s[0:2], s[3:5], ""]] for s in dVars["locales"].split(" ") ]))

    ## COMPILE RULES
    dResultRegex = compile_rules.make(spLang, dVars['lang'], bJavaScript)
    dVars.update(dResultRegex)

    dResultGraph = compile_rules_graph.make(spLang, dVars['lang'], bJavaScript)
    dVars.update(dResultGraph)

    ## READ GRAMMAR CHECKER PLUGINS
    print("PYTHON:")
    print("+ Plugins: ", end="")
    sCodePlugins = ""
    for sf in os.listdir(spLang+"/modules"):
        if re.match(r"gce_\w+[.]py$", sf):







|
|
<
<
<







189
190
191
192
193
194
195
196
197



198
199
200
201
202
203
204
    spLang = "gc_lang/" + sLang

    dVars = xConfig._sections['args']
    dVars['locales'] = dVars["locales"].replace("_", "-")
    dVars['loc'] = str(dict([ [s, [s[0:2], s[3:5], ""]] for s in dVars["locales"].split(" ") ]))

    ## COMPILE RULES
    dResult = compile_rules.make(spLang, dVars['lang'], bJavaScript)
    dVars.update(dResult)




    ## READ GRAMMAR CHECKER PLUGINS
    print("PYTHON:")
    print("+ Plugins: ", end="")
    sCodePlugins = ""
    for sf in os.listdir(spLang+"/modules"):
        if re.match(r"gce_\w+[.]py$", sf):
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
            helpers.copyAndFileTemplate(spLang+"/modules/"+sf, spLangPack+"/"+sf, dVars)
            print(sf, end=", ")
    print()

    # TEST FILES
    with open("grammalecte/"+sLang+"/gc_test.txt", "w", encoding="utf-8", newline="\n") as hDstPy:
        hDstPy.write("# TESTS FOR LANG [" + sLang + "]\n\n")
        hDstPy.write("# REGEX RULES\n\n")
        hDstPy.write(dVars['regex_gctests'])
        hDstPy.write("\n\n\n# GRAPH RULES\n\n")
        hDstPy.write(dVars['graph_gctests'])
        hDstPy.write("\n")

    createOXT(spLang, dVars, xConfig._sections['oxt'], spLangPack, bInstallOXT)

    createServerOptions(sLang, dVars)
    createPackageZip(sLang, dVars, spLangPack)








<
<
<
|







225
226
227
228
229
230
231



232
233
234
235
236
237
238
239
            helpers.copyAndFileTemplate(spLang+"/modules/"+sf, spLangPack+"/"+sf, dVars)
            print(sf, end=", ")
    print()

    # TEST FILES
    with open("grammalecte/"+sLang+"/gc_test.txt", "w", encoding="utf-8", newline="\n") as hDstPy:
        hDstPy.write("# TESTS FOR LANG [" + sLang + "]\n\n")



        hDstPy.write(dVars['gctests'])
        hDstPy.write("\n")

    createOXT(spLang, dVars, xConfig._sections['oxt'], spLangPack, bInstallOXT)

    createServerOptions(sLang, dVars)
    createPackageZip(sLang, dVars, spLangPack)