Grammalecte: Check-in [7e92a17d42]

Overview
Comment: [build][core] named graphs
SHA3-256: 7e92a17d420719345291388e13ab89f17b39abb9341f9c123f161092b5e9ce76
User & Date: olr on 2018-06-11 09:26:03
Context
2018-06-11
13:21  [build][core] graph calls within regex rules  check-in: eeef098bd9 user: olr tags: core, build, rg
09:26  [build][core] named graphs  check-in: 7e92a17d42 user: olr tags: core, build, rg
09:11  [core] gc engine: small code clarification  check-in: 70e6105d8a user: olr tags: core, rg
Changes

Modified compile_rules_graph.py from [37d848c323] to [f86ee887b1].

dACTIONS = {}
lFUNCTIONS = []


def prepareFunction (s):
    s = s.replace("__also__", "bCondMemo")
    s = s.replace("__else__", "not bCondMemo")
    s = re.sub(r"isStart *\(\)", 'before(["<START>", ","])', s)
    s = re.sub(r"isRealStart *\(\)", 'before(["<START>"])', s)
    s = re.sub(r"isStart0 *\(\)", 'before0(["<START>", ","])', s)
    s = re.sub(r"isRealStart0 *\(\)", 'before0(["<START>"])', s)
    s = re.sub(r"isEnd *\(\)", 'after(["<END>", ","])', s)
    s = re.sub(r"isRealEnd *\(\)", 'after(["<END>"])', s)
    s = re.sub(r"isEnd0 *\(\)", 'after0(["<END>", ","])', s)
    s = re.sub(r"isRealEnd0 *\(\)", 'after0(["<END>"])', s)
    s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(morph|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"token\(\s*(\d)", 'nextToken(\\1', s)                                       # token(n)
    s = re.sub(r"token\(\s*-(\d)", 'prevToken(\\1', s)                                      # token(-n)
    s = re.sub(r"before\(\s*", 'look(s[:m.start()], ', s)                                   # before(s)
    s = re.sub(r"after\(\s*", 'look(s[m.end():], ', s)                                      # after(s)
    s = re.sub(r"textarea\(\s*", 'look(s, ', s)                                             # textarea(s)
    s = re.sub(r"before_chk1\(\s*", 'look_chk1(dDA, s[:m.start()], 0, ', s)                 # before_chk1(s)
    s = re.sub(r"after_chk1\(\s*", 'look_chk1(dDA, s[m.end():], m.end(), ', s)              # after_chk1(s)
    s = re.sub(r"textarea_chk1\(\s*", 'look_chk1(dDA, s, 0, ', s)                           # textarea_chk1(s)
    #s = re.sub(r"isEndOfNG\(\s*\)", 'isEndOfNG(dDA, s[m.end():], m.end())', s)              # isEndOfNG(s)
    #s = re.sub(r"isNextNotCOD\(\s*\)", 'isNextNotCOD(dDA, s[m.end():], m.end())', s)        # isNextNotCOD(s)
    #s = re.sub(r"isNextVerb\(\s*\)", 'isNextVerb(dDA, s[m.end():], m.end())', s)            # isNextVerb(s)
    s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s)
    s = re.sub(r"[\\](\d+)", 'lToken[\\1]', s)
    return s
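
For illustration only (not part of this check-in): applying the substitutions above, in order, turns a condition written in the rule DSL into a call on the token list. A minimal sketch, assuming prepareFunction is called from this module, with a snippet mirroring the rule in gc_lang/fr/rules_graph.grx below:

# hypothetical usage sketch, not in the commit
sCondition = r'select(\2, ":V")'
print(prepareFunction(sCondition))
# expected output, given the regexes above: g_select(lToken[2+nTokenOffset], ":V")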


def genTokenLines (sTokenLine):
    "tokenize a string and return a list of lines of tokens"

    except:
        print("Error. Rules file in project [" + sLang + "] not found.")
        exit()

    # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
    print("  parsing rules...")
    global dDEF
    lLine = []
    lRuleLine = []
    lTest = []
    lOpt = []
    lTokenLine = []
    sActions = ""
    nPriority = 4
    dAllGraph = {}
    sGraphName = ""

    for i, sLine in enumerate(lRules, 1):
        sLine = sLine.rstrip()
        if "\t" in sLine:
            # tabulation not allowed
            print("Error. Tabulation at line: ", i)
            break
            exit()
        if sLine.startswith('#END'):
            # arbitrary end
            printBookmark(0, "BREAK BY #END", i)
            break
        elif sLine.startswith("#"):
            # comments
            pass
        elif sLine.startswith("GRAPH_NAME: "):
            # Graph name
            m = re.match("GRAPH_NAME: +([a-zA-Z_][a-zA-Z_0-9]*)+", sLine.strip())
            if m:
                sGraphName = m.group(1)
                if sGraphName in dAllGraph:
                    print("Error. Group name " + sGraphName + " already exists.")
                    exit()
                dAllGraph[sGraphName] = []
            else:
                print("Error. Graph name not found in", sLine.strip())
                exit()
        elif sLine.startswith("DEF:"):
            # definition
            m = re.match("DEF: +([a-zA-Z_][a-zA-Z_0-9]*) +(.+)$", sLine.strip())
            if m:
                dDEF["{"+m.group(1)+"}"] = m.group(2)
            else:
                print("Error in definition: ", end="")
                print(sLine.strip())
        elif sLine.startswith("TEST:"):
            # test
            lTest.append("g{:<7}".format(i) + "  " + sLine[5:].strip())
        elif sLine.startswith("TODO:"):
            # todo
            pass
        elif sLine.startswith("!!"):
            # bookmarks
            m = re.search("^!!+", sLine)
            nExMk = len(m.group(0))
            if sLine[nExMk:].strip():
                printBookmark(nExMk-2, sLine[nExMk:].strip(), i)
        elif sLine.startswith("__") and sLine.endswith("__"):
            # new rule group
            m = re.match("__(\\w+)(!\\d|)__", sLine)
            if m:
                sRuleName = m.group(1)
                nPriority = int(m.group(2)[1:]) if m.group(2)  else 4
            else:
                print("Error at rule group: ", sLine, " -- line:", i)
                break
        elif re.match("[  ]*$", sLine):
            # empty line to end merging
            if not lTokenLine:
                continue
            if not sActions:
                print("Error. No action found at line:", i)
                exit()
            if not sGraphName:
                print("Error. All rules must belong to a named graph. Line: ", i)
                exit()
-            for i, sTokenLine in lTokenLine:
-                lRuleLine.append((i, sRuleName, sTokenLine, sActions, nPriority))
-            lTokenLine = []
+            for j, sTokenLine in lTokenLine:
+                dAllGraph[sGraphName].append((j, sRuleName, sTokenLine, sActions, nPriority))
+            lTokenLine.clear()
            sActions = ""
            sRuleName = ""
            nPriority = 4
        elif sLine.startswith(("        ")):
            # actions
            sActions += " " + sLine.strip()
        else:
            lTokenLine.append([i, sLine.strip()])

    # tests
    print("  list tests...")
    sGCTests = "\n".join(lTest)
    sGCTestsJS = '{ "aData2": ' + json.dumps(lTest, ensure_ascii=False) + " }\n"

    # processing rules
    print("  preparing rules...")
+    for sGraphName, lRuleLine in dAllGraph.items():
-    lPreparedRule = []
-    for i, sRuleGroup, sTokenLine, sActions, nPriority in lRuleLine:
-        for lRule in createRule(i, sRuleGroup, sTokenLine, sActions, nPriority):
-            lPreparedRule.append(lRule)

+        lPreparedRule = []
+        for i, sRuleGroup, sTokenLine, sActions, nPriority in lRuleLine:
+            for lRule in createRule(i, sRuleGroup, sTokenLine, sActions, nPriority):
+                lPreparedRule.append(lRule)
+        # Show rules
-    # Graph creation
-    for e in lPreparedRule:
-        print(e)

-    oDARG = darg.DARG(lPreparedRule, sLang)
-    oRuleGraph = oDARG.createGraph()
+        for e in lPreparedRule:
+            print(e)
+        # Graph creation
+        oDARG = darg.DARG(lPreparedRule, sLang)
+        dAllGraph[sGraphName] = oDARG.createGraph()

    # creating file with all functions callable by rules
    print("  creating callables...")
    sPyCallables = "# generated code, do not edit\n"
    #sJSCallables = "// generated code, do not edit\nconst oEvalFunc = {\n"
    for sFuncName, sReturn in lFUNCTIONS:
        if sFuncName.startswith("g_c_"): # condition

    for sActionName, aAction in dACTIONS.items():
        print(sActionName, aAction)

    # Result
    d = {
        "graph_callables": sPyCallables,
        "graph_gctests": sGCTests,
-        "rules_graph": oRuleGraph,
+        "rules_graphs": dAllGraph,
        "rules_actions": dACTIONS
    }

    return d
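
A hedged illustration (not part of this check-in) of how the result is meant to be consumed: the "rules_graphs" and "rules_actions" entries replace the ${...} placeholders of gc_core/py/lang_core/gc_rules_graph.py shown further below; the real substitution is handled by the build scripts. A naive stand-in:

# hypothetical sketch with stand-in values
dResult = {"rules_graphs": {"test_graph": {}}, "rules_actions": {}}
sTemplate = "dAllGraph = ${rules_graphs}\n\ndRule = ${rules_actions}\n"
sGenerated = sTemplate.replace("${rules_graphs}", repr(dResult["rules_graphs"]))
sGenerated = sGenerated.replace("${rules_actions}", repr(dResult["rules_actions"]))
print(sGenerated)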


Modified gc_core/py/lang_core/gc_engine.py from [7c7a312e6d] to [12b89317cd].

from itertools import chain

from ..graphspell.spellchecker import SpellChecker
from ..graphspell.echo import echo
from . import gc_options

from ..graphspell.tokenizer import Tokenizer
-from .gc_rules_graph import dGraph, dRule
+from .gc_rules_graph import dAllGraph, dRule

try:
    # LibreOffice / OpenOffice
    from com.sun.star.linguistic2 import SingleProofreadingError
    from com.sun.star.text.TextMarkupType import PROOFREADING
    from com.sun.star.beans import PropertyValue
    #import lightproof_handler_${implname} as opt

            dDA.clear()
            try:
                # regex parser
                _, errs = _proofread(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dDA, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
                aErrors.update(errs)
                # token parser
                oSentence = TokenSentence(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart)
-                bChange, errs = oSentence.parse(dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
+                bChange, errs = oSentence.parse(dAllGraph["test_graph"], dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
                aErrors.update(errs)
                if bChange:
                    oSentence.rewrite()
                    if bDebug:
                        print("~", oSentence.sSentence)
            except:
                raise

    def __init__ (self, sSentence, sSentence0, nOffset):
        self.sSentence = sSentence
        self.sSentence0 = sSentence0
        self.nOffset = nOffset
        self.lToken = list(_oTokenizer.genTokens(sSentence, True))
        self.createError = self._createWriterError  if _bWriterError  else self._createDictError

-    def _getNextMatchingNodes (self, dToken, dNode):
+    def _getNextMatchingNodes (self, dToken, dGraph, dNode):
        "generator: return nodes where <dToken> “values” match <dNode> arcs"
        # token value
        if dToken["sValue"] in dNode:
            #print("value found: ", dToken["sValue"])
            yield dGraph[dNode[dToken["sValue"]]]
        # token lemmas
        if "<lemmas>" in dNode:

                            yield dGraph[dNode["<re_morph>"][sRegex]]
                    else:
                        if sNegPattern and any(re.search(sNegPattern, sMorph)  for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                            continue
                        if any(re.search(sPattern, sMorph)  for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                            yield dGraph[dNode["<re_morph>"][sRegex]]

-    def parse (self, dPriority, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False):
+    def parse (self, dGraph, dPriority, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False):
        dErr = {}
        dPriority = {}  # Key = position; value = priority
        dOpt = _dOptions  if not dOptions  else dOptions
        lPointer = []
        bChange = False
        for dToken in self.lToken:
            # check arcs for each existing pointer
            lNextPointer = []
            for dPointer in lPointer:
-                for dNode in self._getNextMatchingNodes(dToken, dPointer["dNode"]):
+                for dNode in self._getNextMatchingNodes(dToken, dGraph, dPointer["dNode"]):
                    lNextPointer.append({"iToken": dPointer["iToken"], "dNode": dNode})
            lPointer = lNextPointer
            # check arcs of first nodes
-            for dNode in self._getNextMatchingNodes(dToken, dGraph[0]):
+            for dNode in self._getNextMatchingNodes(dToken, dGraph, dGraph[0]):
                lPointer.append({"iToken": dToken["i"], "dNode": dNode})
            # check if there is rules to check for each pointer
            for dPointer in lPointer:
                if "<rules>" in dPointer["dNode"]:
-                    bHasChanged, errs = self._executeActions(dPointer["dNode"]["<rules>"], dPointer["iToken"]-1, dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext)
+                    bHasChanged, errs = self._executeActions(dGraph, dPointer["dNode"]["<rules>"], dPointer["iToken"]-1, dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext)
                    dErr.update(errs)
                    if bHasChanged:
                        bChange = True
        return (bChange, dErr)

-    def _executeActions (self, dNode, nTokenOffset, dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext):
+    def _executeActions (self, dGraph, dNode, nTokenOffset, dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext):
        "execute actions found in the DARG"
        dErrs = {}
        bChange = False
        for sLineId, nextNodeKey in dNode.items():
            for sRuleId in dGraph[nextNodeKey]:
                bCondMemo = None
                sFuncCond, cActionType, sWhat, *eAct = dRule[sRuleId]
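
For orientation, a toy picture of the data these methods now receive. The node keys and values below are invented; only the overall shape follows from the lookups above (arcs keyed by token values, "<lemmas>", "<re_morph>" or "<rules>", with values pointing back into the graph):

# illustrative only: invented content, shaped after the lookups in the methods above
dAllGraph = {
    "test_graph": {
        0: {"ne": 1},                    # arc keyed by token value -> next node key
        1: {"<lemmas>": {"donner": 2}},  # arc keyed by lemma
        2: {"<rules>": {"da1": 3}},      # line id -> node key holding the rule ids
        3: ["da1_1"],                    # rule ids, looked up in dRule by _executeActions
    }
}
# a sentence is then parsed against one named graph, as in the call shown earlier:
# bChange, errs = oSentence.parse(dAllGraph["test_graph"], dPriority, sCountry, dOpt)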

Modified gc_core/py/lang_core/gc_rules_graph.py from [e9a58f5498] to [b99ba93b1b].

# generated code, do not edit

-dGraph = ${rules_graph}
+dAllGraph = ${rules_graphs}

dRule = ${rules_actions}

Modified gc_lang/fr/rules_graph.grx from [7f8d7a1159] to [7ae68d4f85].

# Fin d’interprétation du fichier avec une ligne commençant par #END

# ERREURS COURANTES
# http://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Fautes_d%27orthographe/Courantes


GRAPH_NAME: test_graph

__da1__
    ne >donner
        <<- =>> select(\2, ":V")

TEST: je ne donne rien.