Grammalecte: Check-in [6c9f0b9f2d]

Overview
Comment: [build][core] definitions for the graph + debugging update
SHA3-256: 6c9f0b9f2df3c32916728cac4e5300fca675764730e2fa946811aa2bc91ace4b
User & Date: olr on 2018-06-17 13:07:32
Context
2018-06-17
13:11  [graphspell] tokenizer: update ordinals  check-in: 4be13a74c3  user: olr  tags: graphspell, rg
13:07  [build][core] definitions for the graph + debugging update  check-in: 6c9f0b9f2d  user: olr  tags: core, build, rg
13:06  [fr] conversion: regex rules -> graph rules  check-in: cd761303f8  user: olr  tags: fr, rg
Changes

Modified compile_rules.py from [a5c1ea137d] to [06d201754c].

@@ -581,11 +581,11 @@
           "paragraph_rules": mergeRulesByOption(lParagraphRules),
           "sentence_rules": mergeRulesByOption(lSentenceRules),
           "paragraph_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lParagraphRulesJS)),
           "sentence_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lSentenceRulesJS)) }
     d.update(dOptions)
 
     # compile graph rules
-    d2 = crg.make(lGraphRule, sLang, bJavaScript)
+    d2 = crg.make(lGraphRule, dDEF, sLang, bJavaScript)
     d.update(d2)
 
     return d
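
This change threads the definitions dictionary (dDEF), built while parsing the rule file, into the graph-rule compiler explicitly instead of relying on a name from another module. A minimal sketch of what such a dictionary could contain; the entries are illustrative, not taken from the actual rule files:

    # Hypothetical definitions: {NAME} placeholders mapped to the token
    # patterns they expand to inside graph-rule token lines.
    dDEF = {
        "{det}": "[le|la|les]",
        "{verb}": "[:V1|:V2|:V3]",
    }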

Modified compile_rules_graph.py from [6a40538d43] to [2d984fb6f6].

@@ -30,21 +30,21 @@
         s = re.sub(r"[\\](\d+)", 'lToken[\\1+nTokenOffset]["sValue"]', s)
     else:
         # tokens used as parameter
         s = re.sub(r"[\\](\d+)", 'lToken[\\1+nTokenOffset]', s)
     return s
 
 
-def genTokenLines (sTokenLine):
+def genTokenLines (sTokenLine, dDef):
     "tokenize a string and return a list of lines of tokens"
     lToken = sTokenLine.split()
     lTokenLines = None
     for i, sToken in enumerate(lToken):
-        if sToken.startswith("{") and sToken.endswith("}") and sToken in dDEF:
-            lToken[i] = dDEF[sToken]
+        if sToken.startswith("{") and sToken.endswith("}") and sToken in dDef:
+            sToken = dDef[sToken]
         if ( (sToken.startswith("[") and sToken.endswith("]")) or (sToken.startswith("([") and sToken.endswith("])")) ):
             bSelectedGroup = sToken.startswith("(") and sToken.endswith(")")
             if bSelectedGroup:
                 sToken = sToken[1:-1]
             # multiple token
             if not lTokenLines:
                 lTokenLines = [ [s]  for s  in sToken[1:-1].split("|") ]
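
Beyond the signature change, note the fix in the substitution itself: the old code wrote the expansion into lToken[i], but every check that follows reads the local variable sToken, so a definition expanding to a [a|b] alternative was never recognized as a multi-token pattern (and dDEF was likely not even defined in this module). Rebinding sToken makes the expansion visible to the rest of the loop. A self-contained sketch of the expansion logic, simplified (no selected-group handling) and with illustrative names:

    def expandTokenLine (sTokenLine, dDef):
        "simplified sketch of genTokenLines: expand {definitions}, fork lines on [a|b]"
        lTokenLines = [[]]
        for sToken in sTokenLine.split():
            if sToken.startswith("{") and sToken.endswith("}") and sToken in dDef:
                sToken = dDef[sToken]   # rebinding, so the test below sees the expansion
            if sToken.startswith("[") and sToken.endswith("]"):
                # each alternative forks every token line built so far
                lTokenLines = [ lLine + [s]  for lLine in lTokenLines  for s in sToken[1:-1].split("|") ]
            else:
                for lLine in lTokenLines:
                    lLine.append(sToken)
        yield from lTokenLines

    # expandTokenLine("{det} chat", {"{det}": "[le|un]"}) -> ["le", "chat"], ["un", "chat"]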
@@ -70,17 +70,17 @@
             else:
                 for aRule in lTokenLines:
                     aRule.append(sToken)
     for aRule in lTokenLines:
         yield aRule
 
 
-def createRule (iLine, sRuleName, sTokenLine, iActionBlock, sActions, nPriority):
+def createRule (iLine, sRuleName, sTokenLine, iActionBlock, sActions, nPriority, dDef):
     # print(iLine, "//", sRuleName, "//", sTokenLine, "//", sActions, "//", nPriority)
-    for lToken in genTokenLines(sTokenLine):
+    for lToken in genTokenLines(sTokenLine, dDef):
         # Calculate positions
         dPos = {}   # key: iGroup, value: iToken
         iGroup = 0
         for i, sToken in enumerate(lToken):
             if sToken.startswith("(") and sToken.endswith(")"):
                 lToken[i] = sToken[1:-1]
                 iGroup += 1
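
The position pass unwraps (…) groups and counts them so that actions can refer to matched tokens by group number (dPos: key iGroup, value iToken, as the comment says). A small illustration; the exact value stored per group is not visible in this hunk, so the 1-based offset below is an assumption:

    # Sketch of the group-position pass over one token line.
    lToken = ["les", "(chats)", "(noirs)"]
    dPos = {}
    iGroup = 0
    for i, sToken in enumerate(lToken):
        if sToken.startswith("(") and sToken.endswith(")"):
            lToken[i] = sToken[1:-1]
            iGroup += 1
            dPos[iGroup] = i + 1   # assumption: groups refer to 1-based token positions
    # dPos == {1: 2, 2: 3}; lToken == ["les", "chats", "noirs"]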
@@ -221,15 +221,15 @@
         ## no action, break loop if condition is False
         return [sOption, sCondition, cAction, ""]
     else:
         print("# Unknown action at line " + sIdAction)
         return None
 
 
-def make (lRule, sLang, bJavaScript):
+def make (lRule, dDef, sLang, bJavaScript):
     "compile rules, returns a dictionary of values"
     # for clarity purpose, don’t create any file here
 
     # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
     print("  parsing rules...")
     lTokenLine = []
     sActions = ""
@@ -292,15 +292,15 @@
             print(sLine)
 
     # processing rules
     print("  preparing rules...")
     for sGraphName, lRuleLine in dAllGraph.items():
         lPreparedRule = []
         for i, sRuleGroup, sTokenLine, iActionBlock, sActions, nPriority in lRuleLine:
-            for lRule in createRule(i, sRuleGroup, sTokenLine, iActionBlock, sActions, nPriority):
+            for lRule in createRule(i, sRuleGroup, sTokenLine, iActionBlock, sActions, nPriority, dDef):
                 lPreparedRule.append(lRule)
         # Show rules
         for e in lPreparedRule:
             print(e)
         # Graph creation
         oDARG = darg.DARG(lPreparedRule, sLang)
         dAllGraph[sGraphName] = oDARG.createGraph()

Modified gc_core/py/lang_core/gc_engine.py from [c27f614a0d] to [4c2d49e047].

@@ -706,36 +706,36 @@
                                 if nErrorStart not in dErrs or eAct[2] > dPriority[nErrorStart]:
                                     dErrs[nErrorStart] = self.createError(sWhat, nTokenOffset, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, True, eAct[3], eAct[4], bShowRuleId, "notype", bContext)
                                     dPriority[nErrorStart] = eAct[2]
                                     if bDebug:
                                         print("-", sRuleId, dErrs[nErrorStart])
                             elif cActionType == "~":
                                 # text processor
-                                self._tagAndPrepareTokenForRewriting(sWhat, nTokenOffset + eAct[0], nTokenOffset + eAct[1])
+                                self._tagAndPrepareTokenForRewriting(sWhat, nTokenOffset + eAct[0], nTokenOffset + eAct[1], bDebug)
                                 if bDebug:
                                     print("~", sRuleId)
                                 bChange = True
                             elif cActionType == "=":
                                 # disambiguation
                                 globals()[sWhat](self.lToken, nTokenOffset)
                                 if bDebug:
                                     print("=", sRuleId)
                             elif cActionType == ">":
                                 # we do nothing, this test is just a condition to apply all following actions
                                 if bDebug:
                                     print(">", sRuleId)
                                 pass
                             else:
                                 print("# error: unknown action at " + sLineId)
                         elif cActionType == ">":
                             if bDebug:
                                 print(">!", sRuleId)
                             break
                 except Exception as e:
-                    raise Exception(str(e), sLineId)
+                    raise Exception(str(e), sLineId, sRuleId, self.sSentence)
         return bChange, dErrs
 
     def _createWriterError (self, sSugg, nTokenOffset, iFirstToken, nStart, nEnd, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
         "error for Writer (LO/OO)"
         xErr = SingleProofreadingError()
         #xErr = uno.createUnoStruct( "com.sun.star.linguistic2.SingleProofreadingError" )
         xErr.nErrorStart = nStart
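
The branches above follow a one-character convention for action types. A summary of that convention, reconstructed from this hunk (not an authoritative list):

    # Action-type convention in the rule engine, as seen in the dispatch above:
    dActionTypes = {
        "-": "error: createError(), stored in dErrs, highest priority wins",
        "~": "text processor: _tagAndPrepareTokenForRewriting() on a token span",
        "=": "disambiguation: call the named function on lToken",
        ">": "condition only: gates the following actions, or breaks on failure",
    }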
@@ -816,16 +816,18 @@
     def _expand (self, sMsg, nTokenOffset):
         #print("*", sMsg)
         for m in re.finditer(r"\\([0-9]+)", sMsg):
             sMsg = sMsg.replace(m.group(0), self.lToken[int(m.group(1))+nTokenOffset]["sValue"])
         #print(">", sMsg)
         return sMsg
 
-    def _tagAndPrepareTokenForRewriting (self, sWhat, nTokenRewriteStart, nTokenRewriteEnd, bUppercase=True):
+    def _tagAndPrepareTokenForRewriting (self, sWhat, nTokenRewriteStart, nTokenRewriteEnd, bUppercase=True, bDebug=False):
         "text processor: rewrite tokens between <nTokenRewriteStart> and <nTokenRewriteEnd> position"
+        if bDebug:
+            print("REWRITING:", nTokenRewriteStart, nTokenRewriteEnd)
         if sWhat == "*":
             # purge text
             if nTokenRewriteEnd - nTokenRewriteStart == 0:
                 self.lToken[nTokenRewriteStart]["bToRemove"] = True
             else:
                 for i in range(nTokenRewriteStart, nTokenRewriteEnd+1):
                     self.lToken[i]["bToRemove"] = True