Grammalecte  Check-in [6c9f0b9f2d]

Overview
Comment:[build][core] definititions for the graph + debugging update
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | build | rg
Files: files | file ages | folders
SHA3-256: 6c9f0b9f2df3c32916728cac4e5300fca675764730e2fa946811aa2bc91ace4b
User & Date: olr on 2018-06-17 13:07:32
Other Links: branch diff | manifest | tags
Context
2018-06-17
13:11
[graphspell] tokenizer: update ordinals check-in: 4be13a74c3 user: olr tags: graphspell, rg
13:07
[build][core] definititions for the graph + debugging update check-in: 6c9f0b9f2d user: olr tags: core, build, rg
13:06
[fr] conversion: regex rules -> graph rules check-in: cd761303f8 user: olr tags: fr, rg
Changes

Modified compile_rules.py from [a5c1ea137d] to [06d201754c].

581
582
583
584
585
586
587
588
589
590
591
          "paragraph_rules": mergeRulesByOption(lParagraphRules),
          "sentence_rules": mergeRulesByOption(lSentenceRules),
          "paragraph_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lParagraphRulesJS)),
          "sentence_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lSentenceRulesJS)) }
    d.update(dOptions)

    # compile graph rules
    d2 = crg.make(lGraphRule, sLang, bJavaScript)
    d.update(d2)

    return d







|



581
582
583
584
585
586
587
588
589
590
591
          "paragraph_rules": mergeRulesByOption(lParagraphRules),
          "sentence_rules": mergeRulesByOption(lSentenceRules),
          "paragraph_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lParagraphRulesJS)),
          "sentence_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lSentenceRulesJS)) }
    d.update(dOptions)

    # compile graph rules
    d2 = crg.make(lGraphRule, dDEF, sLang, bJavaScript)
    d.update(d2)

    return d

Modified compile_rules_graph.py from [6a40538d43] to [2d984fb6f6].

30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
        s = re.sub(r"[\\](\d+)", 'lToken[\\1+nTokenOffset]["sValue"]', s)
    else:
        # tokens used as parameter
        s = re.sub(r"[\\](\d+)", 'lToken[\\1+nTokenOffset]', s)
    return s


def genTokenLines (sTokenLine):
    "tokenize a string and return a list of lines of tokens"
    lToken = sTokenLine.split()
    lTokenLines = None
    for i, sToken in enumerate(lToken):
        if sToken.startswith("{") and sToken.endswith("}") and sToken in dDEF:
            lToken[i] = dDEF[sToken]
        if ( (sToken.startswith("[") and sToken.endswith("]")) or (sToken.startswith("([") and sToken.endswith("])")) ):
            bSelectedGroup = sToken.startswith("(") and sToken.endswith(")")
            if bSelectedGroup:
                sToken = sToken[1:-1]
            # multiple token
            if not lTokenLines:
                lTokenLines = [ [s]  for s  in sToken[1:-1].split("|") ]







|




|
|







30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
        s = re.sub(r"[\\](\d+)", 'lToken[\\1+nTokenOffset]["sValue"]', s)
    else:
        # tokens used as parameter
        s = re.sub(r"[\\](\d+)", 'lToken[\\1+nTokenOffset]', s)
    return s


def genTokenLines (sTokenLine, dDef):
    "tokenize a string and return a list of lines of tokens"
    lToken = sTokenLine.split()
    lTokenLines = None
    for i, sToken in enumerate(lToken):
        if sToken.startswith("{") and sToken.endswith("}") and sToken in dDef:
            sToken = dDef[sToken]
        if ( (sToken.startswith("[") and sToken.endswith("]")) or (sToken.startswith("([") and sToken.endswith("])")) ):
            bSelectedGroup = sToken.startswith("(") and sToken.endswith(")")
            if bSelectedGroup:
                sToken = sToken[1:-1]
            # multiple token
            if not lTokenLines:
                lTokenLines = [ [s]  for s  in sToken[1:-1].split("|") ]
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
            else:
                for aRule in lTokenLines:
                    aRule.append(sToken)
    for aRule in lTokenLines:
        yield aRule


def createRule (iLine, sRuleName, sTokenLine, iActionBlock, sActions, nPriority):
    # print(iLine, "//", sRuleName, "//", sTokenLine, "//", sActions, "//", nPriority)
    for lToken in genTokenLines(sTokenLine):
        # Calculate positions
        dPos = {}   # key: iGroup, value: iToken
        iGroup = 0
        for i, sToken in enumerate(lToken):
            if sToken.startswith("(") and sToken.endswith(")"):
                lToken[i] = sToken[1:-1]
                iGroup += 1







|

|







70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
            else:
                for aRule in lTokenLines:
                    aRule.append(sToken)
    for aRule in lTokenLines:
        yield aRule


def createRule (iLine, sRuleName, sTokenLine, iActionBlock, sActions, nPriority, dDef):
    # print(iLine, "//", sRuleName, "//", sTokenLine, "//", sActions, "//", nPriority)
    for lToken in genTokenLines(sTokenLine, dDef):
        # Calculate positions
        dPos = {}   # key: iGroup, value: iToken
        iGroup = 0
        for i, sToken in enumerate(lToken):
            if sToken.startswith("(") and sToken.endswith(")"):
                lToken[i] = sToken[1:-1]
                iGroup += 1
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
        ## no action, break loop if condition is False
        return [sOption, sCondition, cAction, ""]
    else:
        print("# Unknown action at line " + sIdAction)
        return None


def make (lRule, sLang, bJavaScript):
    "compile rules, returns a dictionary of values"
    # for clarity purpose, don’t create any file here

    # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
    print("  parsing rules...")
    lTokenLine = []
    sActions = ""







|







221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
        ## no action, break loop if condition is False
        return [sOption, sCondition, cAction, ""]
    else:
        print("# Unknown action at line " + sIdAction)
        return None


def make (lRule, dDef, sLang, bJavaScript):
    "compile rules, returns a dictionary of values"
    # for clarity purpose, don’t create any file here

    # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
    print("  parsing rules...")
    lTokenLine = []
    sActions = ""
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
            print(sLine)

    # processing rules
    print("  preparing rules...")
    for sGraphName, lRuleLine in dAllGraph.items():
        lPreparedRule = []
        for i, sRuleGroup, sTokenLine, iActionBlock, sActions, nPriority in lRuleLine:
            for lRule in createRule(i, sRuleGroup, sTokenLine, iActionBlock, sActions, nPriority):
                lPreparedRule.append(lRule)
        # Show rules
        for e in lPreparedRule:
            print(e)
        # Graph creation
        oDARG = darg.DARG(lPreparedRule, sLang)
        dAllGraph[sGraphName] = oDARG.createGraph()







|







292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
            print(sLine)

    # processing rules
    print("  preparing rules...")
    for sGraphName, lRuleLine in dAllGraph.items():
        lPreparedRule = []
        for i, sRuleGroup, sTokenLine, iActionBlock, sActions, nPriority in lRuleLine:
            for lRule in createRule(i, sRuleGroup, sTokenLine, iActionBlock, sActions, nPriority, dDef):
                lPreparedRule.append(lRule)
        # Show rules
        for e in lPreparedRule:
            print(e)
        # Graph creation
        oDARG = darg.DARG(lPreparedRule, sLang)
        dAllGraph[sGraphName] = oDARG.createGraph()

Modified gc_core/py/lang_core/gc_engine.py from [c27f614a0d] to [4c2d49e047].

706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
                                if nErrorStart not in dErrs or eAct[2] > dPriority[nErrorStart]:
                                    dErrs[nErrorStart] = self.createError(sWhat, nTokenOffset, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, True, eAct[3], eAct[4], bShowRuleId, "notype", bContext)
                                    dPriority[nErrorStart] = eAct[2]
                                    if bDebug:
                                        print("-", sRuleId, dErrs[nErrorStart])
                            elif cActionType == "~":
                                # text processor
                                self._tagAndPrepareTokenForRewriting(sWhat, nTokenOffset + eAct[0], nTokenOffset + eAct[1])
                                if bDebug:
                                    print("~", sRuleId)
                                bChange = True
                            elif cActionType == "=":
                                # disambiguation
                                globals()[sWhat](self.lToken, nTokenOffset)
                                if bDebug:
                                    print("=", sRuleId)
                            elif cActionType == ">":
                                # we do nothing, this test is just a condition to apply all following actions
                                if bDebug:
                                    print(">", sRuleId)
                                pass
                            else:
                                print("# error: unknown action at " + sLineId)
                        elif cActionType == ">":
                            if bDebug:
                                print(">!", sRuleId)
                            break
                except Exception as e:
                    raise Exception(str(e), sLineId)
        return bChange, dErrs

    def _createWriterError (self, sSugg, nTokenOffset, iFirstToken, nStart, nEnd, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
        "error for Writer (LO/OO)"
        xErr = SingleProofreadingError()
        #xErr = uno.createUnoStruct( "com.sun.star.linguistic2.SingleProofreadingError" )
        xErr.nErrorStart = nStart







|




















|







706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
                                if nErrorStart not in dErrs or eAct[2] > dPriority[nErrorStart]:
                                    dErrs[nErrorStart] = self.createError(sWhat, nTokenOffset, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, True, eAct[3], eAct[4], bShowRuleId, "notype", bContext)
                                    dPriority[nErrorStart] = eAct[2]
                                    if bDebug:
                                        print("-", sRuleId, dErrs[nErrorStart])
                            elif cActionType == "~":
                                # text processor
                                self._tagAndPrepareTokenForRewriting(sWhat, nTokenOffset + eAct[0], nTokenOffset + eAct[1], bDebug)
                                if bDebug:
                                    print("~", sRuleId)
                                bChange = True
                            elif cActionType == "=":
                                # disambiguation
                                globals()[sWhat](self.lToken, nTokenOffset)
                                if bDebug:
                                    print("=", sRuleId)
                            elif cActionType == ">":
                                # we do nothing, this test is just a condition to apply all following actions
                                if bDebug:
                                    print(">", sRuleId)
                                pass
                            else:
                                print("# error: unknown action at " + sLineId)
                        elif cActionType == ">":
                            if bDebug:
                                print(">!", sRuleId)
                            break
                except Exception as e:
                    raise Exception(str(e), sLineId, sRuleId, self.sSentence)
        return bChange, dErrs

    def _createWriterError (self, sSugg, nTokenOffset, iFirstToken, nStart, nEnd, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
        "error for Writer (LO/OO)"
        xErr = SingleProofreadingError()
        #xErr = uno.createUnoStruct( "com.sun.star.linguistic2.SingleProofreadingError" )
        xErr.nErrorStart = nStart
816
817
818
819
820
821
822
823
824


825
826
827
828
829
830
831
    def _expand (self, sMsg, nTokenOffset):
        #print("*", sMsg)
        for m in re.finditer(r"\\([0-9]+)", sMsg):
            sMsg = sMsg.replace(m.group(0), self.lToken[int(m.group(1))+nTokenOffset]["sValue"])
        #print(">", sMsg)
        return sMsg

    def _tagAndPrepareTokenForRewriting (self, sWhat, nTokenRewriteStart, nTokenRewriteEnd, bUppercase=True):
        "text processor: rewrite tokens between <nTokenRewriteStart> and <nTokenRewriteEnd> position"


        if sWhat == "*":
            # purge text
            if nTokenRewriteEnd - nTokenRewriteStart == 0:
                self.lToken[nTokenRewriteStart]["bToRemove"] = True
            else:
                for i in range(nTokenRewriteStart, nTokenRewriteEnd+1):
                    self.lToken[i]["bToRemove"] = True







|

>
>







816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
    def _expand (self, sMsg, nTokenOffset):
        #print("*", sMsg)
        for m in re.finditer(r"\\([0-9]+)", sMsg):
            sMsg = sMsg.replace(m.group(0), self.lToken[int(m.group(1))+nTokenOffset]["sValue"])
        #print(">", sMsg)
        return sMsg

    def _tagAndPrepareTokenForRewriting (self, sWhat, nTokenRewriteStart, nTokenRewriteEnd, bUppercase=True, bDebug=False):
        "text processor: rewrite tokens between <nTokenRewriteStart> and <nTokenRewriteEnd> position"
        if bDebug:
            print("REWRITING:", nTokenRewriteStart, nTokenRewriteEnd)
        if sWhat == "*":
            # purge text
            if nTokenRewriteEnd - nTokenRewriteStart == 0:
                self.lToken[nTokenRewriteStart]["bToRemove"] = True
            else:
                for i in range(nTokenRewriteStart, nTokenRewriteEnd+1):
                    self.lToken[i]["bToRemove"] = True