Grammalecte  Check-in [7fd21ca8e0]

Overview
Comment:[build][core] gc engine update
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | build | rg
Files: files | file ages | folders
SHA3-256: 7fd21ca8e0741929defbdc585dc60ca6b69f93c61a0e9fa20645f0e9cbc68153
User & Date: olr on 2018-06-05 16:20:00
Other Links: branch diff | manifest | tags
Context
2018-06-06
06:00
[build][core] DARG: syntax change: ~~ replaced by @ check-in: 2fd61da75f user: olr tags: core, build, rg
2018-06-05
16:20
[build][core] gc engine update check-in: 7fd21ca8e0 user: olr tags: core, build, rg
15:04
[fr] update tests check-in: 22b38f12e0 user: olr tags: fr, rg
Changes

Modified compile_rules_graph.py from [0bf3596a1c] to [ca6fc181e8].

18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
    s = re.sub(r"isRealStart *\(\)", 'before(["<START>"])', s)
    s = re.sub(r"isStart0 *\(\)", 'before0(["<START>", ","])', s)
    s = re.sub(r"isRealStart0 *\(\)", 'before0(["<START>"])', s)
    s = re.sub(r"isEnd *\(\)", 'after(["<END>", ","])', s)
    s = re.sub(r"isRealEnd *\(\)", 'after(["<END>"])', s)
    s = re.sub(r"isEnd0 *\(\)", 'after0(["<END>", ","])', s)
    s = re.sub(r"isRealEnd0 *\(\)", 'after0(["<END>"])', s)
    s = re.sub(r"(select|exclude)[(][\\](\d+)", '\\1(lToken[\\2]', s)
    s = re.sub(r"define[(][\\](\d+)", 'define(lToken[\\1]', s)
    s = re.sub(r"(morph|morphex|displayInfo)[(]\\(\d+)", '\\1(lToken[\\2]', s)
    s = re.sub(r"token\(\s*(\d)", 'nextToken(\\1', s)                                       # token(n)
    s = re.sub(r"token\(\s*-(\d)", 'prevToken(\\1', s)                                      # token(-n)
    s = re.sub(r"before\(\s*", 'look(s[:m.start()], ', s)                                   # before(s)
    s = re.sub(r"after\(\s*", 'look(s[m.end():], ', s)                                      # after(s)
    s = re.sub(r"textarea\(\s*", 'look(s, ', s)                                             # textarea(s)
    s = re.sub(r"before_chk1\(\s*", 'look_chk1(dDA, s[:m.start()], 0, ', s)                 # before_chk1(s)
    s = re.sub(r"after_chk1\(\s*", 'look_chk1(dDA, s[m.end():], m.end(), ', s)              # after_chk1(s)
    s = re.sub(r"textarea_chk1\(\s*", 'look_chk1(dDA, s, 0, ', s)                           # textarea_chk1(s)
    s = re.sub(r"isEndOfNG\(\s*\)", 'isEndOfNG(dDA, s[m.end():], m.end())', s)              # isEndOfNG(s)
    s = re.sub(r"isNextNotCOD\(\s*\)", 'isNextNotCOD(dDA, s[m.end():], m.end())', s)        # isNextNotCOD(s)
    s = re.sub(r"isNextVerb\(\s*\)", 'isNextVerb(dDA, s[m.end():], m.end())', s)            # isNextVerb(s)
    s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s)
    s = re.sub(r"[\\](\d+)", 'lToken[\\1]', s)
    return s


def genTokenLines (sTokenLine):
    "tokenize a string and return a list of lines of tokens"







|
<
|








|
|
|







18
19
20
21
22
23
24
25

26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
    s = re.sub(r"isRealStart *\(\)", 'before(["<START>"])', s)
    s = re.sub(r"isStart0 *\(\)", 'before0(["<START>", ","])', s)
    s = re.sub(r"isRealStart0 *\(\)", 'before0(["<START>"])', s)
    s = re.sub(r"isEnd *\(\)", 'after(["<END>", ","])', s)
    s = re.sub(r"isRealEnd *\(\)", 'after(["<END>"])', s)
    s = re.sub(r"isEnd0 *\(\)", 'after0(["<END>", ","])', s)
    s = re.sub(r"isRealEnd0 *\(\)", 'after0(["<END>"])', s)
    s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)

    s = re.sub(r"(morph|morphex|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"token\(\s*(\d)", 'nextToken(\\1', s)                                       # token(n)
    s = re.sub(r"token\(\s*-(\d)", 'prevToken(\\1', s)                                      # token(-n)
    s = re.sub(r"before\(\s*", 'look(s[:m.start()], ', s)                                   # before(s)
    s = re.sub(r"after\(\s*", 'look(s[m.end():], ', s)                                      # after(s)
    s = re.sub(r"textarea\(\s*", 'look(s, ', s)                                             # textarea(s)
    s = re.sub(r"before_chk1\(\s*", 'look_chk1(dDA, s[:m.start()], 0, ', s)                 # before_chk1(s)
    s = re.sub(r"after_chk1\(\s*", 'look_chk1(dDA, s[m.end():], m.end(), ', s)              # after_chk1(s)
    s = re.sub(r"textarea_chk1\(\s*", 'look_chk1(dDA, s, 0, ', s)                           # textarea_chk1(s)
    #s = re.sub(r"isEndOfNG\(\s*\)", 'isEndOfNG(dDA, s[m.end():], m.end())', s)              # isEndOfNG(s)
    #s = re.sub(r"isNextNotCOD\(\s*\)", 'isNextNotCOD(dDA, s[m.end():], m.end())', s)        # isNextNotCOD(s)
    #s = re.sub(r"isNextVerb\(\s*\)", 'isNextVerb(dDA, s[m.end():], m.end())', s)            # isNextVerb(s)
    s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s)
    s = re.sub(r"[\\](\d+)", 'lToken[\\1]', s)
    return s


def genTokenLines (sTokenLine):
    "tokenize a string and return a list of lines of tokens"
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327

    # creating file with all functions callable by rules
    print("  creating callables...")
    sPyCallables = "# generated code, do not edit\n"
    #sJSCallables = "// generated code, do not edit\nconst oEvalFunc = {\n"
    for sFuncName, sReturn in lFUNCTIONS:
        if sFuncName.startswith("g_c_"): # condition
            sParams = "lToken, sCountry, bCondMemo"
        elif sFuncName.startswith("g_m_"): # message
            sParams = "lToken"
        elif sFuncName.startswith("g_s_"): # suggestion
            sParams = "lToken"
        elif sFuncName.startswith("g_p_"): # preprocessor
            sParams = "lToken"
        elif sFuncName.startswith("g_d_"): # disambiguator







|







312
313
314
315
316
317
318
319
320
321
322
323
324
325
326

    # creating file with all functions callable by rules
    print("  creating callables...")
    sPyCallables = "# generated code, do not edit\n"
    #sJSCallables = "// generated code, do not edit\nconst oEvalFunc = {\n"
    for sFuncName, sReturn in lFUNCTIONS:
        if sFuncName.startswith("g_c_"): # condition
            sParams = "lToken, nTokenOffset, sCountry, bCondMemo"
        elif sFuncName.startswith("g_m_"): # message
            sParams = "lToken"
        elif sFuncName.startswith("g_s_"): # suggestion
            sParams = "lToken"
        elif sFuncName.startswith("g_p_"): # preprocessor
            sParams = "lToken"
        elif sFuncName.startswith("g_d_"): # disambiguator

Modified gc_core/py/lang_core/gc_engine.py from [1262fde6c4] to [dbebb9c217].

726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
            lPointer.extend(lNewPointer)
            # check arcs of first nodes
            for dNode in self._getNextMatchingNodes(dToken, dGraph[0]):
                lPointer.append({"nOffset": dToken["i"], "dNode": dNode})
            # check if there is rules to check for each pointer
            for dPointer in lPointer:
                if "<rules>" in dPointer["dNode"]:
                    bHasChanged, errs = self._executeActions(dPointer["dNode"]["<rules>"], dPointer["nOffset"]-1, dPriority, dOpt, bShowRuleId, bContext)
                    dErr.update(errs)
                    if bHasChanged:
                        bChange = True
        if dErr:
            print(dErr)
        return (bChange, dErr)

    def _executeActions (self, dNode, nTokenOffset, dPriority, dOpt, bShowRuleId, bContext):
        #print(locals())
        dErrs = {}
        bChange = False
        for sLineId, nextNodeKey in dNode.items():
            for sRuleId in dGraph[nextNodeKey]:
                print(sRuleId)
                bCondMemo = None
                sFuncCond, cActionType, sWhat, *eAct = dRule[sRuleId]
                # action in lActions: [ condition, action type, replacement/suggestion/action[, iTokenStart, iTokenEnd[, nPriority, message, URL]] ]
                try:
                    bCondMemo = not sFuncCond or globals()[sFuncCond](self, sCountry, bCondMemo)
                    if bCondMemo:
                        if cActionType == "-":
                            # grammar error
                            print("-")
                            nTokenErrorStart = nTokenOffset + eAct[0]
                            nTokenErrorEnd = nTokenOffset + eAct[1]
                            nErrorStart = self.iStart + self.lToken[nTokenErrorStart]["nStart"]







|







|










|







726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
            lPointer.extend(lNewPointer)
            # check arcs of first nodes
            for dNode in self._getNextMatchingNodes(dToken, dGraph[0]):
                lPointer.append({"nOffset": dToken["i"], "dNode": dNode})
            # check if there is rules to check for each pointer
            for dPointer in lPointer:
                if "<rules>" in dPointer["dNode"]:
                    bHasChanged, errs = self._executeActions(dPointer["dNode"]["<rules>"], dPointer["nOffset"]-1, dPriority, dOpt, sCountry, bShowRuleId, bContext)
                    dErr.update(errs)
                    if bHasChanged:
                        bChange = True
        if dErr:
            print(dErr)
        return (bChange, dErr)

    def _executeActions (self, dNode, nTokenOffset, dPriority, dOpt, sCountry, bShowRuleId, bContext):
        #print(locals())
        dErrs = {}
        bChange = False
        for sLineId, nextNodeKey in dNode.items():
            for sRuleId in dGraph[nextNodeKey]:
                print(sRuleId)
                bCondMemo = None
                sFuncCond, cActionType, sWhat, *eAct = dRule[sRuleId]
                # action in lActions: [ condition, action type, replacement/suggestion/action[, iTokenStart, iTokenEnd[, nPriority, message, URL]] ]
                try:
                    bCondMemo = not sFuncCond or globals()[sFuncCond](self.lToken, nTokenOffset, sCountry, bCondMemo)
                    if bCondMemo:
                        if cActionType == "-":
                            # grammar error
                            print("-")
                            nTokenErrorStart = nTokenOffset + eAct[0]
                            nTokenErrorEnd = nTokenOffset + eAct[1]
                            nErrorStart = self.iStart + self.lToken[nTokenErrorStart]["nStart"]
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
    if any(zNegPattern.search(sMorph)  for sMorph in lMorph):
        return False
    # search sPattern
    zPattern = re.compile(sPattern)
    return any(zPattern.search(sMorph)  for sMorph in lMorph)


#### Go outside the rule scope

def g_nextToken (i):
    pass

def g_prevToken (i):
    pass

def g_look ():
    pass

def g_lookAndCheck ():
    pass


#### Disambiguator

def g_select (dToken, sPattern, lDefault=None):
    "select morphologies for <dToken> according to <sPattern>, always return True"
    lMorph = dToken["lMorph"]  if "lMorph" in dToken  else _oSpellChecker.getMorph(dToken["sValue"])
    if not lMorph or len(lMorph) == 1:







<
<
<
<
<
<
<
<
<
<
<
<
<
<







856
857
858
859
860
861
862














863
864
865
866
867
868
869
    if any(zNegPattern.search(sMorph)  for sMorph in lMorph):
        return False
    # search sPattern
    zPattern = re.compile(sPattern)
    return any(zPattern.search(sMorph)  for sMorph in lMorph)

















#### Disambiguator

def g_select (dToken, sPattern, lDefault=None):
    "select morphologies for <dToken> according to <sPattern>, always return True"
    lMorph = dToken["lMorph"]  if "lMorph" in dToken  else _oSpellChecker.getMorph(dToken["sValue"])
    if not lMorph or len(lMorph) == 1: