Grammalecte  Check-in [3a97457e25]

Overview
Comment:[build][core] compile rules graph + token sentence checker update
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | build | rg
Files: files | file ages | folders
SHA3-256: 3a97457e25092c026d572fcd6507e0a3c7e9c5f9e98f0be273f3d0d854f902cd
User & Date: olr on 2018-05-23 11:48:14
Original Comment: [build][core][bug] compile rules graph: update
Other Links: branch diff | manifest | tags
Context
2018-05-23
12:10
[graphspell][js][bug] remove prefix sign from lemmas check-in: 13109802df user: olr tags: graphspell, rg
11:48
[build][core] compile rules graph + token sentence checker update check-in: 3a97457e25 user: olr tags: core, build, rg
11:47
[core][js][bug] disambigator: always return true check-in: 6e946dad21 user: olr tags: core, rg
Changes

Modified compile_rules_graph.py from [7c9c436423] to [a8ad098a64].

20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
    s = re.sub(r"isRealStart0 *\(\)", 'before0(["<START>"])', s)
    s = re.sub(r"isEnd *\(\)", 'after(["<END>", ","])', s)
    s = re.sub(r"isRealEnd *\(\)", 'after(["<END>"])', s)
    s = re.sub(r"isEnd0 *\(\)", 'after0(["<END>", ","])', s)
    s = re.sub(r"isRealEnd0 *\(\)", 'after0(["<END>"])', s)
    s = re.sub(r"(select|exclude)[(][\\](\d+)", '\\1(lToken[\\2]', s)
    s = re.sub(r"define[(][\\](\d+)", 'define(lToken[\\1]', s)
    s = re.sub(r"(morph|morphex|displayInfo)[(][\\](\d+)", '\\1(lToken[\\2])', s)
    s = re.sub(r"token\(\s*(\d)", 'nextToken(\\1', s)                                       # token(n)
    s = re.sub(r"token\(\s*-(\d)", 'prevToken(\\1', s)                                      # token(-n)
    s = re.sub(r"before\(\s*", 'look(s[:m.start()], ', s)                                   # before(s)
    s = re.sub(r"after\(\s*", 'look(s[m.end():], ', s)                                      # after(s)
    s = re.sub(r"textarea\(\s*", 'look(s, ', s)                                             # textarea(s)
    s = re.sub(r"before_chk1\(\s*", 'look_chk1(dDA, s[:m.start()], 0, ', s)                 # before_chk1(s)
    s = re.sub(r"after_chk1\(\s*", 'look_chk1(dDA, s[m.end():], m.end(), ', s)              # after_chk1(s)







|







20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
    s = re.sub(r"isRealStart0 *\(\)", 'before0(["<START>"])', s)
    s = re.sub(r"isEnd *\(\)", 'after(["<END>", ","])', s)
    s = re.sub(r"isRealEnd *\(\)", 'after(["<END>"])', s)
    s = re.sub(r"isEnd0 *\(\)", 'after0(["<END>", ","])', s)
    s = re.sub(r"isRealEnd0 *\(\)", 'after0(["<END>"])', s)
    s = re.sub(r"(select|exclude)[(][\\](\d+)", '\\1(lToken[\\2]', s)
    s = re.sub(r"define[(][\\](\d+)", 'define(lToken[\\1]', s)
    s = re.sub(r"(morph|morphex|displayInfo)[(]\\(\d+)", '\\1(lToken[\\2]', s)
    s = re.sub(r"token\(\s*(\d)", 'nextToken(\\1', s)                                       # token(n)
    s = re.sub(r"token\(\s*-(\d)", 'prevToken(\\1', s)                                      # token(-n)
    s = re.sub(r"before\(\s*", 'look(s[:m.start()], ', s)                                   # before(s)
    s = re.sub(r"after\(\s*", 'look(s[m.end():], ', s)                                      # after(s)
    s = re.sub(r"textarea\(\s*", 'look(s, ', s)                                             # textarea(s)
    s = re.sub(r"before_chk1\(\s*", 'look_chk1(dDA, s[:m.start()], 0, ', s)                 # before_chk1(s)
    s = re.sub(r"after_chk1\(\s*", 'look_chk1(dDA, s[m.end():], m.end(), ', s)              # after_chk1(s)
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
            mURL = re.search("[|] *(https?://.*)", sMsg)
            if mURL:
                sURL = mURL.group(1).strip()
                sMsg = sMsg[:mURL.start(0)].strip()
            if sMsg[0:1] == "=":
                sMsg = prepareFunction(sMsg[1:])
                lFUNCTIONS.append(("g_m_"+sIdAction, sMsg))
                for x in re.finditer("group[(](\d+)[)]", sMsg):
                    if int(x.group(1)) > nGroup:
                        print("# Error in groups in message at line " + sIdAction + " ("+str(nGroup)+" groups only)")
                sMsg = "=g_m_"+sIdAction
            else:
                for x in re.finditer(r"\\(\d+)", sMsg):
                    if int(x.group(1)) > nGroup:
                        print("# Error in groups in message at line " + sIdAction + " ("+str(nGroup)+" groups only)")
                if re.search("[.]\\w+[(]", sMsg):
                    print("# Error in message at line " + sIdAction + ":  This message looks like code. Line should begin with =")
            
    if sAction[0:1] == "=" or cAction == "=":
        if "define" in sAction and not re.search(r"define\(\\\d+ *, *\[.*\] *\)", sAction):
            print("# Error in action at line " + sIdAction + ": second argument for define must be a list of strings")
        sAction = prepareFunction(sAction)
        for x in re.finditer("group[(](\d+)[)]", sAction):
            if int(x.group(1)) > nGroup:
                print("# Error in groups in replacement at line " + sIdAction + " ("+str(nGroup)+" groups only)")
    else:
        for x in re.finditer(r"\\(\d+)", sAction):
            if int(x.group(1)) > nGroup:
                print("# Error in groups in replacement at line " + sIdAction + " ("+str(nGroup)+" groups only)")
        if re.search("[.]\\w+[(]|sugg\\w+[(]", sAction):







|














|







111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
            mURL = re.search("[|] *(https?://.*)", sMsg)
            if mURL:
                sURL = mURL.group(1).strip()
                sMsg = sMsg[:mURL.start(0)].strip()
            if sMsg[0:1] == "=":
                sMsg = prepareFunction(sMsg[1:])
                lFUNCTIONS.append(("g_m_"+sIdAction, sMsg))
                for x in re.finditer("group[(](\\d+)[)]", sMsg):
                    if int(x.group(1)) > nGroup:
                        print("# Error in groups in message at line " + sIdAction + " ("+str(nGroup)+" groups only)")
                sMsg = "=g_m_"+sIdAction
            else:
                for x in re.finditer(r"\\(\d+)", sMsg):
                    if int(x.group(1)) > nGroup:
                        print("# Error in groups in message at line " + sIdAction + " ("+str(nGroup)+" groups only)")
                if re.search("[.]\\w+[(]", sMsg):
                    print("# Error in message at line " + sIdAction + ":  This message looks like code. Line should begin with =")
            
    if sAction[0:1] == "=" or cAction == "=":
        if "define" in sAction and not re.search(r"define\(\\\d+ *, *\[.*\] *\)", sAction):
            print("# Error in action at line " + sIdAction + ": second argument for define must be a list of strings")
        sAction = prepareFunction(sAction)
        for x in re.finditer("group[(](\\d+)[)]", sAction):
            if int(x.group(1)) > nGroup:
                print("# Error in groups in replacement at line " + sIdAction + " ("+str(nGroup)+" groups only)")
    else:
        for x in re.finditer(r"\\(\d+)", sAction):
            if int(x.group(1)) > nGroup:
                print("# Error in groups in replacement at line " + sIdAction + " ("+str(nGroup)+" groups only)")
        if re.search("[.]\\w+[(]|sugg\\w+[(]", sAction):
265
266
267
268
269
270
271

























272
273
274
275
276
277
278
279
280
281
282
    # Graph creation
    for e in lPreparedRule:
        print(e)

    oDARG = darg.DARG(lPreparedRule, sLang)
    oRuleGraph = oDARG.createGraph()


























    # Result
    d = {
        "graph_callables": None,
        "graph_gctests": None,
        "rules_graph": oRuleGraph,
        "rules_actions": dACTIONS
    }

    return d









>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


|








265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
    # Graph creation
    for e in lPreparedRule:
        print(e)

    oDARG = darg.DARG(lPreparedRule, sLang)
    oRuleGraph = oDARG.createGraph()

    # creating file with all functions callable by rules
    print("  creating callables...")
    sPyCallables = "# generated code, do not edit\n"
    #sJSCallables = "// generated code, do not edit\nconst oEvalFunc = {\n"
    for sFuncName, sReturn in lFUNCTIONS:
        if sFuncName.startswith("g_c_"): # condition
            sParams = "lToken, sCountry, bCondMemo"
        elif sFuncName.startswith("g_m_"): # message
            sParams = "lToken"
        elif sFuncName.startswith("g_s_"): # suggestion
            sParams = "lToken"
        elif sFuncName.startswith("g_p_"): # preprocessor
            sParams = "lToken"
        elif sFuncName.startswith("g_d_"): # disambiguator
            sParams = "lToken"
        else:
            print("# Unknown function type in [" + sFuncName + "]")
            continue
        sPyCallables += "def {} ({}):\n".format(sFuncName, sParams)
        sPyCallables += "    return " + sReturn + "\n"
        #sJSCallables += "    {}: function ({})".format(sFuncName, sParams) + " {\n"
        #sJSCallables += "        return " + jsconv.py2js(sReturn) + ";\n"
        #sJSCallables += "    },\n"
    #sJSCallables += "}\n"

    # Result
    d = {
        "graph_callables": sPyCallables,
        "graph_gctests": None,
        "rules_graph": oRuleGraph,
        "rules_actions": dACTIONS
    }

    return d


Modified gc_core/py/lang_core/gc_engine.py from [29b43c054f] to [070a603bc6].

74
75
76
77
78
79
80



81
82
83
84
85
86
87
    for iStart, iEnd in _getSentenceBoundaries(sText):
        if 4 < (iEnd - iStart) < 2000:
            dDA.clear()
            try:
                # regex parser
                _, errs = _proofread(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dDA, dPriority, sCountry, dOpt, bDebug, bContext)
                aErrors.update(errs)



            except:
                raise
    return aErrors.values() # this is a view (iterable)


def _getSentenceBoundaries (sText):
    iStart = _zBeginOfParagraph.match(sText).end()







>
>
>







74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
    for iStart, iEnd in _getSentenceBoundaries(sText):
        if 4 < (iEnd - iStart) < 2000:
            dDA.clear()
            try:
                # regex parser
                _, errs = _proofread(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dDA, dPriority, sCountry, dOpt, bDebug, bContext)
                aErrors.update(errs)
                # token parser
                oSentence = TokenSentence(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, dPriority, sCountry, dOpt, bDebug, bContext)
                oSentence.parse()
            except:
                raise
    return aErrors.values() # this is a view (iterable)


def _getSentenceBoundaries (sText):
    iStart = _zBeginOfParagraph.match(sText).end()
564
565
566
567
568
569
570






































































































































































































































${plugins}


#### CALLABLES (generated code)

${callables}












































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802

${plugins}


#### CALLABLES (generated code)

${callables}



#### TOKEN SENTENCE CHECKER

class TokenSentence:

    def __init__ (self, sSentence, sSentence0, iStart, dPriority, sCountry, dOpt, bDebug, bContext):
        self.sSentence = sSentence
        self.sSentence0 = sSentence0
        self.iStart = iStart
        self.lToken = list(_oTokenizer.genTokens(sSentence))

    def parse (self):
        dErr = {}
        lPointer = []
        for dToken in self.lToken:
            for i, dPointer in enumerate(lPointer):
                bValid = False
                for dNode in self._getNextMatchingNodes(dToken, dPointer["dNode"]):
                    dPointer["nOffset"] = dToken["i"]
                    dPointer["dNode"] = dNode
                    bValid = True
                if not bValid:
                    del lPointer[i]
            for dNode in self._getNextMatchingNodes(dToken, dGraph):
                lPointer.append({"nOffset": 0, "dNode": dNode})
            for dPointer in lPointer:
                if "<rules>" in dPointer["dNode"]:
                    for dNode in dGraph[dPointer["dNode"]["<rules>"]]:
                        dErr = self._executeActions(dNode, nOffset)
        return dErr

    def _getNextMatchingNodes (self, dToken, dNode):
        # token value
        if dToken["sValue"] in dNode:
            yield dGraph[dNode[dToken["sValue"]]]
        # token lemmas
        for sLemma in _oSpellChecker.getLemma(dToken["sValue"]):
            if sLemma in dNode:
                yield dGraph[dNode[sLemma]]
        # universal arc
        if "*" in dNode:
            yield dGraph[dNode["*"]]
        # regex arcs
        if "~" in dNode:
            for sRegex in dNode["~"]:
                for sMorph in _oSpellChecker.getMorph(dToken["sValue"]):
                    if re.search(sRegex, sMorph):
                        yield dGraph[dNode["~"][sRegex]]

    def _executeActions (self, dNode, nOffset):
        for sLineId, nextNodeKey in dNode.items():
            for sArc in dGraph[nextNodeKey]:
                print(sArc)
                bCondMemo = None
                sFuncCond, cActionType, sWhat, *eAct = dRule[sArc]
                # action in lActions: [ condition, action type, replacement/suggestion/action[, iGroupStart, iGroupEnd[, message, URL]] ]
                try:
                    bCondMemo = not sFuncCond or globals()[sFuncCond](self, sCountry, bCondMemo)
                    if bCondMemo:
                        if cActionType == "-":
                            # grammar error
                            print("-")
                            nErrorStart = nSentenceOffset + m.start(eAct[0])
                            nErrorEnd = nSentenceOffset + m.start(eAct[1])
                            if nErrorStart not in dErrs or nPriority > dPriority[nErrorStart]:
                                dErrs[nErrorStart] = _createError(self, sWhat, nErrorStart, nErrorEnd, sLineId, bUppercase, eAct[2], eAct[3], bIdRule, sOption, bContext)
                                dPriority[nErrorStart] = nPriority
                        elif cActionType == "~":
                            # text processor
                            print("~")
                            self._rewrite(sWhat, nErrorStart, nErrorEnd)
                        elif cActionType == "@":
                            # jump
                            print("@")
                            self._jump(sWhat)
                        elif cActionType == "=":
                            # disambiguation
                            print("=")
                            globals()[sWhat](self.lToken)
                        elif cActionType == ">":
                            # we do nothing, this test is just a condition to apply all following actions
                            print(">")
                            pass
                        else:
                            print("# error: unknown action at " + sLineId)
                    elif cActionType == ">":
                        break
                except Exception as e:
                    raise Exception(str(e), "# " + sLineId + " # " + sRuleId)

    def _createWriterError (self):
        d = {}
        return d

    def _createDictError (self):
        d = {}
        return d

    def _rewrite (self, sWhat, nErrorStart, nErrorEnd):
        "text processor: rewrite tokens between <nErrorStart> and <nErrorEnd> position"
        lTokenValue = sWhat.split("|")
        if len(lTokenValue) != (nErrorEnd - nErrorStart + 1):
            print("Error. Text processor: number of replacements != number of tokens.")
            return
        for i, sValue in zip(range(nErrorStart, nErrorEnd+1), lTokenValue):
            self.lToken[i]["sValue"] = sValue

    def _jump (self, sWhat):
        try:
            nFrom, nTo = sWhat.split(">")
            self.lToken[int(nFrom)]["iJump"] = int(nTo)
        except:
            print("# Error. Jump failed: ", sWhat)
            traceback.print_exc()
            return


#### Analyse tokens

def g_morph (dToken, sPattern, bStrict=True):
    "analyse a token, return True if <sPattern> in morphologies"
    if "lMorph" in dToken:
        lMorph = dToken["lMorph"]
    else:
        lMorph = _oSpellChecker.getMorph(dToken["sValue"])
        if not lMorph:
            return False
    zPattern = re.compile(sPattern)
    if bStrict:
        return all(zPattern.search(sMorph)  for sMorph in lMorph)
    return any(zPattern.search(sMorph)  for sMorph in lMorph)

def g_morphex (dToken, sPattern, sNegPattern):
    "analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies"
    if "lMorph" in dToken:
        lMorph = dToken["lMorph"]
    else:
        lMorph = _oSpellChecker.getMorph(dToken["sValue"])
        if not lMorph:
            return False
    # check negative condition
    zNegPattern = re.compile(sNegPattern)
    if any(zNegPattern.search(sMorph)  for sMorph in lMorph):
        return False
    # search sPattern
    zPattern = re.compile(sPattern)
    return any(zPattern.search(sMorph)  for sMorph in lMorph)

def g_analyse (dToken, sPattern, bStrict=True):
    "analyse a token, return True if <sPattern> in morphologies (disambiguation off)"
    lMorph = _oSpellChecker.getMorph(dToken["sValue"])
    if not lMorph:
        return False
    zPattern = re.compile(sPattern)
    if bStrict:
        return all(zPattern.search(sMorph)  for sMorph in lMorph)
    return any(zPattern.search(sMorph)  for sMorph in lMorph)


def g_analysex (dToken, sPattern, sNegPattern):
    "analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies (disambiguation off)"
    lMorph = _oSpellChecker.getMorph(dToken["sValue"])
    if not lMorph:
        return False
    # check negative condition
    zNegPattern = re.compile(sNegPattern)
    if any(zNegPattern.search(sMorph)  for sMorph in lMorph):
        return False
    # search sPattern
    zPattern = re.compile(sPattern)
    return any(zPattern.search(sMorph)  for sMorph in lMorph)


#### Go outside the rule scope

def g_nextToken (i):
    pass

def g_prevToken (i):
    pass

def g_look ():
    pass

def g_lookAndCheck ():
    pass


#### Disambiguator

def g_select (dToken, sPattern, lDefault=None):
    "select morphologies for <dToken> according to <sPattern>, always return True"
    lMorph = dToken["lMorph"]  if "lMorph" in dToken  else _oSpellChecker.getMorph(dToken["sValue"])
    if not lMorph or len(lMorph) == 1:
        return True
    lSelect = [ sMorph  for sMorph in lMorph  if re.search(sPattern, sMorph) ]
    if lSelect:
        if len(lSelect) != len(lMorph):
            dToken["lMorph"] = lSelect
    elif lDefault:
        dToken["lMorph"] = lDefault
    return True


def g_exclude (dToken, sPattern, lDefault=None):
    "select morphologies for <dToken> according to <sPattern>, always return True"
    lMorph = dToken["lMorph"]  if "lMorph" in dToken  else _oSpellChecker.getMorph(dToken["sValue"])
    if not lMorph or len(lMorph) == 1:
        return True
    lSelect = [ sMorph  for sMorph in lMorph  if not re.search(sPattern, sMorph) ]
    if lSelect:
        if len(lSelect) != len(lMorph):
            dToken["lMorph"] = lSelect
    elif lDefault:
        dToken["lMorph"] = lDefault
    return True


def g_define (dToken, lMorph):
    "set morphologies of <dToken>, always return True"
    dToken["lMorph"] = lMorph
    return True


#### CALLABLES (generated code)

${graph_callables}

Modified gc_core/py/lang_core/gc_sentence.py from [90cbca3aed] to [c68dc1622f].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# Sentence checker

from ..graphspell.tokenizer import Tokenizer
from .gc_graph import dGraph


oTokenizer = Tokenizer("${lang}")


class Sentence:

    def __init__ (self, sSentence, sSentence0, nOffset):
        self.sSentence = sSentence
        self.sSentence0 = sSentence0
        self.nOffset = nOffset
        self.lToken = list(oTokenizer.genTokens())




|





|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# Sentence checker

from ..graphspell.tokenizer import Tokenizer
from .gc_rules_graph import dGraph


oTokenizer = Tokenizer("${lang}")


class TokenSentence:

    def __init__ (self, sSentence, sSentence0, nOffset):
        self.sSentence = sSentence
        self.sSentence0 = sSentence0
        self.nOffset = nOffset
        self.lToken = list(oTokenizer.genTokens())

28
29
30
31
32
33
34
35
36
37
38

39
40

41
42



43

44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96






97
98



99



100
101
102
103



104
105









106
107



108
109



110



111



112
113
114
115








116
















117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133








134





135
136














137
138




139
140

141

                if not bValid:
                    del lPointer[i]
            for dNode in self._getNextMatchingNodes(dToken, dGraph):
                lPointer.append({"nOffset": 0, "dNode": dNode})
            for dPointer in lPointer:
                if "<rules>" in dPointer["dNode"]:
                    for dNode in dGraph[dPointer["dNode"]["<rules>"]]:
                        dErr = self._executeActions(dNode)
        return dErr

    def _getNextMatchingNodes (self, dToken, dNode):

        if dToken["sValue"] in dNode:
            yield dGraph[dNode[dToken["sValue"]]]

        for sLemma in dToken["sLemma"]:
            if sLemma in dNode:



                yield dGraph[dNode[dToken["sValue"]]]

        if "~" in dNode:
            for sRegex in dNode["~"]:
                for sMorph in dToken["lMorph"]:
                    if re.search(sRegex, sMorph):
                        yield dGraph[dNode["~"][sRegex]]

    def _executeActions (self, dNode):
        for sLineId, nextNodeKey in dNode.items():
            for sArc in dGraph[nextNodeKey]:
                bCondMemo = None
                sFuncCond, cActionType, sWhat, *eAct = dRule[sArc]
                # action in lActions: [ condition, action type, replacement/suggestion/action[, iGroupStart, iGroupEnd[, message, URL]] ]
                try:
                    bCondMemo = not sFuncCond or globals()[sFuncCond](self, dDA, sCountry, bCondMemo)
                    if bCondMemo:
                        if cActionType == "-":
                            # grammar error
                            nErrorStart = nSentenceOffset + m.start(eAct[0])
                            nErrorEnd = nSentenceOffset + m.start(eAct[1])
                            if nErrorStart not in dErrs or nPriority > dPriority[nErrorStart]:
                                dErrs[nErrorStart] = _createError(self, sWhat, nErrorStart, nErrorEnd, sLineId, bUppercase, eAct[2], eAct[3], bIdRule, sOption, bContext)
                                dPriority[nErrorStart] = nPriority
                        elif cActionType == "~":
                            # text processor
                            self.lToken = _rewrite(self, sWhat, nErrorStart, nErrorEnd, bUppercase)
                            bChange = True
                        elif cActionType == "@":
                            # text processor
                            self.lToken = _rewrite(self, sWhat, nErrorStart, nErrorEnd, bUppercase)
                            bChange = True
                        elif cActionType == "=":
                            # disambiguation
                            globals()[sWhat](self, dDA)
                        elif cActionType == ">":
                            # we do nothing, this test is just a condition to apply all following actions
                            pass
                        else:
                            echo("# error: unknown action at " + sLineId)
                    elif cActionType == ">":
                        break
                except Exception as e:
                    raise Exception(str(e), "# " + sLineId + " # " + sRuleId)

    def _createWriterError (self):
        d = {}
        return d

    def _createDictError (self):
        d = {}
        return d


#### Common functions







def option ():



    pass





#### Analyse tokens




def morph ():
    pass










def morphex ():



    pass




def analyse ():



    pass




def analysex ():
    pass


























#### Go outside scope

def nextToken ():
    pass

def prevToken ():
    pass

def look ():
    pass

def lookAndCheck ():
    pass


#### Disambiguator









def select ():





    pass















def exclude ():
    pass





def define ():

    pass








|



>


>
|

>
>
>
|
>






|






|










|
<

|
<
|


|




|













|
|
>
>
>
>
>
>

|
>
>
>
|
>
>
>




>
>
>
|
|
>
>
>
>
>
>
>
>
>

|
>
>
>
|
|
>
>
>
|
>
>
>
|
>
>
>

|
<
|
>
>
>
>
>
>
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|

|


|


|


|





>
>
>
>
>
>
>
>
|
>
>
>
>
>
|

>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
>
>
>
>

|
>
|
>
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74

75
76

77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153

154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
                if not bValid:
                    del lPointer[i]
            for dNode in self._getNextMatchingNodes(dToken, dGraph):
                lPointer.append({"nOffset": 0, "dNode": dNode})
            for dPointer in lPointer:
                if "<rules>" in dPointer["dNode"]:
                    for dNode in dGraph[dPointer["dNode"]["<rules>"]]:
                        dErr = self._executeActions(dNode, nOffset)
        return dErr

    def _getNextMatchingNodes (self, dToken, dNode):
        # token value
        if dToken["sValue"] in dNode:
            yield dGraph[dNode[dToken["sValue"]]]
        # token lemmas
        for sLemma in dToken["lLemma"]:
            if sLemma in dNode:
                yield dGraph[dNode[sLemma]]
        # universal arc
        if "*" in dNode:
            yield dGraph[dNode["*"]]
        # regex arcs
        if "~" in dNode:
            for sRegex in dNode["~"]:
                for sMorph in dToken["lMorph"]:
                    if re.search(sRegex, sMorph):
                        yield dGraph[dNode["~"][sRegex]]

    def _executeActions (self, dNode, nOffset):
        for sLineId, nextNodeKey in dNode.items():
            for sArc in dGraph[nextNodeKey]:
                bCondMemo = None
                sFuncCond, cActionType, sWhat, *eAct = dRule[sArc]
                # action in lActions: [ condition, action type, replacement/suggestion/action[, iGroupStart, iGroupEnd[, message, URL]] ]
                try:
                    bCondMemo = not sFuncCond or globals()[sFuncCond](self, sCountry, bCondMemo)
                    if bCondMemo:
                        if cActionType == "-":
                            # grammar error
                            nErrorStart = nSentenceOffset + m.start(eAct[0])
                            nErrorEnd = nSentenceOffset + m.start(eAct[1])
                            if nErrorStart not in dErrs or nPriority > dPriority[nErrorStart]:
                                dErrs[nErrorStart] = _createError(self, sWhat, nErrorStart, nErrorEnd, sLineId, bUppercase, eAct[2], eAct[3], bIdRule, sOption, bContext)
                                dPriority[nErrorStart] = nPriority
                        elif cActionType == "~":
                            # text processor
                            self._rewrite(sWhat, nErrorStart, nErrorEnd)

                        elif cActionType == "@":
                            # jump

                            self._jump(sWhat)
                        elif cActionType == "=":
                            # disambiguation
                            globals()[sWhat](self.lToken)
                        elif cActionType == ">":
                            # we do nothing, this test is just a condition to apply all following actions
                            pass
                        else:
                            print("# error: unknown action at " + sLineId)
                    elif cActionType == ">":
                        break
                except Exception as e:
                    raise Exception(str(e), "# " + sLineId + " # " + sRuleId)

    def _createWriterError (self):
        d = {}
        return d

    def _createDictError (self):
        d = {}
        return d

    def _rewrite (self, sWhat, nErrorStart, nErrorEnd):
        "text processor: rewrite tokens between <nErrorStart> and <nErrorEnd> position"
        lTokenValue = sWhat.split("|")
        if len(lTokenValue) != (nErrorEnd - nErrorStart + 1):
            print("Error. Text processor: number of replacements != number of tokens.")
            return
        for i, sValue in zip(range(nErrorStart, nErrorEnd+1), lTokenValue):
            self.lToken[i]["sValue"] = sValue

    def _jump (self, sWhat):
        try:
            nFrom, nTo = sWhat.split(">")
            self.lToken[int(nFrom)]["iJump"] = int(nTo)
        except:
            print("# Error. Jump failed: ", sWhat)
            traceback.print_exc()
            return


#### Analyse tokens

def g_morph (dToken, sPattern, bStrict=True):
    "analyse a token, return True if <sPattern> in morphologies"
    if "lMorph" in dToken:
        lMorph = dToken["lMorph"]
    else:
        if dToken["sValue"] not in _dAnalyses and not _storeMorphFromFSA(dToken["sValue"]):
            return False
        if not _dAnalyses[dToken["sValue"]]:
            return False
        lMorph = _dAnalyses[dToken["sValue"]]
    zPattern = re.compile(sPattern)
    if bStrict:
        return all(zPattern.search(sMorph)  for sMorph in lMorph)
    return any(zPattern.search(sMorph)  for sMorph in lMorph)

def g_morphex (dToken, sPattern, sNegPattern):
    "analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies"
    if "lMorph" in dToken:
        lMorph = dToken["lMorph"]
    else:
        if dToken["sValue"] not in _dAnalyses and not _storeMorphFromFSA(dToken["sValue"]):
            return False
        if not _dAnalyses[dToken["sValue"]]:
            return False
        lMorph = _dAnalyses[dToken["sValue"]]
    # check negative condition
    zNegPattern = re.compile(sNegPattern)
    if any(zNegPattern.search(sMorph)  for sMorph in lMorph):
        return False
    # search sPattern
    zPattern = re.compile(sPattern)
    return any(zPattern.search(sMorph)  for sMorph in lMorph)

def g_analyse (dToken, sPattern, bStrict=True):

    "analyse a token, return True if <sPattern> in morphologies (disambiguation off)"
    if dToken["sValue"] not in _dAnalyses and not _storeMorphFromFSA(dToken["sValue"]):
        return False
    if not _dAnalyses[dToken["sValue"]]:
        return False
    zPattern = re.compile(sPattern)
    if bStrict:
        return all(zPattern.search(sMorph)  for sMorph in _dAnalyses[dToken["sValue"]])
    return any(zPattern.search(sMorph)  for sMorph in _dAnalyses[dToken["sValue"]])


def g_analysex (dToken, sPattern, sNegPattern):
    "analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies (disambiguation off)"
    if dToken["sValue"] not in _dAnalyses and not _storeMorphFromFSA(dToken["sValue"]):
        return False
    if not _dAnalyses[dToken["sValue"]]:
        return False
    # check negative condition
    zNegPattern = re.compile(sNegPattern)
    if any(zNegPattern.search(sMorph)  for sMorph in _dAnalyses[dToken["sValue"]]):
        return False
    # search sPattern
    zPattern = re.compile(sPattern)
    return any(zPattern.search(sMorph)  for sMorph in _dAnalyses[dToken["sValue"]])


#### Go outside the rule scope

def g_nextToken (i):
    pass

def g_prevToken (i):
    pass

def g_look ():
    pass

def g_lookAndCheck ():
    pass


#### Disambiguator

def g_select (dToken, sPattern, lDefault=None):
    "select morphologies for <dToken> according to <sPattern>, always return True"
    if dToken["sValue"] not in _dAnalyses and not _storeMorphFromFSA(dToken["sValue"]):
        return True
    if len(_dAnalyses[dToken["sValue"]]) == 1:
        return True
    lMorph = dToken["lMorph"] or _dAnalyses[dToken["sValue"]]
    lSelect = [ sMorph  for sMorph in lMorph  if re.search(sPattern, sMorph) ]
    if lSelect:
        if len(lSelect) != len(lMorph):
            dToken["lMorph"] = lSelect
    elif lDefault:
        dToken["lMorph"] = lDefault
    return True


def g_exclude (dToken, sPattern, lDefault=None):
    "select morphologies for <dToken> according to <sPattern>, always return True"
    if dToken["sValue"] not in _dAnalyses and not _storeMorphFromFSA(dToken["sValue"]):
        return True
    if len(_dAnalyses[dToken["sValue"]]) == 1:
        return True
    lMorph = dToken["lMorph"] or _dAnalyses[dToken["sValue"]]
    lSelect = [ sMorph  for sMorph in lMorph  if not re.search(sPattern, sMorph) ]
    if lSelect:
        if len(lSelect) != len(lMorph):
            dToken["lMorph"] = lSelect
    elif lDefault:
        dToken["lMorph"] = lDefault
    return True


def g_define (dToken, lMorph):
    "set morphologies of <dToken>, always return True"
    dToken["lMorph"] = lMorph
    return True


#### CALLABLES (generated code)

${graph_callables}