Grammalecte: Check-in [7c742b5359]

Overview
Comment: [build][core] graph parser update
SHA3-256: 7c742b5359f19425564d3f6ed2713e6b393fd77d15fcef2c1d547de4e2f77705
User & Date: olr on 2018-05-25 20:07:58
Context
2018-05-29 16:17  [core] gc engine update  (check-in: c06b45b671, user: olr, tags: core, rg)
2018-05-25 20:07  [build][core] graph parser update  (check-in: 7c742b5359, user: olr, tags: core, build, rg)
2018-05-25 12:14  [build][core] tests  (check-in: ac09d7cc19, user: olr, tags: core, build, rg)
Changes

Modified gc_core/py/lang_core/gc_engine.py from [db19c73d82] to [b7c579e4d3].

from itertools import chain

from ..graphspell.spellchecker import SpellChecker
from ..graphspell.echo import echo
from . import gc_options

from ..graphspell.tokenizer import Tokenizer
from .gc_rules_graph import dGraph


__all__ = [ "lang", "locales", "pkg", "name", "version", "author", \
            "load", "parse", "getSpellChecker", \
            "setOption", "setOptions", "getOptions", "getDefaultOptions", "getOptionsLabels", "resetOptions", "displayOptions", \
            "ignoreRule", "resetIgnoreRules", "reactivateRule", "listRules", "displayRules" ]








|







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
from itertools import chain

from ..graphspell.spellchecker import SpellChecker
from ..graphspell.echo import echo
from . import gc_options

from ..graphspell.tokenizer import Tokenizer
from .gc_rules_graph import dGraph, dRule


__all__ = [ "lang", "locales", "pkg", "name", "version", "author", \
            "load", "parse", "getSpellChecker", \
            "setOption", "setOptions", "getOptions", "getDefaultOptions", "getOptionsLabels", "resetOptions", "displayOptions", \
            "ignoreRule", "resetIgnoreRules", "reactivateRule", "listRules", "displayRules" ]

        self.iStart = iStart
        self.lToken = list(_oTokenizer.genTokens(sSentence))

    def parse (self):
        dErr = {}
        lPointer = []
        for dToken in self.lToken:
            for i, dPointer in enumerate(lPointer):
                bValid = False
                for dNode in self._getNextMatchingNodes(dToken, dPointer["dNode"]):
                    dPointer["nOffset"] = dToken["i"]
                    dPointer["dNode"] = dNode
                    bValid = True
                if not bValid:
                    del lPointer[i]
            for dNode in self._getNextMatchingNodes(dToken, dGraph):
                lPointer.append({"nOffset": 0, "dNode": dNode})
            for dPointer in lPointer:
                if "<rules>" in dPointer["dNode"]:
                    for dNode in dGraph[dPointer["dNode"]["<rules>"]]:
                        dErr = self._executeActions(dNode, nOffset)
        return dErr

    def _getNextMatchingNodes (self, dToken, dNode):
        # token value
        if dToken["sValue"] in dNode:
            yield dGraph[dNode[dToken["sValue"]]]
        # token lemmas
        if "<lemmas>" in dNode:
            for sLemma in _oSpellChecker.getLemma(dToken["sValue"]):
                if sLemma in dNode["<lemmas>"]:
                    yield dGraph[dNode["<lemmas>"][sLemma]]
        # universal arc
        if "*" in dNode:
            yield dGraph[dNode["*"]]
        # regex value arcs
        if "<re_value>" in dNode:
            for sRegex in dNode["<re_value>"]:
                if re.search(sRegex, dToken["sValue"]):
                    yield dGraph[dNode["<re_value>"][sRegex]]
        # regex morph arcs
        if "<re_morph>" in dNode:
            for sRegex in dNode["<re_morph>"]:
                for sMorph in _oSpellChecker.getMorph(dToken["sValue"]):
                    if re.search(sRegex, sMorph):
                        yield dGraph[dNode["<re_morph>"][sRegex]]

    def _executeActions (self, dNode, nOffset):
        for sLineId, nextNodeKey in dNode.items():
            for sArc in dGraph[nextNodeKey]:
                print(sArc)
                bCondMemo = None
                sFuncCond, cActionType, sWhat, *eAct = dRule[sArc]
                # action in lActions: [ condition, action type, replacement/suggestion/action[, iGroupStart, iGroupEnd[, message, URL]] ]
                try:
                    bCondMemo = not sFuncCond or globals()[sFuncCond](self, sCountry, bCondMemo)
                    if bCondMemo:
                        if cActionType == "-":
                            # grammar error
                            print("-")
                            nErrorStart = nSentenceOffset + m.start(eAct[0])
                            nErrorEnd = nSentenceOffset + m.start(eAct[1])
                            if nErrorStart not in dErrs or nPriority > dPriority[nErrorStart]:
                                dErrs[nErrorStart] = _createError(self, sWhat, nErrorStart, nErrorEnd, sLineId, bUppercase, eAct[2], eAct[3], bIdRule, sOption, bContext)
                                dPriority[nErrorStart] = nPriority
                        elif cActionType == "~":
                            # text processor
                            print("~")
                            self._rewrite(sWhat, nErrorStart, nErrorEnd)

        self.iStart = iStart
        self.lToken = list(_oTokenizer.genTokens(sSentence))

    def parse (self):
        dErr = {}
        lPointer = []
        for dToken in self.lToken:
            # check arcs for each existing pointer
            lNewPointer = []
            for i, dPointer in enumerate(lPointer):
                bValid = False
                bFirst = True
                for dNode in self._getNextMatchingNodes(dToken, dPointer["dNode"]):
                    if bFirst:
                        dPointer["nOffset"] = dToken["i"]
                        dPointer["dNode"] = dNode
                    else:
                        lNewPointer.append({"nOffset": dPointer["nOffset"], "dNode": dNode})
                    bFirst = False
                    bValid = True
                if not bValid:
                    del lPointer[i]
            lPointer.extend(lNewPointer)
            # check arcs of first nodes
            for dNode in self._getNextMatchingNodes(dToken, dGraph[0]):
                lPointer.append({"nOffset": 0, "dNode": dNode})
            # check if there are rules to check for each pointer
            for dPointer in lPointer:
                if "<rules>" in dPointer["dNode"]:
                    dErr = self._executeActions(dPointer["dNode"]["<rules>"], dPointer["nOffset"])
        if dErr:
            print(dErr)
        return dErr

    def _getNextMatchingNodes (self, dToken, dNode):
        # token value
        if dToken["sValue"] in dNode:
            print("value found: ", dToken["sValue"])
            yield dGraph[dNode[dToken["sValue"]]]
        # token lemmas
        if "<lemmas>" in dNode:
            for sLemma in _oSpellChecker.getLemma(dToken["sValue"]):
                if sLemma in dNode["<lemmas>"]:
                    print("lemma found: ", sLemma)
                    yield dGraph[dNode["<lemmas>"][sLemma]]
        # universal arc
        if "*" in dNode:
            print("generic arc")
            yield dGraph[dNode["*"]]
        # regex value arcs
        if "<re_value>" in dNode:
            for sRegex in dNode["<re_value>"]:
                if re.search(sRegex, dToken["sValue"]):
                    print("value regex matching: ", sRegex)
                    yield dGraph[dNode["<re_value>"][sRegex]]
        # regex morph arcs
        if "<re_morph>" in dNode:
            for sRegex in dNode["<re_morph>"]:
                for sMorph in _oSpellChecker.getMorph(dToken["sValue"]):
                    if re.search(sRegex, sMorph):
                        print("morph regex matching: ", sRegex)
                        yield dGraph[dNode["<re_morph>"][sRegex]]

    def _executeActions (self, dNode, nOffset):
        dErrs = {}
        for sLineId, nextNodeKey in dNode.items():
            for sArc in dGraph[nextNodeKey]:
                print(sArc)
                bCondMemo = None
                sFuncCond, cActionType, sWhat, *eAct = dRule[sArc]
                # action in lActions: [ condition, action type, replacement/suggestion/action[, iGroupStart, iGroupEnd[, message, URL]] ]
                try:
                    bCondMemo = not sFuncCond or globals()[sFuncCond](self, sCountry, bCondMemo)
                    if bCondMemo:
                        if cActionType == "-":
                            # grammar error
                            print("-")
                            nErrorStart = self.iStart + self.lToken[eAct[0]]["nStart"]
                            nErrorEnd = self.iStart + self.lToken[eAct[1]]["nEnd"]
                            if nErrorStart not in dErrs or nPriority > dPriority[nErrorStart]:
                                dErrs[nErrorStart] = _createError(self, sWhat, nErrorStart, nErrorEnd, sLineId, bUppercase, eAct[2], eAct[3], bIdRule, sOption, bContext)
                                dPriority[nErrorStart] = nPriority
                        elif cActionType == "~":
                            # text processor
                            print("~")
                            self._rewrite(sWhat, nErrorStart, nErrorEnd)

                            print(">")
                            pass
                        else:
                            print("# error: unknown action at " + sLineId)
                    elif cActionType == ">":
                        break
                except Exception as e:
                    raise Exception(str(e), "# " + sLineId + " # " + sRuleId)

    def _createWriterError (self):
        d = {}
        return d

    def _createDictError (self):
        d = {}
        return d

    def _rewrite (self, sWhat, nErrorStart, nErrorEnd):
        "text processor: rewrite tokens between <nErrorStart> and <nErrorEnd> position"
        lTokenValue = sWhat.split("|")
        if len(lTokenValue) != (nErrorEnd - nErrorStart + 1):
            print("Error. Text processor: number of replacements != number of tokens.")
            return

                            print(">")
                            pass
                        else:
                            print("# error: unknown action at " + sLineId)
                    elif cActionType == ">":
                        break
                except Exception as e:
                    raise Exception(str(e), sLineId)
        return dErrs

    def _rewrite (self, sWhat, nErrorStart, nErrorEnd):
        "text processor: rewrite tokens between <nErrorStart> and <nErrorEnd> position"
        lTokenValue = sWhat.split("|")
        if len(lTokenValue) != (nErrorEnd - nErrorStart + 1):
            print("Error. Text processor: number of replacements != number of tokens.")
            return
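
For readers following the rewritten parse() above: the engine keeps a list of live pointers into the rule graph, advances each pointer on every token (branching into lNewPointer when several arcs match), starts a fresh pointer at the root node dGraph[0] for every token, and executes actions whenever a pointer lands on a node carrying "<rules>". A stripped-down sketch of that loop, assuming plain string tokens and exact-value arcs only (matchGraph is a hypothetical name, not part of the engine):

def matchGraph (lToken, dGraph):
    "yield (nOffset, dRuleNodes) for each pattern of dGraph matched in lToken (simplified sketch)"
    lPointer = []
    for i, sToken in enumerate(lToken):
        # advance every live pointer; a dead end drops the pointer
        lNext = []
        for dPointer in lPointer:
            dNode = dPointer["dNode"]
            if sToken in dNode:
                lNext.append({"nOffset": dPointer["nOffset"], "dNode": dGraph[dNode[sToken]]})
        lPointer = lNext
        # any token may also start a new match at the root node
        if sToken in dGraph[0]:
            lPointer.append({"nOffset": i, "dNode": dGraph[dGraph[0][sToken]]})
        # a pointer standing on a node with "<rules>" has matched a complete pattern
        for dPointer in lPointer:
            if "<rules>" in dPointer["dNode"]:
                yield dPointer["nOffset"], dPointer["dNode"]["<rules>"]

With the hypothetical dGraph sketched after the import hunk, matchGraph(["les"], dGraph) would yield (0, {"#42": 2}).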

Modified gc_lang/fr/rules_graph.grx from [18deb74635] to [6747fdf087].

# Fin d’interprétation du fichier avec une ligne commençant par #END

# ERREURS COURANTES
# http://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Fautes_d%27orthographe/Courantes


__rule1__
    les  ~:N:.:s
    des  ~:N:.:s
    ces  ~:N:.:s

        <<-  -1>> acquit                        # Message0|http://test.grammalecte.net

__rule2__
    ci important que soi
    ci vraiment il y a
    ci pour ça
        <<- morph(\2, ":[WAR]", False) -1>> si   # Message1|http://test.grammalecte.net

__rule3__
    contre ([nature|pétrie|action]) par ([ennui|sélection])
        <<- morph(\1, "xxxx") -1:2>> =\1+\2     # Message2|http://test.grammalecte.org
        <<-  ~1>> hyper|fonction


# Fin d’interprétation du fichier avec une ligne commençant par #END

# ERREURS COURANTES
# http://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Fautes_d%27orthographe/Courantes


__avoir_confiance_en__
    >avoir confiance (dans) [moi|toi|soi|lui|elle|nous|vous|eux|elles]
        <<-  -1>> en                                                                                # Avoir confiance en quelqu’un ou quelque chose.|http://grammalecte.net

TEST: Elle avait confiance {{dans}} lui.


__code_legacy__
    legacy code
    code legacy
        <<- -1:2>> code hérité|code reliquat                                                        # Anglicisme superflu.

TEST: c’est du {{legacy code}}.
TEST: ce {{code legacy}} est un cauchemar


__être_en_xxxx__
    [>être|>rester|>demeurer] an [désaccord|accord]
        <<- -2>> en                                                                                 # Confusion. Un an = une année.

TEST: Je suis {{an}} désaccord avec lui.
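
In these rules, each TEST: line is a sentence the rule must flag, with the expected error wrapped in {{…}}. A small sketch of how such a line can be decoded into a clean sentence plus the expected error spans (getExpectedErrors is a hypothetical helper, not part of the build):

import re

def getExpectedErrors (sLine):
    "return (clean_text, [(start, end, substring), ...]) for a {{...}}-marked TEST line"
    lErrors = []
    sText = ""
    nLast = 0
    for m in re.finditer(r"\{\{(.+?)\}\}", sLine):
        sText += sLine[nLast:m.start()]
        lErrors.append((len(sText), len(sText) + len(m.group(1)), m.group(1)))
        sText += m.group(1)
        nLast = m.end()
    sText += sLine[nLast:]
    return sText, lErrors

# getExpectedErrors("Elle avait confiance {{dans}} lui.")
# -> ('Elle avait confiance dans lui.', [(21, 25, 'dans')])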

Modified make.py from [b6664e27ed] to [5704755499].

            helpers.copyAndFileTemplate(spLang+"/modules/"+sf, spLangPack+"/"+sf, dVars)
            print(sf, end=", ")
    print()

    # TEST FILES
    with open("grammalecte/"+sLang+"/gc_test.txt", "w", encoding="utf-8", newline="\n") as hDstPy:
        hDstPy.write("# TESTS FOR LANG [" + sLang + "]\n\n")
        hDstPy.write(dVars['regex_gctests'])
        hDstPy.write(dVars['graph_gctests'])

    createOXT(spLang, dVars, xConfig._sections['oxt'], spLangPack, bInstallOXT)

    createServerOptions(sLang, dVars)
    createPackageZip(sLang, dVars, spLangPack)

    #### JAVASCRIPT


            helpers.copyAndFileTemplate(spLang+"/modules/"+sf, spLangPack+"/"+sf, dVars)
            print(sf, end=", ")
    print()

    # TEST FILES
    with open("grammalecte/"+sLang+"/gc_test.txt", "w", encoding="utf-8", newline="\n") as hDstPy:
        hDstPy.write("# TESTS FOR LANG [" + sLang + "]\n\n")
        hDstPy.write("# REGEX RULES\n\n")
        hDstPy.write(dVars['regex_gctests'])
        hDstPy.write("\n\n\n# GRAPH RULES\n\n")
        hDstPy.write(dVars['graph_gctests'])
        hDstPy.write("\n")

    createOXT(spLang, dVars, xConfig._sections['oxt'], spLangPack, bInstallOXT)

    createServerOptions(sLang, dVars)
    createPackageZip(sLang, dVars, spLangPack)

    #### JAVASCRIPT
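
With the two section headers and the trailing newline added above, the generated gc_test.txt now keeps regex-rule tests and graph-rule tests clearly separated; for French, its layout is roughly (test lines elided):

# TESTS FOR LANG [fr]

# REGEX RULES

TEST: …


# GRAPH RULES

TEST: …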