Grammalecte  Check-in [5bf409e44a]

Overview
Comment:[core] spellchecking for error messages
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | core
Files: files | file ages | folders
SHA3-256: 5bf409e44afcfdbdd20f55444fcd38e7bb0f1ab82ef0aed44acc6d6a9ecd017b
User & Date: olr on 2020-11-23 20:44:24
Other Links: manifest | tags
Context
2020-11-24
21:27
[fr] corrections des messages check-in: b568531f02 user: olr tags: trunk, fr
2020-11-23
20:44
[core] spellchecking for error messages check-in: 5bf409e44a user: olr tags: trunk, core
20:43
[fr] phonet_simil: danse dance check-in: 07bd6248b1 user: olr tags: trunk, fr
Changes

Modified gc_core/py/lang_core/tests_core.py from [9e599f13ec] to [36a2155509].

60
61
62
63
64
65
66

67
68
69
70
71
72
73

    @classmethod
    def setUpClass (cls):
        """Prepare the state shared by every test of this class.

        Calls gc_engine.load() once, then stores on the class the compiled
        patterns and the rule-id set that the test methods read through self.
        """
        gc_engine.load()
        # Shortest span delimited by "{{" and "}}"; _getExpectedErrors iterates
        # over these matches in a test line.
        cls._zError = re.compile(r"\{\{.*?\}\}")
        # Trailing "_a<digits>_<digits>" suffix of a rule id; it is removed
        # before two rule ids are compared.
        cls._zRuleEnd = re.compile(r"_a\d+_\d+$")
        # Rule ids (trailing digits stripped) collected while errors are processed.
        cls._aTestedRules = set()


    def test_parse (self):
        zOption = re.compile("^__([a-zA-Z0-9]+)__ ")
        spHere, _ = os.path.split(__file__)
        spfParsingTest = os.path.join(spHere, "gc_test.txt")
        if not os.path.exists(spfParsingTest):
            print(f"No file <gc_test.txt> in <{spHere}>")







>







60
61
62
63
64
65
66
67
68
69
70
71
72
73
74

    @classmethod
    def setUpClass (cls):
        """Prepare the state shared by every test of this class.

        Calls gc_engine.load() once, then stores on the class the compiled
        patterns, the rule-id set and the spell checker that the test methods
        read through self.
        """
        gc_engine.load()
        # Shortest span delimited by "{{" and "}}"; _getExpectedErrors iterates
        # over these matches in a test line.
        cls._zError = re.compile(r"\{\{.*?\}\}")
        # Trailing "_a<digits>_<digits>" suffix of a rule id; it is removed
        # before two rule ids are compared.
        cls._zRuleEnd = re.compile(r"_a\d+_\d+$")
        # Rule ids (trailing digits stripped) collected while errors are processed.
        cls._aTestedRules = set()
        # Object returned by gc_engine.getSpellChecker(); its parseParagraph()
        # is called on rule messages in the message-testing code.
        cls._oSpellChecker = gc_engine.getSpellChecker()

    def test_parse (self):
        zOption = re.compile("^__([a-zA-Z0-9]+)__ ")
        spHere, _ = os.path.split(__file__)
        spfParsingTest = os.path.join(spHere, "gc_test.txt")
        if not os.path.exists(spfParsingTest):
            print(f"No file <gc_test.txt> in <{spHere}>")
140
141
142
143
144
145
146
147


148
149
150
151
152
153


154
155
156
157
158
159
160
        for dErr in sorted(aErrs, key=lambda d: d["nStart"]):
            sRes = sRes[:dErr["nStart"]] + "~" * (dErr["nEnd"] - dErr["nStart"]) + sRes[dErr["nEnd"]:]
            sListErr += "    * {sLineId} / {sRuleId}  at  {nStart}:{nEnd}\n".format(**dErr)
            lAllSugg.append("|".join(dErr["aSuggestions"]))
            self._aTestedRules.add(dErr["sRuleId"].rstrip("0123456789"))
            # test messages
            if False:
                aMsgErrs = gc_engine.parse(purgeMessage(dErr["sMessage"]))


                if aMsgErrs or "<start>" in dErr["sMessage"] or "<end>" in dErr["sMessage"]:
                    aSelectedErrs = [ dMsgErr  for dMsgErr in sorted(aMsgErrs, key=lambda d: d["nStart"])  if self._zRuleEnd.sub("", dMsgErr["sRuleId"]) != self._zRuleEnd.sub("", dErr["sRuleId"]) ]
                    if aSelectedErrs:
                        print("\n# Error in: <" + dErr["sMessage"] + ">\n    " + dErr["sLineId"] + " / " + dErr["sRuleId"])
                        for dMsgErr in aSelectedErrs:
                            print("        error: {sLineId} / {sRuleId}  at  {nStart}:{nEnd}".format(**dMsgErr))


        return sRes, sListErr, "|||".join(lAllSugg)

    def _getExpectedErrors (self, sLine):
        sRes = " " * len(sLine)
        for i, m in enumerate(self._zError.finditer(sLine)):
            nStart = m.start() - (4 * i)
            nEnd = m.end() - (4 * (i+1))







|
>
>
|
<
<
|
|
|
>
>







141
142
143
144
145
146
147
148
149
150
151


152
153
154
155
156
157
158
159
160
161
162
163
        for dErr in sorted(aErrs, key=lambda d: d["nStart"]):
            sRes = sRes[:dErr["nStart"]] + "~" * (dErr["nEnd"] - dErr["nStart"]) + sRes[dErr["nEnd"]:]
            sListErr += "    * {sLineId} / {sRuleId}  at  {nStart}:{nEnd}\n".format(**dErr)
            lAllSugg.append("|".join(dErr["aSuggestions"]))
            self._aTestedRules.add(dErr["sRuleId"].rstrip("0123456789"))
            # test messages
            if False:
                aGramErrs = gc_engine.parse(purgeMessage(dErr["sMessage"]))
                aGramErrs = [ dMsgErr  for dMsgErr in sorted(aGramErrs, key=lambda d: d["nStart"])  if self._zRuleEnd.sub("", dMsgErr["sRuleId"]) != self._zRuleEnd.sub("", dErr["sRuleId"]) ]
                aSpellErrs = self._oSpellChecker.parseParagraph(re.sub("‹\\w+›", lambda m: " " * len(m.group(0)), dErr["sMessage"]))
                if aGramErrs or aSpellErrs or "<start>" in dErr["sMessage"] or "<end>" in dErr["sMessage"]:


                    print("\n# Error in: <" + dErr["sMessage"] + ">\n    " + dErr["sLineId"] + " / " + dErr["sRuleId"])
                    for dMsgErr in aGramErrs:
                        print("        error: {sLineId} / {sRuleId}  at  {nStart}:{nEnd}".format(**dMsgErr))
                    for dMsgErr in aSpellErrs:
                        print("        spelling mistake: <{sValue}>  at {nStart}:{nEnd}".format(**dMsgErr))
        return sRes, sListErr, "|||".join(lAllSugg)

    def _getExpectedErrors (self, sLine):
        sRes = " " * len(sLine)
        for i, m in enumerate(self._zError.finditer(sLine)):
            nStart = m.start() - (4 * i)
            nEnd = m.end() - (4 * (i+1))