Grammalecte  Check-in [5bf409e44a]

Overview
Comment:[core] spellchecking for error messages
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | core
Files: files | file ages | folders
SHA3-256: 5bf409e44afcfdbdd20f55444fcd38e7bb0f1ab82ef0aed44acc6d6a9ecd017b
User & Date: olr on 2020-11-23 20:44:24
Other Links: manifest | tags
Context
2020-11-24
21:27
[fr] corrections des messages check-in: b568531f02 user: olr tags: fr, trunk
2020-11-23
20:44
[core] spellchecking for error messages check-in: 5bf409e44a user: olr tags: core, trunk
20:43
[fr] phonet_simil: danse dance check-in: 07bd6248b1 user: olr tags: fr, trunk
Changes

Modified gc_core/py/lang_core/tests_core.py from [9e599f13ec] to [36a2155509].

60
61
62
63
64
65
66

67
68
69
70
71
72
73
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74







+








    @classmethod
    def setUpClass (cls):
        gc_engine.load()
        cls._zError = re.compile(r"\{\{.*?\}\}")
        cls._zRuleEnd = re.compile(r"_a\d+_\d+$")
        cls._aTestedRules = set()
        cls._oSpellChecker = gc_engine.getSpellChecker()

    def test_parse (self):
        zOption = re.compile("^__([a-zA-Z0-9]+)__ ")
        spHere, _ = os.path.split(__file__)
        spfParsingTest = os.path.join(spHere, "gc_test.txt")
        if not os.path.exists(spfParsingTest):
            print(f"No file <gc_test.txt> in <{spHere}>")
140
141
142
143
144
145
146
147
148




149
150
151
152
153





154
155
156
157
158
159
160
141
142
143
144
145
146
147


148
149
150
151





152
153
154
155
156
157
158
159
160
161
162
163







-
-
+
+
+
+
-
-
-
-
-
+
+
+
+
+







        for dErr in sorted(aErrs, key=lambda d: d["nStart"]):
            sRes = sRes[:dErr["nStart"]] + "~" * (dErr["nEnd"] - dErr["nStart"]) + sRes[dErr["nEnd"]:]
            sListErr += "    * {sLineId} / {sRuleId}  at  {nStart}:{nEnd}\n".format(**dErr)
            lAllSugg.append("|".join(dErr["aSuggestions"]))
            self._aTestedRules.add(dErr["sRuleId"].rstrip("0123456789"))
            # test messages
            if False:
                aMsgErrs = gc_engine.parse(purgeMessage(dErr["sMessage"]))
                if aMsgErrs or "<start>" in dErr["sMessage"] or "<end>" in dErr["sMessage"]:
                aGramErrs = gc_engine.parse(purgeMessage(dErr["sMessage"]))
                aGramErrs = [ dMsgErr  for dMsgErr in sorted(aGramErrs, key=lambda d: d["nStart"])  if self._zRuleEnd.sub("", dMsgErr["sRuleId"]) != self._zRuleEnd.sub("", dErr["sRuleId"]) ]
                aSpellErrs = self._oSpellChecker.parseParagraph(re.sub("‹\\w+›", lambda m: " " * len(m.group(0)), dErr["sMessage"]))
                if aGramErrs or aSpellErrs or "<start>" in dErr["sMessage"] or "<end>" in dErr["sMessage"]:
                    aSelectedErrs = [ dMsgErr  for dMsgErr in sorted(aMsgErrs, key=lambda d: d["nStart"])  if self._zRuleEnd.sub("", dMsgErr["sRuleId"]) != self._zRuleEnd.sub("", dErr["sRuleId"]) ]
                    if aSelectedErrs:
                        print("\n# Error in: <" + dErr["sMessage"] + ">\n    " + dErr["sLineId"] + " / " + dErr["sRuleId"])
                        for dMsgErr in aSelectedErrs:
                            print("        error: {sLineId} / {sRuleId}  at  {nStart}:{nEnd}".format(**dMsgErr))
                    print("\n# Error in: <" + dErr["sMessage"] + ">\n    " + dErr["sLineId"] + " / " + dErr["sRuleId"])
                    for dMsgErr in aGramErrs:
                        print("        error: {sLineId} / {sRuleId}  at  {nStart}:{nEnd}".format(**dMsgErr))
                    for dMsgErr in aSpellErrs:
                        print("        spelling mistake: <{sValue}>  at {nStart}:{nEnd}".format(**dMsgErr))
        return sRes, sListErr, "|||".join(lAllSugg)

    def _getExpectedErrors (self, sLine):
        sRes = " " * len(sLine)
        for i, m in enumerate(self._zError.finditer(sLine)):
            nStart = m.start() - (4 * i)
            nEnd = m.end() - (4 * (i+1))