Grammalecte  Check-in [7a62b9fd73]

Overview
Comment:[cli] add spellchecker suggestions (patch from Stéphane Veyret)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | cli
Files: files | file ages | folders
SHA3-256: 7a62b9fd735fe6575944584330ec4d37cfe868880c461ea063fa93c69ac80a6f
User & Date: olr on 2017-11-12 18:25:15
Original Comment: [cli] add spellchecker suggestions
Other Links: manifest | tags
Context
2017-11-12
18:33
[cli] change options name check-in: 1254fa8cec user: olr tags: trunk, cli
18:25
[cli] add spellchecker suggestions (patch from Stéphane Veyret) check-in: 7a62b9fd73 user: olr tags: trunk, cli
10:59
[fx] CSS protection for hyphenation check-in: 25276d751a user: olr tags: trunk, fx
Changes

Modified cli.py from [68793c6981] to [b74639032d].

1

2
3
4
5
6
7
8

1
2
3
4
5
6
7
8
-
+







#!python3
#!/usr/bin/env python3

import sys
import os.path
import argparse
import json

import grammalecte.fr as gce
40
41
42
43
44
45
46
47

48
49
50
51
52


53
54
55
56
57
58


59
60
61
62
63
64
65


66
67
68
69
70
71
72
40
41
42
43
44
45
46

47
48
49
50
51
52
53
54
55
56
57
58


59
60
61
62
63
64
65


66
67
68
69
70
71
72
73
74







-
+





+
+




-
-
+
+





-
-
+
+







    if sys.platform == "win32":
        # Apparently, the console transforms «’» in «'».
        # So we reverse it to avoid many useless warnings.
        sText = sText.replace("'", "’")
    return sText


def _getErrors (sText, oTokenizer, oDict, bContext=False, bDebug=False):
def _getErrors (sText, oTokenizer, oDict, bContext=False, bSpellSuggestions=False, bDebug=False):
    "returns a tuple: (grammar errors, spelling errors)"
    aGrammErrs = gce.parse(sText, "FR", bDebug=bDebug, bContext=bContext)
    aSpellErrs = []
    for dToken in oTokenizer.genTokens(sText):
        if dToken['sType'] == "WORD" and not oDict.isValidToken(dToken['sValue']):
            if bSpellSuggestions:
                dToken['aSuggestions'] = oDict.suggest(dToken['sValue'])
            aSpellErrs.append(dToken)
    return aGrammErrs, aSpellErrs


def generateText (sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=False, nWidth=100):
    aGrammErrs, aSpellErrs = _getErrors(sText, oTokenizer, oDict, False, bDebug)
def generateText (sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=False, bSpellSuggestions=False, nWidth=100):
    aGrammErrs, aSpellErrs = _getErrors(sText, oTokenizer, oDict, False, bSpellSuggestions, bDebug)
    if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs:
        return ""
    return txt.generateParagraph(sText, aGrammErrs, aSpellErrs, nWidth)


def generateJSON (iIndex, sText, oTokenizer, oDict, bContext=False, bDebug=False, bEmptyIfNoErrors=False, lLineSet=None, bReturnText=False):
    aGrammErrs, aSpellErrs = _getErrors(sText, oTokenizer, oDict, bContext, bDebug)
def generateJSON (iIndex, sText, oTokenizer, oDict, bContext=False, bDebug=False, bEmptyIfNoErrors=False, bSpellSuggestions=False, lLineSet=None, bReturnText=False):
    aGrammErrs, aSpellErrs = _getErrors(sText, oTokenizer, oDict, bContext, bSpellSuggestions, bDebug)
    aGrammErrs = list(aGrammErrs)
    if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs:
        return ""
    if lLineSet:
        aGrammErrs, aSpellErrs = txt.convertToXY(aGrammErrs, aSpellErrs, lLineSet)
        return json.dumps({ "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False)
    if bReturnText:
110
111
112
113
114
115
116

117
118
119

120
121
122
123
124
125
126
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130







+



+







    xParser.add_argument("-ff", "--file_to_file", help="parse file (UTF-8 required!) and create a result file (*.res.txt)", type=str)
    xParser.add_argument("-owe", "--only_when_errors", help="display results only when there are errors", action="store_true")
    xParser.add_argument("-j", "--json", help="generate list of errors in JSON (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-cl", "--concat_lines", help="concatenate lines not separated by an empty paragraph (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-tf", "--textformatter", help="auto-format text according to typographical rules (unavailable with option --concat_lines)", action="store_true")
    xParser.add_argument("-tfo", "--textformatteronly", help="auto-format text and disable grammar checking (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-ctx", "--context", help="return errors with context (only with option --json)", action="store_true")
    xParser.add_argument("-as", "--add_suggestions", help="add suggestions for spelling errors (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-w", "--width", help="width in characters (40 < width < 200; default: 100)", type=int, choices=range(40,201,10), default=100)
    xParser.add_argument("-lo", "--list_options", help="list options", action="store_true")
    xParser.add_argument("-lr", "--list_rules", nargs="?", help="list rules [regex pattern as filter]", const="*")
    xParser.add_argument("-ls", "--list_suggestions", help="list suggestions", type=str)
    xParser.add_argument("-on", "--opt_on", nargs="+", help="activate options")
    xParser.add_argument("-off", "--opt_off", nargs="+", help="deactivate options")
    xParser.add_argument("-roff", "--rule_off", nargs="+", help="deactivate rules")
    xParser.add_argument("-d", "--debug", help="debugging mode (only in interactive mode)", action="store_true")
    xArgs = xParser.parse_args()

    gce.load()
134
135
136
137
138
139
140









141
142
143
144
145
146
147
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160







+
+
+
+
+
+
+
+
+








    if xArgs.list_options or xArgs.list_rules:
        if xArgs.list_options:
            gce.displayOptions("fr")
        if xArgs.list_rules:
            gce.displayRules(None  if xArgs.list_rules == "*"  else xArgs.list_rules)
        exit()

    if xArgs.list_suggestions:
        lSugg = oDict.suggest(xArgs.list_suggestions)
        if xArgs.json:
            sText = json.dumps({ "aSuggestions": lSugg }, ensure_ascii=False)
        else:
            sText = "Suggestions : " + " | ".join(lSugg)
        echo(sText)
        exit()

    if not xArgs.json:
        xArgs.context = False

    gce.setOptions({"html": True, "latex": True})
    if xArgs.opt_on:
        gce.setOptions({ opt:True  for opt in xArgs.opt_on  if opt in gce.getOptions() })
164
165
166
167
168
169
170
171

172
173

174
175
176
177
178
179
180
181
182
183
184
185
186

187
188

189
190
191
192
193
194
195
177
178
179
180
181
182
183

184
185

186
187
188
189
190
191
192
193
194
195
196
197
198

199
200

201
202
203
204
205
206
207
208







-
+

-
+












-
+

-
+







            for i, sText in enumerate(readfile(sFile), 1):
                if xArgs.textformatter or xArgs.textformatteronly:
                    sText = oTF.formatText(sText)
                if xArgs.textformatteronly:
                    output(sText, hDst)
                else:
                    if xArgs.json:
                        sText = generateJSON(i, sText, oTokenizer, oDict, bContext=xArgs.context, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bReturnText=xArgs.textformatter)
                        sText = generateJSON(i, sText, oTokenizer, oDict, bContext=xArgs.context, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSuggestions=xArgs.add_suggestions, bReturnText=xArgs.textformatter)
                    else:
                        sText = generateText(sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, nWidth=xArgs.width)
                        sText = generateText(sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSuggestions=xArgs.add_suggestions, nWidth=xArgs.width)
                    if sText:
                        if xArgs.json and bComma:
                            output(",\n", hDst)
                        output(sText, hDst)
                        bComma = True
                if hDst:
                    echo("§ %d\r" % i, end="", flush=True)
        else:
            # concaténation des lignes non séparées par une ligne vide
            for i, lLine in enumerate(readfileAndConcatLines(sFile), 1):
                sText, lLineSet = txt.createParagraphWithLines(lLine)
                if xArgs.json:
                    sText = generateJSON(i, sText, oTokenizer, oDict, bContext=xArgs.context, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, lLineSet=lLineSet)
                    sText = generateJSON(i, sText, oTokenizer, oDict, bContext=xArgs.context, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSuggestions=xArgs.add_suggestions, lLineSet=lLineSet)
                else:
                    sText = generateText(sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, nWidth=xArgs.width)
                    sText = generateText(sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSuggestions=xArgs.add_suggestions, nWidth=xArgs.width)
                if sText:
                    if xArgs.json and bComma:
                        output(",\n", hDst)
                    output(sText, hDst)
                    bComma = True
                if hDst:
                    echo("§ %d\r" % i, end="", flush=True)

Modified gc_core/py/text.py from [72d4931466] to [133d154e72].

66
67
68
69
70
71
72
73

74
75
76
77
78
79
80

81
82
83
84
85














86

87
88



89
90


91
92
93
94
95
96
97
66
67
68
69
70
71
72

73





74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95

96
97
98
99
100
101


102
103
104
105
106
107
108
109
110







-
+
-
-
-
-
-


+





+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+


+
+
+
-
-
+
+







                    sErrLine += " " * (nEnd - len(sErrLine))
                sErrLine = sErrLine[:nStart] + "°" * (nEnd - nStart) + sErrLine[nEnd:]
            else:
                break
        if sErrLine:
            sText += sErrLine + "\n"
        if nGrammErr:
            for dErr in lGrammErrs[:nGrammErr]:
            sText += getReadableErrors(lGrammErrs[:nGrammErr], nWidth)
                sMsg, *others = getReadableError(dErr).split("\n")
                sText += "\n".join(textwrap.wrap(sMsg, nWidth, subsequent_indent="  ")) + "\n"
                for arg in others:
                    sText += "\n".join(textwrap.wrap(arg, nWidth, subsequent_indent="    ")) + "\n"
            sText += "\n"
            del lGrammErrs[0:nGrammErr]
        if nSpellErr:
            sText += getReadableErrors(lSpellErrs[:nSpellErr], nWidth, True)
            del lSpellErrs[0:nSpellErr]
        nOffset += ln
    return sText


def getReadableErrors (lErrs, nWidth, bSpell=False):
    """Render the errors of lErrs as a single block of wrapped, readable text.

    Each error is formatted by getReadableError(), then split on newlines:
    the first line is wrapped to nWidth with a 2-space continuation indent,
    every following line with a 4-space continuation indent.

    When bSpell is True, errors that carry no "aSuggestions" key are skipped.

    Returns the formatted text followed by one blank line, or an empty
    string when no error was rendered.
    """
    lChunks = []
    for dErr in lErrs:
        # Spelling errors are only reported when suggestions are attached.
        if bSpell and "aSuggestions" not in dErr:
            continue
        sHead, *lDetails = getReadableError(dErr, bSpell).split("\n")
        lChunks.append("\n".join(textwrap.wrap(sHead, nWidth, subsequent_indent="  ")) + "\n")
        for sDetail in lDetails:
            lChunks.append("\n".join(textwrap.wrap(sDetail, nWidth, subsequent_indent="    ")) + "\n")
    if not lChunks:
        return ""
    # A trailing empty line separates this block from whatever follows.
    lChunks.append("\n")
    return "".join(lChunks)


def getReadableError (dErr):
def getReadableError (dErr, bSpell=False):
    "Returns an error dErr as a readable error"
    try:
        if bSpell:
            s = u"* {nStart}:{nEnd}  # {sValue}:".format(**dErr)
        else:
        s = u"* {nStart}:{nEnd}  # {sLineId} / {sRuleId}:\n".format(**dErr)
        s += "  " + dErr.get("sMessage", "# error : message not found")
            s = u"* {nStart}:{nEnd}  # {sLineId} / {sRuleId}:\n".format(**dErr)
            s += "  " + dErr.get("sMessage", "# error : message not found")
        if dErr.get("aSuggestions", None):
            s += "\n  > Suggestions : " + " | ".join(dErr.get("aSuggestions", "# error : suggestions not found"))
        if dErr.get("URL", None):
            s += "\n  > URL: " + dErr["URL"]
        return s
    except KeyError:
        return u"* Non-compliant error: {}".format(dErr)