Grammalecte: Check-in [7a62b9fd73]

Overview
Comment: [cli] add spellchecker suggestions (patch from Stéphane Veyret)
SHA3-256: 7a62b9fd735fe6575944584330ec4d37cfe868880c461ea063fa93c69ac80a6f
User & Date: olr on 2017-11-12 18:25:15
Original Comment: [cli] add spellchecker suggestions
Context
2017-11-12
18:33  [cli] change options name (check-in: 1254fa8cec, user: olr, tags: trunk, cli)
18:25  [cli] add spellchecker suggestions (patch from Stéphane Veyret) (check-in: 7a62b9fd73, user: olr, tags: trunk, cli)
10:59  [fx] CSS protection for hyphenation (check-in: 25276d751a, user: olr, tags: trunk, fx)
Changes

Modified cli.py from [68793c6981] to [b74639032d].

Before (lines 1-8):

#!python3

import sys
import os.path
import argparse
import json

import grammalecte.fr as gce

After (lines 1-8):

#!/usr/bin/env python3

import sys
import os.path
import argparse
import json

import grammalecte.fr as gce

Before (lines 40-72):

    if sys.platform == "win32":
        # Apparently, the console transforms «’» in «'».
        # So we reverse it to avoid many useless warnings.
        sText = sText.replace("'", "’")
    return sText


def _getErrors (sText, oTokenizer, oDict, bContext=False, bDebug=False):
    "returns a tuple: (grammar errors, spelling errors)"
    aGrammErrs = gce.parse(sText, "FR", bDebug=bDebug, bContext=bContext)
    aSpellErrs = []
    for dToken in oTokenizer.genTokens(sText):
        if dToken['sType'] == "WORD" and not oDict.isValidToken(dToken['sValue']):
            aSpellErrs.append(dToken)
    return aGrammErrs, aSpellErrs


def generateText (sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=False, nWidth=100):
    aGrammErrs, aSpellErrs = _getErrors(sText, oTokenizer, oDict, False, bDebug)
    if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs:
        return ""
    return txt.generateParagraph(sText, aGrammErrs, aSpellErrs, nWidth)


def generateJSON (iIndex, sText, oTokenizer, oDict, bContext=False, bDebug=False, bEmptyIfNoErrors=False, lLineSet=None, bReturnText=False):
    aGrammErrs, aSpellErrs = _getErrors(sText, oTokenizer, oDict, bContext, bDebug)
    aGrammErrs = list(aGrammErrs)
    if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs:
        return ""
    if lLineSet:
        aGrammErrs, aSpellErrs = txt.convertToXY(aGrammErrs, aSpellErrs, lLineSet)
        return json.dumps({ "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False)
    if bReturnText:

After (lines 40-74):

    if sys.platform == "win32":
        # Apparently, the console transforms «’» in «'».
        # So we reverse it to avoid many useless warnings.
        sText = sText.replace("'", "’")
    return sText


def _getErrors (sText, oTokenizer, oDict, bContext=False, bSpellSuggestions=False, bDebug=False):
    "returns a tuple: (grammar errors, spelling errors)"
    aGrammErrs = gce.parse(sText, "FR", bDebug=bDebug, bContext=bContext)
    aSpellErrs = []
    for dToken in oTokenizer.genTokens(sText):
        if dToken['sType'] == "WORD" and not oDict.isValidToken(dToken['sValue']):
            if bSpellSuggestions:
                dToken['aSuggestions'] = oDict.suggest(dToken['sValue'])
            aSpellErrs.append(dToken)
    return aGrammErrs, aSpellErrs


def generateText (sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=False, bSpellSuggestions=False, nWidth=100):
    aGrammErrs, aSpellErrs = _getErrors(sText, oTokenizer, oDict, False, bSpellSuggestions, bDebug)
    if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs:
        return ""
    return txt.generateParagraph(sText, aGrammErrs, aSpellErrs, nWidth)


def generateJSON (iIndex, sText, oTokenizer, oDict, bContext=False, bDebug=False, bEmptyIfNoErrors=False, bSpellSuggestions=False, lLineSet=None, bReturnText=False):
    aGrammErrs, aSpellErrs = _getErrors(sText, oTokenizer, oDict, bContext, bSpellSuggestions, bDebug)
    aGrammErrs = list(aGrammErrs)
    if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs:
        return ""
    if lLineSet:
        aGrammErrs, aSpellErrs = txt.convertToXY(aGrammErrs, aSpellErrs, lLineSet)
        return json.dumps({ "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False)
    if bReturnText:
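
In short, _getErrors(), generateText() and generateJSON() gain a bSpellSuggestions flag, and when it is set each misspelled WORD token is enriched with an aSuggestions list from oDict.suggest() before being reported. The self-contained sketch below is not part of the check-in: FakeDict stands in for the real spellchecker object and mimics only the two methods used above, and the words and suggestions are invented.

    # Sketch (not from the check-in) of the data shape produced when
    # bSpellSuggestions is True: an invalid WORD token gains "aSuggestions".
    class FakeDict:
        def isValidToken (self, sWord):
            return sWord != "fote"          # invented: only "fote" is unknown
        def suggest (self, sWord):
            return ["faute", "fête"]        # invented candidate corrections

    oDict = FakeDict()
    aSpellErrs = []
    for dToken in [{ "sType": "WORD", "sValue": "fote", "nStart": 4, "nEnd": 8 }]:
        if dToken["sType"] == "WORD" and not oDict.isValidToken(dToken["sValue"]):
            dToken["aSuggestions"] = oDict.suggest(dToken["sValue"])   # new behaviour in this check-in
            aSpellErrs.append(dToken)
    print(aSpellErrs)
    # [{'sType': 'WORD', 'sValue': 'fote', 'nStart': 4, 'nEnd': 8, 'aSuggestions': ['faute', 'fête']}]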

Before (lines 110-126):

    xParser.add_argument("-ff", "--file_to_file", help="parse file (UTF-8 required!) and create a result file (*.res.txt)", type=str)
    xParser.add_argument("-owe", "--only_when_errors", help="display results only when there are errors", action="store_true")
    xParser.add_argument("-j", "--json", help="generate list of errors in JSON (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-cl", "--concat_lines", help="concatenate lines not separated by an empty paragraph (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-tf", "--textformatter", help="auto-format text according to typographical rules (unavailable with option --concat_lines)", action="store_true")
    xParser.add_argument("-tfo", "--textformatteronly", help="auto-format text and disable grammar checking (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-ctx", "--context", help="return errors with context (only with option --json)", action="store_true")
    xParser.add_argument("-w", "--width", help="width in characters (40 < width < 200; default: 100)", type=int, choices=range(40,201,10), default=100)
    xParser.add_argument("-lo", "--list_options", help="list options", action="store_true")
    xParser.add_argument("-lr", "--list_rules", nargs="?", help="list rules [regex pattern as filter]", const="*")
    xParser.add_argument("-on", "--opt_on", nargs="+", help="activate options")
    xParser.add_argument("-off", "--opt_off", nargs="+", help="deactivate options")
    xParser.add_argument("-roff", "--rule_off", nargs="+", help="deactivate rules")
    xParser.add_argument("-d", "--debug", help="debugging mode (only in interactive mode)", action="store_true")
    xArgs = xParser.parse_args()

    gce.load()

After (lines 112-130):

    xParser.add_argument("-ff", "--file_to_file", help="parse file (UTF-8 required!) and create a result file (*.res.txt)", type=str)
    xParser.add_argument("-owe", "--only_when_errors", help="display results only when there are errors", action="store_true")
    xParser.add_argument("-j", "--json", help="generate list of errors in JSON (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-cl", "--concat_lines", help="concatenate lines not separated by an empty paragraph (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-tf", "--textformatter", help="auto-format text according to typographical rules (unavailable with option --concat_lines)", action="store_true")
    xParser.add_argument("-tfo", "--textformatteronly", help="auto-format text and disable grammar checking (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-ctx", "--context", help="return errors with context (only with option --json)", action="store_true")
    xParser.add_argument("-as", "--add_suggestions", help="add suggestions for spelling errors (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-w", "--width", help="width in characters (40 < width < 200; default: 100)", type=int, choices=range(40,201,10), default=100)
    xParser.add_argument("-lo", "--list_options", help="list options", action="store_true")
    xParser.add_argument("-lr", "--list_rules", nargs="?", help="list rules [regex pattern as filter]", const="*")
    xParser.add_argument("-ls", "--list_suggestions", help="list suggestions", type=str)
    xParser.add_argument("-on", "--opt_on", nargs="+", help="activate options")
    xParser.add_argument("-off", "--opt_off", nargs="+", help="deactivate options")
    xParser.add_argument("-roff", "--rule_off", nargs="+", help="deactivate rules")
    xParser.add_argument("-d", "--debug", help="debugging mode (only in interactive mode)", action="store_true")
    xArgs = xParser.parse_args()

    gce.load()
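
The two new switches land on xArgs as add_suggestions (a boolean) and list_suggestions (a word to look up), which is how the later hunks reference them. A minimal standalone argparse sketch: the option definitions are copied from the hunk above, and the sample command line is hypothetical.

    # Standalone sketch of how the two new options are parsed.
    import argparse

    xParser = argparse.ArgumentParser()
    xParser.add_argument("-as", "--add_suggestions", help="add suggestions for spelling errors (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-ls", "--list_suggestions", help="list suggestions", type=str)
    xArgs = xParser.parse_args(["-as", "-ls", "fote"])   # hypothetical command line
    print(xArgs.add_suggestions)     # True
    print(xArgs.list_suggestions)    # fote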

Before (lines 134-147):

    if xArgs.list_options or xArgs.list_rules:
        if xArgs.list_options:
            gce.displayOptions("fr")
        if xArgs.list_rules:
            gce.displayRules(None  if xArgs.list_rules == "*"  else xArgs.list_rules)
        exit()

    if not xArgs.json:
        xArgs.context = False

    gce.setOptions({"html": True, "latex": True})
    if xArgs.opt_on:
        gce.setOptions({ opt:True  for opt in xArgs.opt_on  if opt in gce.getOptions() })

After (lines 138-160):

    if xArgs.list_options or xArgs.list_rules:
        if xArgs.list_options:
            gce.displayOptions("fr")
        if xArgs.list_rules:
            gce.displayRules(None  if xArgs.list_rules == "*"  else xArgs.list_rules)
        exit()

    if xArgs.list_suggestions:
        lSugg = oDict.suggest(xArgs.list_suggestions)
        if xArgs.json:
            sText = json.dumps({ "aSuggestions": lSugg }, ensure_ascii=False)
        else:
            sText = "Suggestions : " + " | ".join(lSugg)
        echo(sText)
        exit()

    if not xArgs.json:
        xArgs.context = False

    gce.setOptions({"html": True, "latex": True})
    if xArgs.opt_on:
        gce.setOptions({ opt:True  for opt in xArgs.opt_on  if opt in gce.getOptions() })
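
When --list_suggestions is given, the branch above looks the word up with oDict.suggest() and prints either a JSON object or a plain "Suggestions :" line, depending on --json. A sketch of the two output shapes, with an invented suggestion list:

    # Sketch of the two outputs of the new --list_suggestions branch;
    # lSugg is invented here, cli.py gets it from oDict.suggest(xArgs.list_suggestions).
    import json

    lSugg = ["faute", "fête"]
    print(json.dumps({ "aSuggestions": lSugg }, ensure_ascii=False))   # with --json
    print("Suggestions : " + " | ".join(lSugg))                        # default text output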

Before (lines 164-195):

            for i, sText in enumerate(readfile(sFile), 1):
                if xArgs.textformatter or xArgs.textformatteronly:
                    sText = oTF.formatText(sText)
                if xArgs.textformatteronly:
                    output(sText, hDst)
                else:
                    if xArgs.json:
                        sText = generateJSON(i, sText, oTokenizer, oDict, bContext=xArgs.context, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bReturnText=xArgs.textformatter)
                    else:
                        sText = generateText(sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, nWidth=xArgs.width)
                    if sText:
                        if xArgs.json and bComma:
                            output(",\n", hDst)
                        output(sText, hDst)
                        bComma = True
                if hDst:
                    echo("§ %d\r" % i, end="", flush=True)
        else:
            # concaténation des lignes non séparées par une ligne vide
            for i, lLine in enumerate(readfileAndConcatLines(sFile), 1):
                sText, lLineSet = txt.createParagraphWithLines(lLine)
                if xArgs.json:
                    sText = generateJSON(i, sText, oTokenizer, oDict, bContext=xArgs.context, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, lLineSet=lLineSet)
                else:
                    sText = generateText(sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, nWidth=xArgs.width)
                if sText:
                    if xArgs.json and bComma:
                        output(",\n", hDst)
                    output(sText, hDst)
                    bComma = True
                if hDst:
                    echo("§ %d\r" % i, end="", flush=True)

After (lines 177-208):

            for i, sText in enumerate(readfile(sFile), 1):
                if xArgs.textformatter or xArgs.textformatteronly:
                    sText = oTF.formatText(sText)
                if xArgs.textformatteronly:
                    output(sText, hDst)
                else:
                    if xArgs.json:
                        sText = generateJSON(i, sText, oTokenizer, oDict, bContext=xArgs.context, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSuggestions=xArgs.add_suggestions, bReturnText=xArgs.textformatter)
                    else:
                        sText = generateText(sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSuggestions=xArgs.add_suggestions, nWidth=xArgs.width)
                    if sText:
                        if xArgs.json and bComma:
                            output(",\n", hDst)
                        output(sText, hDst)
                        bComma = True
                if hDst:
                    echo("§ %d\r" % i, end="", flush=True)
        else:
            # concaténation des lignes non séparées par une ligne vide
            for i, lLine in enumerate(readfileAndConcatLines(sFile), 1):
                sText, lLineSet = txt.createParagraphWithLines(lLine)
                if xArgs.json:
                    sText = generateJSON(i, sText, oTokenizer, oDict, bContext=xArgs.context, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSuggestions=xArgs.add_suggestions, lLineSet=lLineSet)
                else:
                    sText = generateText(sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSuggestions=xArgs.add_suggestions, nWidth=xArgs.width)
                if sText:
                    if xArgs.json and bComma:
                        output(",\n", hDst)
                    output(sText, hDst)
                    bComma = True
                if hDst:
                    echo("§ %d\r" % i, end="", flush=True)
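
In both file-processing loops, bSpellSuggestions is now fed from xArgs.add_suggestions, so with --json the spelling-error tokens serialized into lSpellingErrors carry their aSuggestions list. Below is a trimmed, hypothetical paragraph object built only from the key names visible above; the real generateJSON output may contain additional fields not shown in this diff.

    # Hypothetical, trimmed shape of one paragraph emitted with --json and
    # --add_suggestions; only the key names come from the hunks above.
    import json

    dParagraph = {
        "lGrammarErrors": [],
        "lSpellingErrors": [
            { "sType": "WORD", "sValue": "fote", "nStart": 4, "nEnd": 8,
              "aSuggestions": ["faute", "fête"] }
        ]
    }
    print(json.dumps(dParagraph, ensure_ascii=False))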

Modified gc_core/py/text.py from [72d4931466] to [133d154e72].

Before (lines 66-97):

                    sErrLine += " " * (nEnd - len(sErrLine))
                sErrLine = sErrLine[:nStart] + "°" * (nEnd - nStart) + sErrLine[nEnd:]
            else:
                break
        if sErrLine:
            sText += sErrLine + "\n"
        if nGrammErr:
            for dErr in lGrammErrs[:nGrammErr]:
                sMsg, *others = getReadableError(dErr).split("\n")
                sText += "\n".join(textwrap.wrap(sMsg, nWidth, subsequent_indent="  ")) + "\n"
                for arg in others:
                    sText += "\n".join(textwrap.wrap(arg, nWidth, subsequent_indent="    ")) + "\n"
            sText += "\n"
            del lGrammErrs[0:nGrammErr]
        if nSpellErr:
            del lSpellErrs[0:nSpellErr]
        nOffset += ln
    return sText


def getReadableError (dErr):
    "Returns an error dErr as a readable error"
    try:
        s = u"* {nStart}:{nEnd}  # {sLineId} / {sRuleId}:\n".format(**dErr)
        s += "  " + dErr.get("sMessage", "# error : message not found")
        if dErr.get("aSuggestions", None):
            s += "\n  > Suggestions : " + " | ".join(dErr.get("aSuggestions", "# error : suggestions not found"))
        if dErr.get("URL", None):
            s += "\n  > URL: " + dErr["URL"]
        return s
    except KeyError:
        return u"* Non-compliant error: {}".format(dErr)

After (lines 66-110):

                    sErrLine += " " * (nEnd - len(sErrLine))
                sErrLine = sErrLine[:nStart] + "°" * (nEnd - nStart) + sErrLine[nEnd:]
            else:
                break
        if sErrLine:
            sText += sErrLine + "\n"
        if nGrammErr:
            sText += getReadableErrors(lGrammErrs[:nGrammErr], nWidth)
            del lGrammErrs[0:nGrammErr]
        if nSpellErr:
            sText += getReadableErrors(lSpellErrs[:nSpellErr], nWidth, True)
            del lSpellErrs[0:nSpellErr]
        nOffset += ln
    return sText


def getReadableErrors (lErrs, nWidth, bSpell=False):
    "Returns lErrs errors as readable errors"
    sErrors = ""
    for dErr in lErrs:
        if not bSpell or "aSuggestions" in dErr:
            sMsg, *others = getReadableError(dErr, bSpell).split("\n")
            sErrors += "\n".join(textwrap.wrap(sMsg, nWidth, subsequent_indent="  ")) + "\n"
            for arg in others:
                sErrors += "\n".join(textwrap.wrap(arg, nWidth, subsequent_indent="    ")) + "\n"
    if sErrors != "":
        sErrors += "\n"
    return sErrors


def getReadableError (dErr, bSpell=False):
    "Returns an error dErr as a readable error"
    try:
        if bSpell:
            s = u"* {nStart}:{nEnd}  # {sValue}:".format(**dErr)
        else:
            s = u"* {nStart}:{nEnd}  # {sLineId} / {sRuleId}:\n".format(**dErr)
            s += "  " + dErr.get("sMessage", "# error : message not found")
        if dErr.get("aSuggestions", None):
            s += "\n  > Suggestions : " + " | ".join(dErr.get("aSuggestions", "# error : suggestions not found"))
        if dErr.get("URL", None):
            s += "\n  > URL: " + dErr["URL"]
        return s
    except KeyError:
        return u"* Non-compliant error: {}".format(dErr)
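
For the text output, the new getReadableErrors() helper now handles both error kinds, and the bSpell branch of getReadableError() renders a spelling error from its token fields. A self-contained sketch of that rendering, with an invented token:

    # Sketch of the spelling-error rendering added above; the formatting
    # mirrors the bSpell branch, the token values are invented.
    dErr = { "nStart": 4, "nEnd": 8, "sValue": "fote", "aSuggestions": ["faute", "fête"] }
    s = u"* {nStart}:{nEnd}  # {sValue}:".format(**dErr)
    if dErr.get("aSuggestions", None):
        s += "\n  > Suggestions : " + " | ".join(dErr.get("aSuggestions", []))
    print(s)
    # * 4:8  # fote:
    #   > Suggestions : faute | fête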