40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
|
if sys.platform == "win32":
# Apparently, the console transforms «’» in «'».
# So we reverse it to avoid many useless warnings.
sText = sText.replace("'", "’")
return sText
def _getErrors (sText, oTokenizer, oSpellChecker, bContext=False, bSpellSugg=False, bDebug=False):
"returns a tuple: (grammar errors, spelling errors)"
aGrammErrs = gce.parse(sText, "FR", bDebug=bDebug, bContext=bContext)
aSpellErrs = []
for dToken in oTokenizer.genTokens(sText):
if dToken['sType'] == "WORD" and not oSpellChecker.isValidToken(dToken['sValue']):
if bSpellSugg:
dToken['aSuggestions'] = []
for lSugg in oSpellChecker.suggest(dToken['sValue']):
dToken['aSuggestions'].extend(lSugg)
aSpellErrs.append(dToken)
return aGrammErrs, aSpellErrs
def generateText (sText, oTokenizer, oSpellChecker, bDebug=False, bEmptyIfNoErrors=False, bSpellSugg=False, nWidth=100):
aGrammErrs, aSpellErrs = _getErrors(sText, oTokenizer, oSpellChecker, False, bSpellSugg, bDebug)
if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs:
return ""
return txt.generateParagraph(sText, aGrammErrs, aSpellErrs, nWidth)
def generateJSON (iIndex, sText, oTokenizer, oSpellChecker, bContext=False, bDebug=False, bEmptyIfNoErrors=False, bSpellSugg=False, lLineSet=None, bReturnText=False):
aGrammErrs, aSpellErrs = _getErrors(sText, oTokenizer, oSpellChecker, bContext, bSpellSugg, bDebug)
aGrammErrs = list(aGrammErrs)
if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs:
return ""
if lLineSet:
aGrammErrs, aSpellErrs = txt.convertToXY(aGrammErrs, aSpellErrs, lLineSet)
return json.dumps({ "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False)
if bReturnText:
return json.dumps({ "iParagraph": iIndex, "sText": sText, "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False)
return json.dumps({ "iParagraph": iIndex, "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False)
def readfile (spf):
"generator: returns file line by line"
if os.path.isfile(spf):
with open(spf, "r", encoding="utf-8") as hSrc:
for sLine in hSrc:
yield sLine
else:
print("# Error: file not found.")
def readfileAndConcatLines (spf):
"generator: returns text by list of lines not separated by an empty line"
lLine = []
for i, sLine in enumerate(readfile(spf), 1):
if sLine.strip():
lLine.append((i, sLine))
elif lLine:
yield lLine
lLine = []
if lLine:
yield lLine
def output (sText, hDst=None):
if not hDst:
echo(sText, end="")
else:
hDst.write(sText)
def main ():
xParser = argparse.ArgumentParser()
xParser.add_argument("-f", "--file", help="parse file (UTF-8 required!) [on Windows, -f is similar to -ff]", type=str)
xParser.add_argument("-ff", "--file_to_file", help="parse file (UTF-8 required!) and create a result file (*.res.txt)", type=str)
xParser.add_argument("-owe", "--only_when_errors", help="display results only when there are errors", action="store_true")
xParser.add_argument("-j", "--json", help="generate list of errors in JSON (only with option --file or --file_to_file)", action="store_true")
xParser.add_argument("-cl", "--concat_lines", help="concatenate lines not separated by an empty paragraph (only with option --file or --file_to_file)", action="store_true")
xParser.add_argument("-tf", "--textformatter", help="auto-format text according to typographical rules (unavailable with option --concat_lines)", action="store_true")
xParser.add_argument("-tfo", "--textformatteronly", help="auto-format text and disable grammar checking (only with option --file or --file_to_file)", action="store_true")
xParser.add_argument("-ctx", "--context", help="return errors with context (only with option --json)", action="store_true")
xParser.add_argument("-wss", "--with_spell_sugg", help="add suggestions for spelling errors (only with option --file or --file_to_file)", action="store_true")
xParser.add_argument("-w", "--width", help="width in characters (40 < width < 200; default: 100)", type=int, choices=range(40,201,10), default=100)
xParser.add_argument("-lo", "--list_options", help="list options", action="store_true")
xParser.add_argument("-lr", "--list_rules", nargs="?", help="list rules [regex pattern as filter]", const="*")
xParser.add_argument("-sug", "--suggest", help="get suggestions list for given word", type=str)
xParser.add_argument("-on", "--opt_on", nargs="+", help="activate options")
xParser.add_argument("-off", "--opt_off", nargs="+", help="deactivate options")
xParser.add_argument("-roff", "--rule_off", nargs="+", help="deactivate rules")
xParser.add_argument("-d", "--debug", help="debugging mode (only in interactive mode)", action="store_true")
xArgs = xParser.parse_args()
gce.load()
if not xArgs.json:
echo("Grammalecte v{}".format(gce.version))
oSpellChecker = gce.getSpellChecker()
oTokenizer = tkz.Tokenizer("fr")
oLexGraphe = lxg.Lexicographe(oSpellChecker)
if xArgs.textformatter or xArgs.textformatteronly:
oTF = tf.TextFormatter()
if xArgs.list_options or xArgs.list_rules:
if xArgs.list_options:
gce.displayOptions("fr")
if xArgs.list_rules:
gce.displayRules(None if xArgs.list_rules == "*" else xArgs.list_rules)
exit()
if xArgs.suggest:
for lSugg in oSpellChecker.suggest(xArgs.suggest):
if xArgs.json:
sText = json.dumps({ "aSuggestions": lSugg }, ensure_ascii=False)
else:
sText = "Suggestions : " + " | ".join(lSugg)
echo(sText)
exit()
if not xArgs.json:
xArgs.context = False
gce.setOptions({"html": True, "latex": True})
if xArgs.opt_on:
gce.setOptions({ opt:True for opt in xArgs.opt_on if opt in gce.getOptions() })
if xArgs.opt_off:
gce.setOptions({ opt:False for opt in xArgs.opt_off if opt in gce.getOptions() })
if xArgs.rule_off:
for sRule in xArgs.rule_off:
gce.ignoreRule(sRule)
sFile = xArgs.file or xArgs.file_to_file
if sFile:
# file processing
hDst = open(sFile[:sFile.rfind(".")]+".res.txt", "w", encoding="utf-8", newline="\n") if xArgs.file_to_file or sys.platform == "win32" else None
bComma = False
if xArgs.json:
output('{ "grammalecte": "'+gce.version+'", "lang": "'+gce.lang+'", "data" : [\n', hDst)
if not xArgs.concat_lines:
# pas de concaténation des lignes
for i, sText in enumerate(readfile(sFile), 1):
if xArgs.textformatter or xArgs.textformatteronly:
sText = oTF.formatText(sText)
if xArgs.textformatteronly:
output(sText, hDst)
else:
if xArgs.json:
sText = generateJSON(i, sText, oTokenizer, oSpellChecker, bContext=xArgs.context, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSugg=xArgs.with_spell_sugg, bReturnText=xArgs.textformatter)
else:
sText = generateText(sText, oTokenizer, oSpellChecker, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSugg=xArgs.with_spell_sugg, nWidth=xArgs.width)
if sText:
if xArgs.json and bComma:
output(",\n", hDst)
output(sText, hDst)
bComma = True
if hDst:
echo("§ %d\r" % i, end="", flush=True)
else:
# concaténation des lignes non séparées par une ligne vide
for i, lLine in enumerate(readfileAndConcatLines(sFile), 1):
sText, lLineSet = txt.createParagraphWithLines(lLine)
if xArgs.json:
sText = generateJSON(i, sText, oTokenizer, oSpellChecker, bContext=xArgs.context, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSugg=xArgs.with_spell_sugg, lLineSet=lLineSet)
else:
sText = generateText(sText, oTokenizer, oSpellChecker, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSugg=xArgs.with_spell_sugg, nWidth=xArgs.width)
if sText:
if xArgs.json and bComma:
output(",\n", hDst)
output(sText, hDst)
bComma = True
if hDst:
echo("§ %d\r" % i, end="", flush=True)
if xArgs.json:
output("\n]}\n", hDst)
else:
# pseudo-console
sInputText = "\n~==========~ Enter your text [/h /q] ~==========~\n"
sText = _getText(sInputText)
while True:
if sText.startswith("?"):
for sWord in sText[1:].strip().split():
if sWord:
echo("* " + sWord)
for sMorph in oSpellChecker.getMorph(sWord):
echo(" {:<32} {}".format(sMorph, oLexGraphe.formatTags(sMorph)))
elif sText.startswith("!"):
for sWord in sText[1:].strip().split():
if sWord:
for lSugg in oSpellChecker.suggest(sWord):
echo(" | ".join(lSugg))
elif sText.startswith(">"):
oSpellChecker.drawPath(sText[1:].strip())
elif sText.startswith("="):
for sRes in oSpellChecker.select(sText[1:].strip()):
echo(sRes)
elif sText.startswith("/+ "):
gce.setOptions({ opt:True for opt in sText[3:].strip().split() if opt in gce.getOptions() })
echo("done")
elif sText.startswith("/- "):
gce.setOptions({ opt:False for opt in sText[3:].strip().split() if opt in gce.getOptions() })
echo("done")
elif sText.startswith("/-- "):
for sRule in sText[3:].strip().split():
gce.ignoreRule(sRule)
echo("done")
elif sText.startswith("/++ "):
for sRule in sText[3:].strip().split():
gce.reactivateRule(sRule)
echo("done")
elif sText == "/debug" or sText == "/d":
xArgs.debug = not(xArgs.debug)
echo("debug mode on" if xArgs.debug else "debug mode off")
elif sText == "/textformatter" or sText == "/tf":
xArgs.textformatter = not(xArgs.textformatter)
echo("textformatter on" if xArgs.debug else "textformatter off")
elif sText == "/help" or sText == "/h":
echo(_HELP)
elif sText == "/lopt" or sText == "/lo":
gce.displayOptions("fr")
elif sText.startswith("/lr"):
sText = sText.strip()
sFilter = sText[sText.find(" "):].strip() if sText != "/lr" and sText != "/rules" else None
gce.displayRules(sFilter)
elif sText == "/quit" or sText == "/q":
break
elif sText.startswith("/rl"):
# reload (todo)
pass
else:
for sParagraph in txt.getParagraph(sText):
if xArgs.textformatter:
sText = oTF.formatText(sText)
sRes = generateText(sText, oTokenizer, oSpellChecker, bDebug=xArgs.debug, bEmptyIfNoErrors=xArgs.only_when_errors, nWidth=xArgs.width)
if sRes:
echo("\n" + sRes)
else:
echo("\nNo error found.")
sText = _getText(sInputText)
if __name__ == '__main__':
main()
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
|
|
|
>
>
>
>
|
>
|
|
|
|
>
|
|
>
|
>
|
|
>
>
>
>
|
|
<
<
<
<
<
>
|
|
>
>
>
>
>
|
|
|
>
|
|
|
<
<
|
|
|
|
<
>
|
|
<
|
<
<
<
<
<
<
<
|
<
<
<
<
<
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
|
if sys.platform == "win32":
# Apparently, the console transforms «’» in «'».
# So we reverse it to avoid many useless warnings.
sText = sText.replace("'", "’")
return sText
def readFile (spf):
"generator: returns file line by line"
if os.path.isfile(spf):
with open(spf, "r", encoding="utf-8") as hSrc:
for sLine in hSrc:
yield sLine
else:
print("# Error: file <" + spf + ">not found.")
def generateParagraphFromFile (spf, bConcatLines=False):
"generator: returns text by tuple of (iParagraph, sParagraph, lLineSet)"
if not bConcatLines:
for iParagraph, sLine in enumerate(readFile(spf), 1):
yield iParagraph, sLine, None
else:
lLine = []
iParagraph = 1
for iLine, sLine in enumerate(readFile(spf), 1):
if sLine.strip():
lLine.append((iLine, sLine))
elif lLine:
sText, lLineSet = txt.createParagraphWithLines(lLine)
yield iParagraph, sText, lLineSet
lLine = []
iParagraph += 1
if lLine:
sText, lLineSet = txt.createParagraphWithLines(lLine)
yield iParagraph, sText, lLineSet
def output (sText, hDst=None):
if not hDst:
echo(sText, end="")
else:
hDst.write(sText)
def main ():
xParser = argparse.ArgumentParser()
xParser.add_argument("-f", "--file", help="parse file (UTF-8 required!) [on Windows, -f is similar to -ff]", type=str)
xParser.add_argument("-ff", "--file_to_file", help="parse file (UTF-8 required!) and create a result file (*.res.txt)", type=str)
xParser.add_argument("-owe", "--only_when_errors", help="display results only when there are errors", action="store_true")
xParser.add_argument("-j", "--json", help="generate list of errors in JSON (only with option --file or --file_to_file)", action="store_true")
xParser.add_argument("-cl", "--concat_lines", help="concatenate lines not separated by an empty paragraph (only with option --file or --file_to_file)", action="store_true")
xParser.add_argument("-tf", "--textformatter", help="auto-format text according to typographical rules (not with option --concat_lines)", action="store_true")
xParser.add_argument("-tfo", "--textformatteronly", help="auto-format text and disable grammar checking (only with option --file or --file_to_file)", action="store_true")
xParser.add_argument("-ctx", "--context", help="return errors with context (only with option --json)", action="store_true")
xParser.add_argument("-wss", "--with_spell_sugg", help="add suggestions for spelling errors (only with option --file or --file_to_file)", action="store_true")
xParser.add_argument("-w", "--width", help="width in characters (40 < width < 200; default: 100)", type=int, choices=range(40,201,10), default=100)
xParser.add_argument("-lo", "--list_options", help="list options", action="store_true")
xParser.add_argument("-lr", "--list_rules", nargs="?", help="list rules [regex pattern as filter]", const="*")
xParser.add_argument("-sug", "--suggest", help="get suggestions list for given word", type=str)
xParser.add_argument("-on", "--opt_on", nargs="+", help="activate options")
xParser.add_argument("-off", "--opt_off", nargs="+", help="deactivate options")
xParser.add_argument("-roff", "--rule_off", nargs="+", help="deactivate rules")
xParser.add_argument("-d", "--debug", help="debugging mode (only in interactive mode)", action="store_true")
xArgs = xParser.parse_args()
oGrammarChecker = grammalecte.GrammarChecker("fr")
oSpellChecker = oGrammarChecker.getSpellChecker()
oLexicographer = oGrammarChecker.getLexicographer()
oTextFormatter = oGrammarChecker.getTextFormatter()
if not xArgs.json:
echo("Grammalecte v{}".format(oGrammarChecker.gce.version))
# list options or rules
if xArgs.list_options or xArgs.list_rules:
if xArgs.list_options:
oGrammarChecker.gce.displayOptions("fr")
if xArgs.list_rules:
oGrammarChecker.gce.displayRules(None if xArgs.list_rules == "*" else xArgs.list_rules)
exit()
# spell suggestions
if xArgs.suggest:
for lSugg in oSpellChecker.suggest(xArgs.suggest):
if xArgs.json:
sText = json.dumps({ "aSuggestions": lSugg }, ensure_ascii=False)
else:
sText = "Suggestions : " + " | ".join(lSugg)
echo(sText)
exit()
# disable options
if not xArgs.json:
xArgs.context = False
if xArgs.concat_lines:
xArgs.textformatter = False
# grammar options
oGrammarChecker.gce.setOptions({"html": True, "latex": True})
if xArgs.opt_on:
oGrammarChecker.gce.setOptions({ opt:True for opt in xArgs.opt_on if opt in oGrammarChecker.gce.getOptions() })
if xArgs.opt_off:
oGrammarChecker.gce.setOptions({ opt:False for opt in xArgs.opt_off if opt in oGrammarChecker.gce.getOptions() })
# disable grammar rules
if xArgs.rule_off:
for sRule in xArgs.rule_off:
oGrammarChecker.gce.ignoreRule(sRule)
sFile = xArgs.file or xArgs.file_to_file
if sFile:
# file processing
hDst = open(sFile[:sFile.rfind(".")]+".res.txt", "w", encoding="utf-8", newline="\n") if xArgs.file_to_file or sys.platform == "win32" else None
bComma = False
if xArgs.json:
output('{ "grammalecte": "'+oGrammarChecker.gce.version+'", "lang": "'+oGrammarChecker.gce.lang+'", "data" : [\n', hDst)
for i, sText, lLineSet in generateParagraphFromFile(sFile, xArgs.concat_lines):
if xArgs.textformatter or xArgs.textformatteronly:
sText = oTextFormatter.formatText(sText)
if xArgs.textformatteronly:
output(sText, hDst)
continue
if xArgs.json:
sText = oGrammarChecker.generateParagraphAsJSON(i, sText, bContext=xArgs.context, bEmptyIfNoErrors=xArgs.only_when_errors, \
bSpellSugg=xArgs.with_spell_sugg, bReturnText=xArgs.textformatter, lLineSet=lLineSet)
else:
sText = oGrammarChecker.generateParagraph(sText, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSugg=xArgs.with_spell_sugg, nWidth=xArgs.width)
if sText:
if xArgs.json and bComma:
output(",\n", hDst)
output(sText, hDst)
bComma = True
if hDst:
echo("§ %d\r" % i, end="", flush=True)
if xArgs.json:
output("\n]}\n", hDst)
else:
# pseudo-console
sInputText = "\n~==========~ Enter your text [/h /q] ~==========~\n"
sText = _getText(sInputText)
while True:
if sText.startswith("?"):
for sWord in sText[1:].strip().split():
if sWord:
echo("* " + sWord)
for sMorph in oSpellChecker.getMorph(sWord):
echo(" {:<32} {}".format(sMorph, oLexicographer.formatTags(sMorph)))
elif sText.startswith("!"):
for sWord in sText[1:].strip().split():
if sWord:
for lSugg in oSpellChecker.suggest(sWord):
echo(" | ".join(lSugg))
elif sText.startswith(">"):
oSpellChecker.drawPath(sText[1:].strip())
elif sText.startswith("="):
for sRes in oSpellChecker.select(sText[1:].strip()):
echo(sRes)
elif sText.startswith("/+ "):
oGrammarChecker.gce.setOptions({ opt:True for opt in sText[3:].strip().split() if opt in oGrammarChecker.gce.getOptions() })
echo("done")
elif sText.startswith("/- "):
oGrammarChecker.gce.setOptions({ opt:False for opt in sText[3:].strip().split() if opt in oGrammarChecker.gce.getOptions() })
echo("done")
elif sText.startswith("/-- "):
for sRule in sText[3:].strip().split():
oGrammarChecker.gce.ignoreRule(sRule)
echo("done")
elif sText.startswith("/++ "):
for sRule in sText[3:].strip().split():
oGrammarChecker.gce.reactivateRule(sRule)
echo("done")
elif sText == "/debug" or sText == "/d":
xArgs.debug = not(xArgs.debug)
echo("debug mode on" if xArgs.debug else "debug mode off")
elif sText == "/textformatter" or sText == "/tf":
xArgs.textformatter = not(xArgs.textformatter)
echo("textformatter on" if xArgs.debug else "textformatter off")
elif sText == "/help" or sText == "/h":
echo(_HELP)
elif sText == "/lopt" or sText == "/lo":
oGrammarChecker.gce.displayOptions("fr")
elif sText.startswith("/lr"):
sText = sText.strip()
sFilter = sText[sText.find(" "):].strip() if sText != "/lr" and sText != "/rules" else None
oGrammarChecker.gce.displayRules(sFilter)
elif sText == "/quit" or sText == "/q":
break
elif sText.startswith("/rl"):
# reload (todo)
pass
else:
for sParagraph in txt.getParagraph(sText):
if xArgs.textformatter:
sText = oTextFormatter.formatText(sText)
sRes = oGrammarChecker.generateParagraph(sText, bEmptyIfNoErrors=xArgs.only_when_errors, nWidth=xArgs.width, bDebug=xArgs.debug)
if sRes:
echo("\n" + sRes)
else:
echo("\nNo error found.")
sText = _getText(sInputText)
if __name__ == '__main__':
main()
|