Grammalecte  Check-in [63b8e1b23a]

Overview
Comment:[core][fr] code cleaning (pylint)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | fr | core
Files: files | file ages | folders
SHA3-256: 63b8e1b23a899bd865b7878b3e6e10e5422ca5dd7fd676e24837c911a7b3c67b
User & Date: olr on 2019-05-12 10:01:25
Other Links: manifest | tags
Context
2019-05-12
10:30
[fr] faux positifs check-in: 414cbe8c5a user: olr tags: trunk, fr
10:01
[core][fr] code cleaning (pylint) check-in: 63b8e1b23a user: olr tags: trunk, fr, core
10:00
[graphspell] code cleaning (pylint) check-in: c2f4d1d4ee user: olr tags: trunk, graphspell
Changes

Modified gc_core/py/lang_core/gc_engine.py from [0d99523db5] to [cb1b186a08].

60
61
62
63
64
65
66
67

68
69
70
71
72
73
74
60
61
62
63
64
65
66

67
68
69
70
71
72
73
74







-
+







    global _sAppContext
    global _dOptions
    global _dOptionsColors
    global _oTokenizer
    try:
        _oSpellChecker = SpellChecker("${lang}", "${dic_main_filename_py}", "${dic_community_filename_py}", "${dic_personal_filename_py}")
        _sAppContext = sContext
        _dOptions = dict(gc_options.getOptions(sContext))   # duplication necessary, to be able to reset to default
        _dOptions = gc_options.getOptions(sContext).copy()   # duplication necessary, to be able to reset to default
        _dOptionsColors = gc_options.getOptionsColors(sContext, sColorType)
        _oTokenizer = _oSpellChecker.getTokenizer()
        _oSpellChecker.activateStorage()
    except:
        traceback.print_exc()


164
165
166
167
168
169
170
171

172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189

190
191
192
193
194
195
196
164
165
166
167
168
169
170

171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188

189
190
191
192
193
194
195
196







-
+

















-
+







def getOptions ():
    "return the dictionary of current options (the module-level <_dOptions> object itself, not a copy)"
    return _dOptions


def getDefaultOptions ():
    "return the dictionary of default options"
    return dict(gc_options.getOptions(_sAppContext))
    return gc_options.getOptions(_sAppContext).copy()


def getOptionsLabels (sLang):
    "return options labels for <sLang>, exactly as given by gc_options.getUI()"
    return gc_options.getUI(sLang)


def displayOptions (sLang):
    "display the list of grammar checking options (one line per option: name, value, label)"
    echo("List of options")
    # the labels do not depend on the option being printed: fetch them once instead of once per option
    dLabels = gc_options.getUI(sLang)
    # options are listed in sorted order; "?" is shown when an option has no label entry
    echo("\n".join( sOpt + ":\t" + str(xVal) + "\t" + dLabels.get(sOpt, ("?", ""))[0]  for sOpt, xVal in sorted(_dOptions.items()) ))
    echo("")


def resetOptions ():
    "set options to default values"
    global _dOptions
    _dOptions = dict(gc_options.getOptions(_sAppContext))
    _dOptions = getDefaultOptions()


#### Parsing

_zEndOfSentence = re.compile(r'([.?!:;…]\W+(?=[A-ZÉÈÎÔ])|.$)')
_zBeginOfParagraph = re.compile(r"^\W*")
_zEndOfParagraph = re.compile(r"\W*$")
1020
1021
1022
1023
1024
1025
1026

1027
1028
1029
1030
1031
1032
1033
1034

1035
1036
1037
1038
1039
1040
1041
1042

1043
1044
1045
1046

1047
1048
1049
1050
1051
1052
1053
1054
1055

1056
1057
1058
1059
1060
1061
1062
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067







+








+








+




+









+







    bResult = any(zPattern.search(sMorph)  for sMorph in lMorph)
    if bResult and bSetMorph:
        dToken1["lMorph"] = lMorph
    return bResult


def g_tag_before (dToken, dTags, sTag):
    "returns True if <sTag> is present on tokens before <dToken>"
    # unknown tag -> False; otherwise the token index must be greater than the first value stored for the tag
    return sTag in dTags and dToken["i"] > dTags[sTag][0]


def g_tag_after (dToken, dTags, sTag):
    "returns True if <sTag> is present on tokens after <dToken>"
    # unknown tag -> False; otherwise the token index must be lower than the second value stored for the tag
    return sTag in dTags and dToken["i"] < dTags[sTag][1]


def g_tag (dToken, sTag):
    "returns True if <sTag> is present on token <dToken>"
    # a token without an "aTags" entry is handled as a token with no tags
    return sTag in dToken.get("aTags", ())


def g_space_between_tokens (dToken1, dToken2, nMin, nMax=None):
    "checks if the gap between the end of <dToken1> and the start of <dToken2> is >= <nMin> and, when <nMax> is given, <= <nMax>"
    nGap = dToken2["nStart"] - dToken1["nEnd"]
    if nMax is None:
        # no upper bound requested
        return nGap >= nMin
    return nMin <= nGap <= nMax


def g_token (lToken, i):
    "return token at index <i> in lToken (or the closest one: first token if <i> is negative, last token if <i> is too large)"
    # clamp <i> to the valid range of indexes; negative values never count from the end
    iLast = len(lToken) - 1
    return lToken[min(max(i, 0), iLast)]


1152
1153
1154
1155
1156
1157
1158

1159
1160
1161
1162
1163
1164
1165
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171







+







    "set morphologies of <dToken>, always return True"
    dToken["lMorph"] = lMorph
    #echo("DA:", dToken["sValue"], lMorph)
    return True


def g_define_from (dToken, nLeft=None, nRight=None):
    "set morphologies of <dToken> from the slice [<nLeft>:<nRight>] of its value, always return True"
    sValue = dToken["sValue"]
    # without <nLeft>, the whole value is used and <nRight> is not taken into account
    sWord = sValue  if nLeft is None  else sValue[nLeft:nRight]
    dToken["lMorph"] = _oSpellChecker.getMorph(sWord)
    return True


Modified gc_lang/fr/modules/tests.py from [3f65f251a0] to [438adee14a].

20
21
22
23
24
25
26

27
28
29
30
31
32
33
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34







+








def _fuckBackslashUTF8 (s):
    "return <s> with typographic apostrophes (U+2019) replaced by straight ones"
    sText = s.replace("\u2019", "'")
    # NOTE(review): "\u2013" and "\u2014" look identical to the dashes they are replaced with,
    # so the next two calls presumably leave the text unchanged -- confirm the intent
    sText = sText.replace("\u2013", "–")
    sText = sText.replace("\u2014", "—")
    return sText


class TestDictionary (unittest.TestCase):
    "Test du correcteur orthographique"

    @classmethod
    def setUpClass (cls):
        cls.oDic = IBDAWG("${dic_main_filename_py}")

    def test_lookup (self):
        for sWord in ["branche", "Émilie"]:
43
44
45
46
47
48
49

50
51
52
53
54
55
56
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58







+








    def test_isvalid_failed (self):
        # each of these spellings must be reported as invalid by the dictionary
        lRejected = ["BranchE", "BRanche", "BRAnCHE", "émilie", "éMILIE", "émiLie"]
        for sRejected in lRejected:
            # the word is passed as message, to identify the failing entry
            self.assertFalse(self.oDic.isValid(sRejected), sRejected)


class TestConjugation (unittest.TestCase):
    "Tests des conjugaisons"

    @classmethod
    def setUpClass (cls):
        pass

    def test_isverb (self):
        for sVerb in ["avoir", "être", "aller", "manger", "courir", "venir", "faire", "finir"]:
66
67
68
69
70
71
72

73
74
75
76
77
78
79
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82







+







    def test_getconj (self):
        # each case: (verb, tense tag, person tag, expected conjugated form)
        lCases = [
            ("aller", ":E", ":2s", "va"),
            ("avoir", ":Iq", ":1s", "avais"),
            ("être", ":Ip", ":2p", "êtes"),
            ("manger", ":Sp", ":3s", "mange"),
            ("finir", ":K", ":3p", "finiraient"),
            ("prendre", ":If", ":1p", "prendrons")
        ]
        for sInfinitive, sTenseTag, sPerson, sExpected in lCases:
            # the verb is passed as message, to identify the failing entry
            self.assertEqual(conj.getConj(sInfinitive, sTenseTag, sPerson), sExpected, sInfinitive)


class TestPhonet (unittest.TestCase):
    "Tests des équivalences phonétiques"

    @classmethod
    def setUpClass (cls):
        cls.lSet = [
            ["ce", "se"],
            ["ces", "ses", "sais", "sait"],
            ["cet", "cette", "sept", "set", "sets"],
102
103
104
105
106
107
108

109
110
111
112
113
114
115
116
117
118
119
120
121
122
123

124
125
126
127
128
129
130
131
132
133

134
135
136
137
138
139
140
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137

138
139
140
141
142
143
144
145







+















+









-
+







    def test_getsimil (self):
        # for every word of a set, getSimil() must return the whole set as a sorted list
        for aGroup in self.lSet:
            lExpected = sorted(aGroup)
            for sMember in aGroup:
                self.assertListEqual(phonet.getSimil(sMember), lExpected)


class TestMasFemSingPlur (unittest.TestCase):
    "Tests for masculine, feminine, singular and plural forms"

    @classmethod
    def setUpClass (cls):
        # pairs of (singular form, expected list of plural forms)
        cls.lPlural = [
            ("travail", ["travaux"]),
            ("vœu", ["vœux"]),
            ("gentleman", ["gentlemans", "gentlemen"])
        ]

    def test_getplural (self):
        # the returned list must be equal to the expected one, order included
        for sSingular, lExpected in self.lPlural:
            lFound = mfsp.getMiscPlural(sSingular)
            self.assertListEqual(lFound, lExpected)


class TestGrammarChecking (unittest.TestCase):
    "Tests du correcteur grammatical"

    @classmethod
    def setUpClass (cls):
        gce.load()
        cls._zError = re.compile(r"\{\{.*?\}\}")
        cls._aTestedRules = set()

    def test_parse (self):
        zOption = re.compile("^__([a-zA-Z0-9]+)__ ")
        spHere, spfThisFile = os.path.split(__file__)
        spHere, _ = os.path.split(__file__)
        with open(os.path.join(spHere, "gc_test.txt"), "r", encoding="utf-8") as hSrc:
            nError = 0
            for sLine in ( s for s in hSrc if not s.startswith("#") and s.strip() ):
                sLineNum = sLine[:10].strip()
                sLine = sLine[10:].strip()
                sOption = None
                m = zOption.search(sLine)
212
213
214
215
216
217
218

219
220
221
222
223
224
225
226
227
228
229
230
231
232
233

234
235

236
237
238
239
240
241

242
243
244
245
246

247
248
249
250
251
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238

239
240

241
242
243
244
245
246

247
248
249
250
251
252
253
254
255
256
257
258







+














-
+

-
+





-
+





+





            sRes = sRes[:nStart] + "~" * (nEnd - nStart) + sRes[nEnd:-4]
        return sRes


from contextlib import contextmanager
@contextmanager
def timeblock (label, hDst):
    "performance counter (contextmanager): print the time spent in the block and, if <hDst> is set, write it to <hDst>"
    nBegin = time.perf_counter()
    try:
        yield
    finally:
        # executed even if the timed block raises
        nElapsed = time.perf_counter() - nBegin
        print('{} : {}'.format(label, nElapsed))
        if hDst:
            # left-aligned in a 12 characters wide column
            hDst.write("{:<12.6}".format(nElapsed))


def perf (sVersion, hDst=None):
    "performance tests"
    print("\nPerformance tests")
    gce.load()
    aErrs = gce.parse("Texte sans importance… utile pour la compilation des règles avant le calcul des perfs.")
    gce.parse("Texte sans importance… utile pour la compilation des règles avant le calcul des perfs.")

    spHere, spfThisFile = os.path.split(__file__)
    spHere, _ = os.path.split(__file__)
    with open(os.path.join(spHere, "perf.txt"), "r", encoding="utf-8") as hSrc:
        if hDst:
            hDst.write("{:<12}{:<20}".format(sVersion, time.strftime("%Y.%m.%d %H:%M")))
        for sText in ( s.strip() for s in hSrc if not s.startswith("#") and s.strip() ):
            with timeblock(sText[:sText.find(".")], hDst):
                aErrs = gce.parse(sText)
                gce.parse(sText)
        if hDst:
            hDst.write("\n")


def main():
    "start function: run the tests through unittest.main()"
    unittest.main()


if __name__ == '__main__':
    main()

Modified gc_lang/fr/modules/textformatter.py from [e20bd3ea84] to [3443a1d35d].

183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243






















































244

245
246
247
248
249
250
251

252
253
254
255
256
257

258
259
260
261




183
184
185
186
187
188
189






















































190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243

244
245
246
247
248
249
250

251
252
253
254
255
256

257
258
259
260
261
262
263
264
265







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+






-
+





-
+




+
+
+
+
    ## missing apostrophes
    "ma_word":                  [("(?i)(qu|lorsqu|puisqu|quoiqu|presqu|jusqu|aujourd|entr|quelqu|prud) ", "\\1’")],
    "ma_1letter_lowercase":     [("\\b([ldjnmtscç]) (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])", "\\1’")],
    "ma_1letter_uppercase":     [("\\b([LDJNMTSCÇ]) (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])", "\\1’")]
}


lOptRepl = [
    ("ts_units", True),
    ("start_of_paragraph", True),
    ("end_of_paragraph", True),
    ("between_words", True),
    ("before_punctuation", True),
    ("within_parenthesis", True),
    ("within_square_brackets", True),
    ("within_quotation_marks", True),
    ("nbsp_before_punctuation", True),
    ("nbsp_within_quotation_marks", True),
    ("nbsp_within_numbers", True),
    ("nnbsp_before_punctuation", False),
    ("nnbsp_within_quotation_marks", False),
    ("nnbsp_within_numbers", False),
    ("nbsp_titles", False),
    ("nbsp_before_symbol", True),
    ("nbsp_before_units", True),
    ("nbsp_repair", True),
    ("add_space_after_punctuation", True),
    ("add_space_around_hyphens", True),
    ("add_space_repair", True),
    ("erase_non_breaking_hyphens", False),
    ("ts_apostrophe", True),
    ("ts_ellipsis", True),
    ("ts_n_dash_middle", True),
    ("ts_m_dash_middle", False),
    ("ts_n_dash_start", False),
    ("ts_m_dash_start", True),
    ("ts_quotation_marks", True),
    ("ts_spell", True),
    ("ts_ligature_ffi_on", False),
    ("ts_ligature_ffl_on", False),
    ("ts_ligature_fi_on", False),
    ("ts_ligature_fl_on", False),
    ("ts_ligature_ff_on", False),
    ("ts_ligature_ft_on", False),
    ("ts_ligature_st_on", False),
    ("ts_ligature_fi_off", False),
    ("ts_ligature_fl_off", False),
    ("ts_ligature_ff_off", False),
    ("ts_ligature_ffi_off", False),
    ("ts_ligature_ffl_off", False),
    ("ts_ligature_ft_off", False),
    ("ts_ligature_st_off", False),
    ("ordinals_exponant", False),
    ("ordinals_no_exponant", True),
    ("etc", True),
    ("mh_interrogatives", True),
    ("mh_numbers", True),
    ("mh_frequent_words", True),
    ("ma_word", True),
    ("ma_1letter_lowercase", False),
    ("ma_1letter_uppercase", False),
dDefaultOptions = {
    "ts_units": True,
    "start_of_paragraph": True,
    "end_of_paragraph": True,
    "between_words": True,
    "before_punctuation": True,
    "within_parenthesis": True,
    "within_square_brackets": True,
    "within_quotation_marks": True,
    "nbsp_before_punctuation": True,
    "nbsp_within_quotation_marks": True,
    "nbsp_within_numbers": True,
    "nnbsp_before_punctuation": False,
    "nnbsp_within_quotation_marks": False,
    "nnbsp_within_numbers": False,
    "nbsp_titles": False,
    "nbsp_before_symbol": True,
    "nbsp_before_units": True,
    "nbsp_repair": True,
    "add_space_after_punctuation": True,
    "add_space_around_hyphens": True,
    "add_space_repair": True,
    "erase_non_breaking_hyphens": False,
    "ts_apostrophe": True,
    "ts_ellipsis": True,
    "ts_n_dash_middle": True,
    "ts_m_dash_middle": False,
    "ts_n_dash_start": False,
    "ts_m_dash_start": True,
    "ts_quotation_marks": True,
    "ts_spell": True,
    "ts_ligature_ffi_on": False,
    "ts_ligature_ffl_on": False,
    "ts_ligature_fi_on": False,
    "ts_ligature_fl_on": False,
    "ts_ligature_ff_on": False,
    "ts_ligature_ft_on": False,
    "ts_ligature_st_on": False,
    "ts_ligature_fi_off": False,
    "ts_ligature_fl_off": False,
    "ts_ligature_ff_off": False,
    "ts_ligature_ffi_off": False,
    "ts_ligature_ffl_off": False,
    "ts_ligature_ft_off": False,
    "ts_ligature_st_off": False,
    "ordinals_exponant": False,
    "ordinals_no_exponant": True,
    "etc": True,
    "mh_interrogatives": True,
    "mh_numbers": True,
    "mh_frequent_words": True,
    "ma_word": True,
    "ma_1letter_lowercase": False,
    "ma_1letter_uppercase": False
]
}


class TextFormatter:
    "Text Formatter: purge typographic mistakes from text"

    def __init__ (self):
        for sOpt, lTup in dReplTable.items():
        for _, lTup in dReplTable.items():
            for i, t in enumerate(lTup):
                lTup[i] = (re.compile(t[0]), t[1])

    def formatText (self, sText):
        "returns formatted text"
        for sOptName, bVal in lOptRepl:
        for sOptName, bVal in dDefaultOptions.items():
            if bVal:
                for zRgx, sRep in dReplTable[sOptName]:
                    sText = zRgx.sub(sRep, sText)
        return sText

    def getDefaultOptions (self):
        "returns default options"
        return dDefaultOptions.copy()

Modified pylintrc from [273c5fc7d5] to [c576979eec].

230
231
232
233
234
235
236
237

238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
230
231
232
233
234
235
236

237
238
239
240
241
242
243
244
245
246
247

248
249
250
251
252
253
254







-
+










-







docstring-min-length=-1

# Naming style matching correct function names
function-naming-style=camelCase

# Regular expression matching correct function names. Overrides function-
# naming-style
function-rgx=^[a-z]\w+|^_*
function-rgx=^[a-z]\w+|^_

# Good variable names which should always be accepted, separated by a comma
good-names=i,
           i1,
           i2,
           j,
           k,
           s,
           s1,
           s2,
           sx,
           sf,
           sp,
           spf,
           c,
           c1,
           c2,
           n,
281
282
283
284
285
286
287
288

289
290
291
292
293
294
295
280
281
282
283
284
285
286

287
288
289
290
291
292
293
294







-
+







#inlinevar-rgx=

# Naming style matching correct method names
method-naming-style=camelCase

# Regular expression matching correct method names. Overrides method-naming-
# style
#method-rgx=
#method-rgx=^test_

# Naming style matching correct module names
module-naming-style=snake_case

# Regular expression matching correct module names. Overrides module-naming-
# style
#module-rgx=
329
330
331
332
333
334
335
336

337
338
339
340
341
342
343
328
329
330
331
332
333
334

335
336
337
338
339
340
341
342







-
+







# tab).
indent-string='    '

# Maximum number of characters on a single line.
max-line-length=200

# Maximum number of lines in a module
max-module-lines=1000
max-module-lines=5000

# List of optional constructs for which whitespace checking is disabled. `dict-
# separator` is used to allow tabulation in dicts, etc.: {1  : 1,\n222: 2}.
# `trailing-comma` allows a space between comma and closing bracket: (a, ).
# `empty-line` allows space-only lines.
no-space-check=trailing-comma,
               dict-separator
488
489
490
491
492
493
494
495




496
497
498
499
500
501
502
487
488
489
490
491
492
493

494
495
496
497
498
499
500
501
502
503
504







-
+
+
+
+








# List of member names, which should be excluded from the protected access
# warning.
exclude-protected=_asdict,
                  _fields,
                  _replace,
                  _source,
                  _make
                  _make,
                  _getTags,
                  _hasConjWithTags,
                  _getConjWithTags

# List of valid names for the first argument in a class method.
valid-classmethod-first-arg=cls

# List of valid names for the first argument in a metaclass class method.
valid-metaclass-classmethod-first-arg=mcs