Grammalecte  Check-in [c65b7e2b8b]

Overview
Comment:[graphspell][core][fr] code cleaning (pylint)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | fr | core | graphspell
Files: files | file ages | folders
SHA3-256: c65b7e2b8b4b1507adbe01cd765e64b1b7329abb805b2d1c4df3e753ecbb5d1f
User & Date: olr on 2019-05-15 11:55:44
Other Links: manifest | tags
Context
2019-05-15
16:12
[fx] position and size of panels check-in: eb18e7fd4b user: olr tags: trunk, fx
11:55
[graphspell][core][fr] code cleaning (pylint) check-in: c65b7e2b8b user: olr tags: trunk, fr, core, graphspell
10:47
[graphspell] import dictionary: include lang code in error message check-in: 2be0562a74 user: olr tags: trunk, graphspell
Changes

Modified gc_core/py/lang_core/__init__.py from [0ee899897d] to [90d4c538b4].




1
2
1
2
3
4
5
+
+
+


"""
Grammalecte - core grammar checker engine
"""

from .gc_engine import *

Modified gc_core/py/lang_core/gc_engine.py from [2f0b34979e] to [b546c7e179].

550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
550
551
552
553
554
555
556

557
558
559
560
561
562
563







-







                                globals()[sWhat](self.lToken, nTokenOffset, nLastToken)
                                if bDebug:
                                    echo("    DISAMBIGUATOR: ({})  [{}:{}]".format(sWhat, self.lToken[nTokenOffset+1]["sValue"], self.lToken[nLastToken]["sValue"]))
                            elif cActionType == ">":
                                # we do nothing, this test is just a condition to apply all following actions
                                if bDebug:
                                    echo("    COND_OK")
                                pass
                            elif cActionType == "/":
                                # Tag
                                nTokenStart = nTokenOffset + eAct[0]  if eAct[0] > 0  else nLastToken + eAct[0]
                                nTokenEnd = nTokenOffset + eAct[1]  if eAct[1] > 0  else nLastToken + eAct[1]
                                for i in range(nTokenStart, nTokenEnd+1):
                                    if "aTags" in self.lToken[i]:
                                        self.lToken[i]["aTags"].update(sWhat.split("|"))

Modified gc_lang/fr/modules/tests.py from [438adee14a] to [4d8492a2e0].

1
2
3
4
5
6
7
8
9
10
11

12
13
14
15
16
17
18
19






























20
21
22
23
24
25
26
1
2
3
4
5
6
7
8
9
10

11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56










-
+








+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







#! python3

"""
Grammar checker tests for French language
"""

import unittest
import os
import re
import time

from contextlib import contextmanager

from ..graphspell.ibdawg import IBDAWG
from ..graphspell.echo import echo
from . import gc_engine as gce
from . import conj
from . import phonet
from . import mfsp


@contextmanager
def timeblock (label, hDst):
    "performance counter (contextmanager)"
    start = time.perf_counter()
    try:
        yield
    finally:
        end = time.perf_counter()
        print('{} : {}'.format(label, end - start))
        if hDst:
            hDst.write("{:<12.6}".format(end-start))


def perf (sVersion, hDst=None):
    "performance tests"
    print("\nPerformance tests")
    gce.load()
    gce.parse("Texte sans importance… utile pour la compilation des règles avant le calcul des perfs.")

    spHere, _ = os.path.split(__file__)
    with open(os.path.join(spHere, "perf.txt"), "r", encoding="utf-8") as hSrc:
        if hDst:
            hDst.write("{:<12}{:<20}".format(sVersion, time.strftime("%Y.%m.%d %H:%M")))
        for sText in ( s.strip() for s in hSrc if not s.startswith("#") and s.strip() ):
            with timeblock(sText[:sText.find(".")], hDst):
                gce.parse(sText)
        if hDst:
            hDst.write("\n")


def _fuckBackslashUTF8 (s):
    "fuck that shit"
    return s.replace("\u2019", "'").replace("\u2013", "–").replace("\u2014", "—")


class TestDictionary (unittest.TestCase):
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
243
244
245
246
247
248
249































250
251
252
253
254
255
256
257







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-








        sRes = " " * len(sLine)
        for i, m in enumerate(self._zError.finditer(sLine)):
            nStart = m.start() - (4 * i)
            nEnd = m.end() - (4 * (i+1))
            sRes = sRes[:nStart] + "~" * (nEnd - nStart) + sRes[nEnd:-4]
        return sRes


from contextlib import contextmanager
@contextmanager
def timeblock (label, hDst):
    "performance counter (contextmanager)"
    start = time.perf_counter()
    try:
        yield
    finally:
        end = time.perf_counter()
        print('{} : {}'.format(label, end - start))
        if hDst:
            hDst.write("{:<12.6}".format(end-start))


def perf (sVersion, hDst=None):
    "performance tests"
    print("\nPerformance tests")
    gce.load()
    gce.parse("Texte sans importance… utile pour la compilation des règles avant le calcul des perfs.")

    spHere, _ = os.path.split(__file__)
    with open(os.path.join(spHere, "perf.txt"), "r", encoding="utf-8") as hSrc:
        if hDst:
            hDst.write("{:<12}{:<20}".format(sVersion, time.strftime("%Y.%m.%d %H:%M")))
        for sText in ( s.strip() for s in hSrc if not s.startswith("#") and s.strip() ):
            with timeblock(sText[:sText.find(".")], hDst):
                gce.parse(sText)
        if hDst:
            hDst.write("\n")


def main():
    "start function"
    unittest.main()


if __name__ == '__main__':
    main()

Modified graphspell/ibdawg.py from [2ca6102d5a] to [0b64fb2804].

186
187
188
189
190
191
192

193
194
195
196

197
198
199
200
201
202
203
186
187
188
189
190
191
192
193
194
195
196

197
198
199
200
201
202
203
204







+



-
+







        for i in range(1, self.nChar+1):
            self.dChar[self.lArcVal[i]] = i
        self.dCharVal = { v: k  for k, v in self.dChar.items() }
        self.nBytesOffset = 1 # version 3

    def _initJSON (self, oJSON):
        "initialize with a JSON text file"
        self.sByDic = ""  # init to prevent pylint whining
        self.__dict__.update(oJSON)
        self.byDic = binascii.unhexlify(self.sByDic)
        self.dCharVal = { v: k  for k, v in self.dChar.items() }
        self.a2grams = set(self.l2grams)  if hasattr(self, 'l2grams')  else None
        self.a2grams = set(getattr(self, 'l2grams'))  if hasattr(self, 'l2grams')  else None

    def getInfo (self):
        "return string about the IBDAWG"
        return  "  Language: {0.sLangName}   Lang code: {0.sLangCode}   Dictionary name: {0.sDicName}" \
                "  Compression method: {0.nCompressionMethod:>2}   Date: {0.sDate}   Stemming: {0.cStemming}FX\n" \
                "  Arcs values:  {0.nArcVal:>10,} = {0.nChar:>5,} characters,  {0.nAff:>6,} affixes,  {0.nTag:>6,} tags\n" \
                "  Dictionary: {0.nEntry:>12,} entries,    {0.nNode:>11,} nodes,   {0.nArc:>11,} arcs\n" \

Modified graphspell/tokenizer.py from [08b2581ffe] to [7d6a173497].

56
57
58
59
60
61
62




56
57
58
59
60
61
62
63
64
65
66







+
+
+
+
        if bStartEndToken:
            yield { "i": 0, "sType": "INFO", "sValue": "<start>", "nStart": 0, "nEnd": 0, "lMorph": ["<start>"] }
        for i, m in enumerate(self.zToken.finditer(sText), 1):
            yield { "i": i, "sType": m.lastgroup, "sValue": m.group(), "nStart": m.start(), "nEnd": m.end() }
        if bStartEndToken:
            iEnd = len(sText)
            yield { "i": i+1, "sType": "INFO", "sValue": "<end>", "nStart": iEnd, "nEnd": iEnd, "lMorph": ["<end>"] }

    def getTokenTypes (self):
        "returns list of token types as tuple (token name, regex)"
        return [ sRegex[4:-1].split(">")  for sRegex in _PATTERNS[self.sLang] ]

Modified pylintrc from [75525a4a8c] to [032bb6abd3].

280
281
282
283
284
285
286
287

288
289
290
291
292
293
294
280
281
282
283
284
285
286

287
288
289
290
291
292
293
294







-
+







#inlinevar-rgx=

# Naming style matching correct method names
method-naming-style=camelCase

# Regular expression matching correct method names. Overrides method-naming-
# style
#method-rgx=^test_
method-rgx=^test_|^_|^[a-zA-Z][a-zA-Z0-9]+$

# Naming style matching correct module names
module-naming-style=snake_case

# Regular expression matching correct module names. Overrides module-naming-
# style
#module-rgx=