Overview
Comment: | [graphspell][core][fr] code cleaning (pylint) |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | fr | core | graphspell |
Files: | files | file ages | folders |
SHA3-256: | c65b7e2b8b4b1507adbe01cd765e64b1 |
User & Date: | olr on 2019-05-15 11:55:44 |
Other Links: | manifest | tags |
Context
2019-05-15 | | |
16:12 | [fx] position and size of panels check-in: eb18e7fd4b user: olr tags: trunk, fx | |
11:55 | [graphspell][core][fr] code cleaning (pylint) check-in: c65b7e2b8b user: olr tags: trunk, fr, core, graphspell | |
10:47 | [graphspell] import dictionary: include lang code in error message check-in: 2be0562a74 user: olr tags: trunk, graphspell | |
Changes
Modified gc_core/py/lang_core/__init__.py from [0ee899897d] to [90d4c538b4].
1 2 | from .gc_engine import * | > > > | 1 2 3 4 5 | """ Grammalecte - core grammar checker engine """ from .gc_engine import * |
Modified gc_core/py/lang_core/gc_engine.py from [2f0b34979e] to [b546c7e179].
︙ | ︙ | |||
550 551 552 553 554 555 556 | globals()[sWhat](self.lToken, nTokenOffset, nLastToken) if bDebug: echo(" DISAMBIGUATOR: ({}) [{}:{}]".format(sWhat, self.lToken[nTokenOffset+1]["sValue"], self.lToken[nLastToken]["sValue"])) elif cActionType == ">": # we do nothing, this test is just a condition to apply all following actions if bDebug: echo(" COND_OK") | < | 550 551 552 553 554 555 556 557 558 559 560 561 562 563 | globals()[sWhat](self.lToken, nTokenOffset, nLastToken) if bDebug: echo(" DISAMBIGUATOR: ({}) [{}:{}]".format(sWhat, self.lToken[nTokenOffset+1]["sValue"], self.lToken[nLastToken]["sValue"])) elif cActionType == ">": # we do nothing, this test is just a condition to apply all following actions if bDebug: echo(" COND_OK") elif cActionType == "/": # Tag nTokenStart = nTokenOffset + eAct[0] if eAct[0] > 0 else nLastToken + eAct[0] nTokenEnd = nTokenOffset + eAct[1] if eAct[1] > 0 else nLastToken + eAct[1] for i in range(nTokenStart, nTokenEnd+1): if "aTags" in self.lToken[i]: self.lToken[i]["aTags"].update(sWhat.split("|")) |
︙ | ︙ |
Modified gc_lang/fr/modules/tests.py from [438adee14a] to [4d8492a2e0].
1 2 3 4 5 6 7 8 9 10 | #! python3 """ Grammar checker tests for French language """ import unittest import os import re import time | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 | #! python3 """ Grammar checker tests for French language """ import unittest import os import re import time from contextlib import contextmanager from ..graphspell.ibdawg import IBDAWG from ..graphspell.echo import echo from . import gc_engine as gce from . import conj from . import phonet from . import mfsp @contextmanager def timeblock (label, hDst): "performance counter (contextmanager)" start = time.perf_counter() try: yield finally: end = time.perf_counter() print('{} : {}'.format(label, end - start)) if hDst: hDst.write("{:<12.6}".format(end-start)) def perf (sVersion, hDst=None): "performance tests" print("\nPerformance tests") gce.load() gce.parse("Texte sans importance… utile pour la compilation des règles avant le calcul des perfs.") spHere, _ = os.path.split(__file__) with open(os.path.join(spHere, "perf.txt"), "r", encoding="utf-8") as hSrc: if hDst: hDst.write("{:<12}{:<20}".format(sVersion, time.strftime("%Y.%m.%d %H:%M"))) for sText in ( s.strip() for s in hSrc if not s.startswith("#") and s.strip() ): with timeblock(sText[:sText.find(".")], hDst): gce.parse(sText) if hDst: hDst.write("\n") def _fuckBackslashUTF8 (s): "fuck that shit" return s.replace("\u2019", "'").replace("\u2013", "–").replace("\u2014", "—") class TestDictionary (unittest.TestCase): |
︙ | ︙ | |||
213 214 215 216 217 218 219 | sRes = " " * len(sLine) for i, m in enumerate(self._zError.finditer(sLine)): nStart = m.start() - (4 * i) nEnd = m.end() - (4 * (i+1)) sRes = sRes[:nStart] + "~" * (nEnd - nStart) + sRes[nEnd:-4] return sRes | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 | sRes = " " * len(sLine) for i, m in enumerate(self._zError.finditer(sLine)): nStart = m.start() - (4 * i) nEnd = m.end() - (4 * (i+1)) sRes = sRes[:nStart] + "~" * (nEnd - nStart) + sRes[nEnd:-4] return sRes def main(): "start function" unittest.main() if __name__ == '__main__': main() |
Modified graphspell/ibdawg.py from [2ca6102d5a] to [0b64fb2804].
︙ | ︙ | |||
186 187 188 189 190 191 192 193 194 195 | for i in range(1, self.nChar+1): self.dChar[self.lArcVal[i]] = i self.dCharVal = { v: k for k, v in self.dChar.items() } self.nBytesOffset = 1 # version 3 def _initJSON (self, oJSON): "initialize with a JSON text file" self.__dict__.update(oJSON) self.byDic = binascii.unhexlify(self.sByDic) self.dCharVal = { v: k for k, v in self.dChar.items() } | > | | 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 | for i in range(1, self.nChar+1): self.dChar[self.lArcVal[i]] = i self.dCharVal = { v: k for k, v in self.dChar.items() } self.nBytesOffset = 1 # version 3 def _initJSON (self, oJSON): "initialize with a JSON text file" self.sByDic = "" # init to prevent pylint whining self.__dict__.update(oJSON) self.byDic = binascii.unhexlify(self.sByDic) self.dCharVal = { v: k for k, v in self.dChar.items() } self.a2grams = set(getattr(self, 'l2grams')) if hasattr(self, 'l2grams') else None def getInfo (self): "return string about the IBDAWG" return " Language: {0.sLangName} Lang code: {0.sLangCode} Dictionary name: {0.sDicName}" \ " Compression method: {0.nCompressionMethod:>2} Date: {0.sDate} Stemming: {0.cStemming}FX\n" \ " Arcs values: {0.nArcVal:>10,} = {0.nChar:>5,} characters, {0.nAff:>6,} affixes, {0.nTag:>6,} tags\n" \ " Dictionary: {0.nEntry:>12,} entries, {0.nNode:>11,} nodes, {0.nArc:>11,} arcs\n" \ |
︙ | ︙ |
Modified graphspell/tokenizer.py from [08b2581ffe] to [7d6a173497].
︙ | ︙ | |||
56 57 58 59 60 61 62 | if bStartEndToken: yield { "i": 0, "sType": "INFO", "sValue": "<start>", "nStart": 0, "nEnd": 0, "lMorph": ["<start>"] } for i, m in enumerate(self.zToken.finditer(sText), 1): yield { "i": i, "sType": m.lastgroup, "sValue": m.group(), "nStart": m.start(), "nEnd": m.end() } if bStartEndToken: iEnd = len(sText) yield { "i": i+1, "sType": "INFO", "sValue": "<end>", "nStart": iEnd, "nEnd": iEnd, "lMorph": ["<end>"] } | > > > > | 56 57 58 59 60 61 62 63 64 65 66 | if bStartEndToken: yield { "i": 0, "sType": "INFO", "sValue": "<start>", "nStart": 0, "nEnd": 0, "lMorph": ["<start>"] } for i, m in enumerate(self.zToken.finditer(sText), 1): yield { "i": i, "sType": m.lastgroup, "sValue": m.group(), "nStart": m.start(), "nEnd": m.end() } if bStartEndToken: iEnd = len(sText) yield { "i": i+1, "sType": "INFO", "sValue": "<end>", "nStart": iEnd, "nEnd": iEnd, "lMorph": ["<end>"] } def getTokenTypes (self): "returns list of token types as tuple (token name, regex)" return [ sRegex[4:-1].split(">") for sRegex in _PATTERNS[self.sLang] ] |
Modified pylintrc from [75525a4a8c] to [032bb6abd3].
︙ | ︙ | |||
280 281 282 283 284 285 286 | #inlinevar-rgx= # Naming style matching correct method names method-naming-style=camelCase # Regular expression matching correct method names. Overrides method-naming- # style | | | 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 | #inlinevar-rgx= # Naming style matching correct method names method-naming-style=camelCase # Regular expression matching correct method names. Overrides method-naming- # style method-rgx=^test_|^_|^[a-zA-Z][a-zA-Z0-9]+$ # Naming style matching correct module names module-naming-style=snake_case # Regular expression matching correct module names. Overrides module-naming- # style #module-rgx= |
︙ | ︙ |