Overview
| Comment: | [graphspell][core][fr] code cleaning (pylint) |
|---|---|
| SHA3-256: | c65b7e2b8b4b1507adbe01cd765e64b1 |
| User & Date: | olr on 2019-05-15 11:55:44 |
Context
2019-05-15

- 16:12 [fx] position and size of panels (check-in: eb18e7fd4b, user: olr, tags: trunk, fx)
- 11:55 [graphspell][core][fr] code cleaning (pylint) (check-in: c65b7e2b8b, user: olr, tags: trunk, fr, core, graphspell)
- 10:47 [graphspell] import dictionary: include lang code in error message (check-in: 2be0562a74, user: olr, tags: trunk, graphspell)
Changes
Modified gc_core/py/lang_core/__init__.py from [0ee899897d] to [90d4c538b4].
Lines 1-5 after the change: a module docstring is added.

```diff
+"""
+Grammalecte - core grammar checker engine
+"""
 from .gc_engine import *
```
Modified gc_core/py/lang_core/gc_engine.py from [2f0b34979e] to [b546c7e179].
Lines 550-563 after the change: a new "/" action type is added to tag ranges of tokens.

```diff
                     globals()[sWhat](self.lToken, nTokenOffset, nLastToken)
                     if bDebug:
                         echo("    DISAMBIGUATOR: ({}) [{}:{}]".format(sWhat, self.lToken[nTokenOffset+1]["sValue"], self.lToken[nLastToken]["sValue"]))
                 elif cActionType == ">":
                     # we do nothing, this test is just a condition to apply all following actions
                     if bDebug:
                         echo("    COND_OK")
+                elif cActionType == "/":
+                    # Tag
+                    nTokenStart = nTokenOffset + eAct[0] if eAct[0] > 0 else nLastToken + eAct[0]
+                    nTokenEnd = nTokenOffset + eAct[1] if eAct[1] > 0 else nLastToken + eAct[1]
+                    for i in range(nTokenStart, nTokenEnd+1):
+                        if "aTags" in self.lToken[i]:
+                            self.lToken[i]["aTags"].update(sWhat.split("|"))
```
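For illustration, the new tag action can be pulled out of the engine: positive offsets in eAct count forward from nTokenOffset, while zero or negative offsets count back from nLastToken. This is a standalone sketch; the token list and the values of eAct and sWhat are invented, only the offset arithmetic and the aTags update come from the diff.

```python
# Standalone sketch of the "/" (tag) action; lToken, nTokenOffset, nLastToken,
# eAct and sWhat mirror the engine's names, but the values are made up.
lToken = [ { "sValue": s, "aTags": set() } for s in ("<start>", "une", "bonne", "idée", "<end>") ]
nTokenOffset = 0      # index of the token just before the matched sequence
nLastToken = 3        # index of the last matched token
eAct = (1, -1)        # bounds relative to nTokenOffset (>0) or nLastToken (<=0)
sWhat = "tag1|tag2"   # pipe-separated tags to apply

nTokenStart = nTokenOffset + eAct[0] if eAct[0] > 0 else nLastToken + eAct[0]
nTokenEnd = nTokenOffset + eAct[1] if eAct[1] > 0 else nLastToken + eAct[1]
for i in range(nTokenStart, nTokenEnd+1):
    if "aTags" in lToken[i]:
        lToken[i]["aTags"].update(sWhat.split("|"))

print([sorted(dToken["aTags"]) for dToken in lToken])
# [[], ['tag1', 'tag2'], ['tag1', 'tag2'], [], []]
```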
Modified gc_lang/fr/modules/tests.py from [438adee14a] to [4d8492a2e0].
Lines 1-56 after the change: the module gains extra imports, a timing context manager, and a perf() runner at the top of the file.

```python
#! python3

"""
Grammar checker tests for French language
"""

import unittest
import os
import re
import time
from contextlib import contextmanager

from ..graphspell.ibdawg import IBDAWG
from ..graphspell.echo import echo

from . import gc_engine as gce
from . import conj
from . import phonet
from . import mfsp


@contextmanager
def timeblock (label, hDst):
    "performance counter (contextmanager)"
    start = time.perf_counter()
    try:
        yield
    finally:
        end = time.perf_counter()
        print('{} : {}'.format(label, end - start))
        if hDst:
            hDst.write("{:<12.6}".format(end-start))


def perf (sVersion, hDst=None):
    "performance tests"
    print("\nPerformance tests")
    gce.load()
    gce.parse("Texte sans importance… utile pour la compilation des règles avant le calcul des perfs.")
    spHere, _ = os.path.split(__file__)
    with open(os.path.join(spHere, "perf.txt"), "r", encoding="utf-8") as hSrc:
        if hDst:
            hDst.write("{:<12}{:<20}".format(sVersion, time.strftime("%Y.%m.%d %H:%M")))
        for sText in ( s.strip() for s in hSrc if not s.startswith("#") and s.strip() ):
            with timeblock(sText[:sText.find(".")], hDst):
                gce.parse(sText)
        if hDst:
            hDst.write("\n")


def _fuckBackslashUTF8 (s):
    "fuck that shit"
    return s.replace("\u2019", "'").replace("\u2013", "–").replace("\u2014", "—")


class TestDictionary (unittest.TestCase):
```
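The timeblock context manager above can also be used on its own. A small sketch; the label and the sleep are arbitrary, and None stands in for the optional hDst file handle that perf() passes in:

```python
import time
from contextlib import contextmanager

@contextmanager
def timeblock (label, hDst):
    "performance counter (contextmanager)"
    start = time.perf_counter()
    try:
        yield
    finally:
        end = time.perf_counter()
        print('{} : {}'.format(label, end - start))
        if hDst:
            hDst.write("{:<12.6}".format(end-start))

# time an arbitrary block of work; pass a file handle instead of None
# to also log the duration, as perf() does with perf.txt
with timeblock("sleep", None):
    time.sleep(0.1)
# prints something like: sleep : 0.1001234
```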
Lines 243-257 after the change; the diff removes roughly thirty lines between the end of this helper and main().

```python
        sRes = " " * len(sLine)
        for i, m in enumerate(self._zError.finditer(sLine)):
            nStart = m.start() - (4 * i)
            nEnd = m.end() - (4 * (i+1))
            sRes = sRes[:nStart] + "~" * (nEnd - nStart) + sRes[nEnd:-4]
        return sRes


def main():
    "start function"
    unittest.main()


if __name__ == '__main__':
    main()
```
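The excerpt above is the tail of a helper that turns a test line into an underline mask. A standalone sketch, assuming the Grammalecte test convention that expected errors are wrapped in {{…}}; the function name and the _zError regex are reconstructed for illustration, not taken from the diff:

```python
import re

_zError = re.compile(r"\{\{.*?\}\}")   # assumed error marker pattern

def getExpectedErrors (sLine):
    "return a mask with ~ under each {{…}} span, offsets corrected for markers"
    sRes = " " * len(sLine)
    for i, m in enumerate(_zError.finditer(sLine)):
        nStart = m.start() - (4 * i)          # 4 marker chars {{ }} per previous error
        nEnd = m.end() - (4 * (i+1))
        sRes = sRes[:nStart] + "~" * (nEnd - nStart) + sRes[nEnd:-4]
    return sRes

print(repr(getExpectedErrors("les {{chat}} noirs")))
# '    ~~~~      '  (tildes under "chat" once the {{ }} markers are stripped)
```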
Modified graphspell/ibdawg.py from [2ca6102d5a] to [0b64fb2804].
Lines 186-204 after the change: _initJSON now initializes sByDic before the __dict__ update (to silence pylint) and builds the optional 2-gram set a2grams.

```python
        for i in range(1, self.nChar+1):
            self.dChar[self.lArcVal[i]] = i
        self.dCharVal = { v: k  for k, v in self.dChar.items() }
        self.nBytesOffset = 1 # version 3

    def _initJSON (self, oJSON):
        "initialize with a JSON text file"
        self.sByDic = ""  # init to prevent pylint whining
        self.__dict__.update(oJSON)
        self.byDic = binascii.unhexlify(self.sByDic)
        self.dCharVal = { v: k  for k, v in self.dChar.items() }
        self.a2grams = set(getattr(self, 'l2grams')) if hasattr(self, 'l2grams') else None

    def getInfo (self):
        "return string about the IBDAWG"
        return "  Language: {0.sLangName}   Lang code: {0.sLangCode}   Dictionary name: {0.sDicName}\n" \
               "  Compression method: {0.nCompressionMethod:>2}   Date: {0.sDate}   Stemming: {0.cStemming}FX\n" \
               "  Arcs values:  {0.nArcVal:>10,} = {0.nChar:>5,} characters, {0.nAff:>6,} affixes, {0.nTag:>6,} tags\n" \
               "  Dictionary: {0.nEntry:>12,} entries,    {0.nNode:>11,} nodes,   {0.nArc:>11,} arcs\n" \
```
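The point of the added line is that pylint cannot see attributes injected through __dict__.update(), so sByDic is declared before the update. A minimal illustration of the same pattern; the class name and the JSON payload below are invented:

```python
import binascii

class MiniDic:
    "toy stand-in for IBDAWG's JSON initialization"

    def _initJSON (self, oJSON):
        "initialize from a JSON object"
        self.sByDic = ""  # declared up front so static analysis knows the attribute exists
        self.__dict__.update(oJSON)
        self.byDic = binascii.unhexlify(self.sByDic)
        # l2grams is optional: only some dictionaries ship a 2-gram list
        self.a2grams = set(getattr(self, 'l2grams')) if hasattr(self, 'l2grams') else None

oDic = MiniDic()
oDic._initJSON({ "sByDic": "48656c6c6f", "l2grams": ["he", "el", "ll", "lo"] })
print(oDic.byDic)            # b'Hello'
print(sorted(oDic.a2grams))  # ['el', 'he', 'll', 'lo']
```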
Modified graphspell/tokenizer.py from [08b2581ffe] to [7d6a173497].
Lines 56-66 after the change: a getTokenTypes() helper is added below genTokens().

```diff
         if bStartEndToken:
             yield { "i": 0, "sType": "INFO", "sValue": "<start>", "nStart": 0, "nEnd": 0, "lMorph": ["<start>"] }
         for i, m in enumerate(self.zToken.finditer(sText), 1):
             yield { "i": i, "sType": m.lastgroup, "sValue": m.group(), "nStart": m.start(), "nEnd": m.end() }
         if bStartEndToken:
             iEnd = len(sText)
             yield { "i": i+1, "sType": "INFO", "sValue": "<end>", "nStart": iEnd, "nEnd": iEnd, "lMorph": ["<end>"] }
+
+    def getTokenTypes (self):
+        "returns list of token types as tuple (token name, regex)"
+        return [ sRegex[4:-1].split(">") for sRegex in _PATTERNS[self.sLang] ]
```
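getTokenTypes() relies on each entry of _PATTERNS being a named group of the form (?P&lt;NAME&gt;regex): slicing off the leading "(?P<" and the trailing ")" and splitting on ">" yields (name, regex) pairs. A sketch with invented stand-in patterns, since the real _PATTERNS table is not shown in this diff:

```python
# stand-in patterns; the real table lives elsewhere in graphspell/tokenizer.py
lPatterns = [ r"(?P<NUM>\d+(?:[.,]\d+)?)", r"(?P<WORD>\w+)" ]

lTypes = [ sRegex[4:-1].split(">") for sRegex in lPatterns ]
print(lTypes)
# [['NUM', '\\d+(?:[.,]\\d+)?'], ['WORD', '\\w+']]
# note: a ">" inside a pattern body would split too, so patterns must avoid it
```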
Modified pylintrc from [75525a4a8c] to [032bb6abd3].
Lines 280-294 after the change: method-rgx is overridden so that test_* and underscore-prefixed names are accepted alongside camelCase.

```ini
#inlinevar-rgx=

# Naming style matching correct method names
method-naming-style=camelCase

# Regular expression matching correct method names. Overrides method-naming-
# style
method-rgx=^test_|^_|^[a-zA-Z][a-zA-Z0-9]+$

# Naming style matching correct module names
module-naming-style=snake_case

# Regular expression matching correct module names. Overrides module-naming-
# style
#module-rgx=
```
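The effect of the new method-rgx can be checked directly with Python's re module: it accepts unittest-style test_* names, underscore-prefixed "private" names, and plain camelCase names of at least two characters.

```python
import re

# the regex set in pylintrc above; pylint applies it with match() semantics
zMethod = re.compile(r"^test_|^_|^[a-zA-Z][a-zA-Z0-9]+$")

for sName in ("test_gc_fr", "_getExpectedErrors", "getTokenTypes", "x", "bad_name"):
    print("{:<20} {}".format(sName, bool(zMethod.match(sName))))
# test_gc_fr and _getExpectedErrors pass via the prefixes,
# getTokenTypes via the camelCase alternative; "x" and "bad_name" fail
```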