Index: gc_core/py/lang_core/__init__.py ================================================================== --- gc_core/py/lang_core/__init__.py +++ gc_core/py/lang_core/__init__.py @@ -1,2 +1,5 @@ +""" +Grammalecte - core grammar checker engine +""" from .gc_engine import * Index: gc_core/py/lang_core/gc_engine.py ================================================================== --- gc_core/py/lang_core/gc_engine.py +++ gc_core/py/lang_core/gc_engine.py @@ -552,11 +552,10 @@ echo(" DISAMBIGUATOR: ({}) [{}:{}]".format(sWhat, self.lToken[nTokenOffset+1]["sValue"], self.lToken[nLastToken]["sValue"])) elif cActionType == ">": # we do nothing, this test is just a condition to apply all following actions if bDebug: echo(" COND_OK") - pass elif cActionType == "/": # Tag nTokenStart = nTokenOffset + eAct[0] if eAct[0] > 0 else nLastToken + eAct[0] nTokenEnd = nTokenOffset + eAct[1] if eAct[1] > 0 else nLastToken + eAct[1] for i in range(nTokenStart, nTokenEnd+1): Index: gc_lang/fr/modules/tests.py ================================================================== --- gc_lang/fr/modules/tests.py +++ gc_lang/fr/modules/tests.py @@ -6,19 +6,49 @@ import unittest import os import re import time - +from contextlib import contextmanager from ..graphspell.ibdawg import IBDAWG from ..graphspell.echo import echo from . import gc_engine as gce from . import conj from . import phonet from . import mfsp + +@contextmanager +def timeblock (label, hDst): + "performance counter (contextmanager)" + start = time.perf_counter() + try: + yield + finally: + end = time.perf_counter() + print('{} : {}'.format(label, end - start)) + if hDst: + hDst.write("{:<12.6}".format(end-start)) + + +def perf (sVersion, hDst=None): + "performance tests" + print("\nPerformance tests") + gce.load() + gce.parse("Texte sans importance… utile pour la compilation des règles avant le calcul des perfs.") + + spHere, _ = os.path.split(__file__) + with open(os.path.join(spHere, "perf.txt"), "r", encoding="utf-8") as hSrc: + if hDst: + hDst.write("{:<12}{:<20}".format(sVersion, time.strftime("%Y.%m.%d %H:%M"))) + for sText in ( s.strip() for s in hSrc if not s.startswith("#") and s.strip() ): + with timeblock(sText[:sText.find(".")], hDst): + gce.parse(sText) + if hDst: + hDst.write("\n") + def _fuckBackslashUTF8 (s): "fuck that shit" return s.replace("\u2019", "'").replace("\u2013", "–").replace("\u2014", "—") @@ -215,44 +245,13 @@ nStart = m.start() - (4 * i) nEnd = m.end() - (4 * (i+1)) sRes = sRes[:nStart] + "~" * (nEnd - nStart) + sRes[nEnd:-4] return sRes - -from contextlib import contextmanager -@contextmanager -def timeblock (label, hDst): - "performance counter (contextmanager)" - start = time.perf_counter() - try: - yield - finally: - end = time.perf_counter() - print('{} : {}'.format(label, end - start)) - if hDst: - hDst.write("{:<12.6}".format(end-start)) - - -def perf (sVersion, hDst=None): - "performance tests" - print("\nPerformance tests") - gce.load() - gce.parse("Texte sans importance… utile pour la compilation des règles avant le calcul des perfs.") - - spHere, _ = os.path.split(__file__) - with open(os.path.join(spHere, "perf.txt"), "r", encoding="utf-8") as hSrc: - if hDst: - hDst.write("{:<12}{:<20}".format(sVersion, time.strftime("%Y.%m.%d %H:%M"))) - for sText in ( s.strip() for s in hSrc if not s.startswith("#") and s.strip() ): - with timeblock(sText[:sText.find(".")], hDst): - gce.parse(sText) - if hDst: - hDst.write("\n") - def main(): "start function" unittest.main() if __name__ == '__main__': main() Index: graphspell/ibdawg.py ================================================================== --- graphspell/ibdawg.py +++ graphspell/ibdawg.py @@ -188,14 +188,15 @@ self.dCharVal = { v: k for k, v in self.dChar.items() } self.nBytesOffset = 1 # version 3 def _initJSON (self, oJSON): "initialize with a JSON text file" + self.sByDic = "" # init to prevent pylint whining self.__dict__.update(oJSON) self.byDic = binascii.unhexlify(self.sByDic) self.dCharVal = { v: k for k, v in self.dChar.items() } - self.a2grams = set(self.l2grams) if hasattr(self, 'l2grams') else None + self.a2grams = set(getattr(self, 'l2grams')) if hasattr(self, 'l2grams') else None def getInfo (self): "return string about the IBDAWG" return " Language: {0.sLangName} Lang code: {0.sLangCode} Dictionary name: {0.sDicName}" \ " Compression method: {0.nCompressionMethod:>2} Date: {0.sDate} Stemming: {0.cStemming}FX\n" \ Index: graphspell/tokenizer.py ================================================================== --- graphspell/tokenizer.py +++ graphspell/tokenizer.py @@ -58,5 +58,9 @@ for i, m in enumerate(self.zToken.finditer(sText), 1): yield { "i": i, "sType": m.lastgroup, "sValue": m.group(), "nStart": m.start(), "nEnd": m.end() } if bStartEndToken: iEnd = len(sText) yield { "i": i+1, "sType": "INFO", "sValue": "", "nStart": iEnd, "nEnd": iEnd, "lMorph": [""] } + + def getTokenTypes (self): + "returns list of token types as tuple (token name, regex)" + return [ sRegex[4:-1].split(">") for sRegex in _PATTERNS[self.sLang] ] Index: pylintrc ================================================================== --- pylintrc +++ pylintrc @@ -282,11 +282,11 @@ # Naming style matching correct method names method-naming-style=camelCase # Regular expression matching correct method names. Overrides method-naming- # style -#method-rgx=^test_ +method-rgx=^test_|^_|^[a-zA-Z][a-zA-Z0-9]+$ # Naming style matching correct module names module-naming-style=snake_case # Regular expression matching correct module names. Overrides module-naming-