Index: gc_core/py/lang_core/gc_engine.py ================================================================== --- gc_core/py/lang_core/gc_engine.py +++ gc_core/py/lang_core/gc_engine.py @@ -62,11 +62,11 @@ global _dOptionsColors global _oTokenizer try: _oSpellChecker = SpellChecker("${lang}", "${dic_main_filename_py}", "${dic_community_filename_py}", "${dic_personal_filename_py}") _sAppContext = sContext - _dOptions = dict(gc_options.getOptions(sContext)) # duplication necessary, to be able to reset to default + _dOptions = gc_options.getOptions(sContext).copy() # duplication necessary, to be able to reset to default _dOptionsColors = gc_options.getOptionsColors(sContext, sColorType) _oTokenizer = _oSpellChecker.getTokenizer() _oSpellChecker.activateStorage() except: traceback.print_exc() @@ -166,11 +166,11 @@ return _dOptions def getDefaultOptions (): "return the dictionary of default options" - return dict(gc_options.getOptions(_sAppContext)) + return gc_options.getOptions(_sAppContext).copy() def getOptionsLabels (sLang): "return options labels" return gc_options.getUI(sLang) @@ -184,11 +184,11 @@ def resetOptions (): "set options to default values" global _dOptions - _dOptions = dict(gc_options.getOptions(_sAppContext)) + _dOptions = getDefaultOptions() #### Parsing _zEndOfSentence = re.compile(r'([.?!:;…]\W+(?=[A-ZÉÈÎÔ])|.$)') @@ -1022,39 +1022,44 @@ dToken1["lMorph"] = lMorph return bResult def g_tag_before (dToken, dTags, sTag): + "returns True if <sTag> is present on tokens before <dToken>" if sTag not in dTags: return False if dToken["i"] > dTags[sTag][0]: return True return False def g_tag_after (dToken, dTags, sTag): + "returns True if <sTag> is present on tokens after <dToken>" if sTag not in dTags: return False if dToken["i"] < dTags[sTag][1]: return True return False def g_tag (dToken, sTag): + "returns True if <sTag> is present on token <dToken>" return "aTags" in dToken and sTag in dToken["aTags"] def g_space_between_tokens (dToken1, dToken2, nMin, nMax=None): + "checks if spaces between tokens 
is >= <nMin> and <= <nMax>" nSpace = dToken2["nStart"] - dToken1["nEnd"] if nSpace < nMin: return False if nMax is not None and nSpace > nMax: return False return True def g_token (lToken, i): + "return token at index <i> in lToken (or the closest one)" if i < 0: return lToken[0] if i >= len(lToken): return lToken[-1] return lToken[i] @@ -1154,10 +1159,11 @@ #echo("DA:", dToken["sValue"], lMorph) return True def g_define_from (dToken, nLeft=None, nRight=None): + "set morphologies of <dToken> with slicing its value with <nLeft> and <nRight>" if nLeft is not None: dToken["lMorph"] = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)]) else: dToken["lMorph"] = _oSpellChecker.getMorph(dToken["sValue"]) return True Index: gc_lang/fr/modules/tests.py ================================================================== --- gc_lang/fr/modules/tests.py +++ gc_lang/fr/modules/tests.py @@ -22,10 +22,11 @@ "fuck that shit" return s.replace("\u2019", "'").replace("\u2013", "–").replace("\u2014", "—") class TestDictionary (unittest.TestCase): + "Test du correcteur orthographique" @classmethod def setUpClass (cls): cls.oDic = IBDAWG("${dic_main_filename_py}") @@ -45,10 +46,11 @@ for sWord in ["BranchE", "BRanche", "BRAnCHE", "émilie", "éMILIE", "émiLie"]: self.assertFalse(self.oDic.isValid(sWord), sWord) class TestConjugation (unittest.TestCase): + "Tests des conjugaisons" @classmethod def setUpClass (cls): pass @@ -68,10 +70,11 @@ ("manger", ":Sp", ":3s", "mange"), ("finir", ":K", ":3p", "finiraient"), ("prendre", ":If", ":1p", "prendrons")]: self.assertEqual(conj.getConj(sVerb, sTense, sWho), sConj, sVerb) class TestPhonet (unittest.TestCase): + "Tests des équivalences phonétiques" @classmethod def setUpClass (cls): cls.lSet = [ ["ce", "se"], @@ -104,10 +107,11 @@ for sWord in aSet: self.assertListEqual(phonet.getSimil(sWord), sorted(aSet)) class TestMasFemSingPlur (unittest.TestCase): + "Tests des masculins, féminins, singuliers et pluriels" @classmethod def setUpClass (cls): cls.lPlural = [ ("travail", 
["travaux"]), @@ -119,20 +123,21 @@ for sSing, lPlur in self.lPlural: self.assertListEqual(mfsp.getMiscPlural(sSing), lPlur) class TestGrammarChecking (unittest.TestCase): + "Tests du correcteur grammatical" @classmethod def setUpClass (cls): gce.load() cls._zError = re.compile(r"\{\{.*?\}\}") cls._aTestedRules = set() def test_parse (self): zOption = re.compile("^__([a-zA-Z0-9]+)__ ") - spHere, spfThisFile = os.path.split(__file__) + spHere, _ = os.path.split(__file__) with open(os.path.join(spHere, "gc_test.txt"), "r", encoding="utf-8") as hSrc: nError = 0 for sLine in ( s for s in hSrc if not s.startswith("#") and s.strip() ): sLineNum = sLine[:10].strip() sLine = sLine[10:].strip() @@ -214,10 +219,11 @@ from contextlib import contextmanager @contextmanager def timeblock (label, hDst): + "performance counter (contextmanager)" start = time.perf_counter() try: yield finally: end = time.perf_counter() @@ -228,24 +234,25 @@ def perf (sVersion, hDst=None): "performance tests" print("\nPerformance tests") gce.load() - aErrs = gce.parse("Texte sans importance… utile pour la compilation des règles avant le calcul des perfs.") + gce.parse("Texte sans importance… utile pour la compilation des règles avant le calcul des perfs.") - spHere, spfThisFile = os.path.split(__file__) + spHere, _ = os.path.split(__file__) with open(os.path.join(spHere, "perf.txt"), "r", encoding="utf-8") as hSrc: if hDst: hDst.write("{:<12}{:<20}".format(sVersion, time.strftime("%Y.%m.%d %H:%M"))) for sText in ( s.strip() for s in hSrc if not s.startswith("#") and s.strip() ): with timeblock(sText[:sText.find(".")], hDst): - aErrs = gce.parse(sText) + gce.parse(sText) if hDst: hDst.write("\n") def main(): + "start function" unittest.main() if __name__ == '__main__': main() Index: gc_lang/fr/modules/textformatter.py ================================================================== --- gc_lang/fr/modules/textformatter.py +++ gc_lang/fr/modules/textformatter.py @@ -185,77 +185,81 @@ 
"ma_1letter_lowercase": [("\\b([ldjnmtscç]) (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])", "\\1’")], "ma_1letter_uppercase": [("\\b([LDJNMTSCÇ]) (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])", "\\1’")] } -lOptRepl = [ - ("ts_units", True), - ("start_of_paragraph", True), - ("end_of_paragraph", True), - ("between_words", True), - ("before_punctuation", True), - ("within_parenthesis", True), - ("within_square_brackets", True), - ("within_quotation_marks", True), - ("nbsp_before_punctuation", True), - ("nbsp_within_quotation_marks", True), - ("nbsp_within_numbers", True), - ("nnbsp_before_punctuation", False), - ("nnbsp_within_quotation_marks", False), - ("nnbsp_within_numbers", False), - ("nbsp_titles", False), - ("nbsp_before_symbol", True), - ("nbsp_before_units", True), - ("nbsp_repair", True), - ("add_space_after_punctuation", True), - ("add_space_around_hyphens", True), - ("add_space_repair", True), - ("erase_non_breaking_hyphens", False), - ("ts_apostrophe", True), - ("ts_ellipsis", True), - ("ts_n_dash_middle", True), - ("ts_m_dash_middle", False), - ("ts_n_dash_start", False), - ("ts_m_dash_start", True), - ("ts_quotation_marks", True), - ("ts_spell", True), - ("ts_ligature_ffi_on", False), - ("ts_ligature_ffl_on", False), - ("ts_ligature_fi_on", False), - ("ts_ligature_fl_on", False), - ("ts_ligature_ff_on", False), - ("ts_ligature_ft_on", False), - ("ts_ligature_st_on", False), - ("ts_ligature_fi_off", False), - ("ts_ligature_fl_off", False), - ("ts_ligature_ff_off", False), - ("ts_ligature_ffi_off", False), - ("ts_ligature_ffl_off", False), - ("ts_ligature_ft_off", False), - ("ts_ligature_st_off", False), - ("ordinals_exponant", False), - ("ordinals_no_exponant", True), - ("etc", True), - ("mh_interrogatives", True), - ("mh_numbers", True), - ("mh_frequent_words", True), - ("ma_word", True), - ("ma_1letter_lowercase", False), - ("ma_1letter_uppercase", False), -] +dDefaultOptions = { + "ts_units": True, + "start_of_paragraph": True, + "end_of_paragraph": True, + "between_words": True, 
+ "before_punctuation": True, + "within_parenthesis": True, + "within_square_brackets": True, + "within_quotation_marks": True, + "nbsp_before_punctuation": True, + "nbsp_within_quotation_marks": True, + "nbsp_within_numbers": True, + "nnbsp_before_punctuation": False, + "nnbsp_within_quotation_marks": False, + "nnbsp_within_numbers": False, + "nbsp_titles": False, + "nbsp_before_symbol": True, + "nbsp_before_units": True, + "nbsp_repair": True, + "add_space_after_punctuation": True, + "add_space_around_hyphens": True, + "add_space_repair": True, + "erase_non_breaking_hyphens": False, + "ts_apostrophe": True, + "ts_ellipsis": True, + "ts_n_dash_middle": True, + "ts_m_dash_middle": False, + "ts_n_dash_start": False, + "ts_m_dash_start": True, + "ts_quotation_marks": True, + "ts_spell": True, + "ts_ligature_ffi_on": False, + "ts_ligature_ffl_on": False, + "ts_ligature_fi_on": False, + "ts_ligature_fl_on": False, + "ts_ligature_ff_on": False, + "ts_ligature_ft_on": False, + "ts_ligature_st_on": False, + "ts_ligature_fi_off": False, + "ts_ligature_fl_off": False, + "ts_ligature_ff_off": False, + "ts_ligature_ffi_off": False, + "ts_ligature_ffl_off": False, + "ts_ligature_ft_off": False, + "ts_ligature_st_off": False, + "ordinals_exponant": False, + "ordinals_no_exponant": True, + "etc": True, + "mh_interrogatives": True, + "mh_numbers": True, + "mh_frequent_words": True, + "ma_word": True, + "ma_1letter_lowercase": False, + "ma_1letter_uppercase": False +} class TextFormatter: "Text Formatter: purge typographic mistakes from text" def __init__ (self): - for sOpt, lTup in dReplTable.items(): + for _, lTup in dReplTable.items(): for i, t in enumerate(lTup): lTup[i] = (re.compile(t[0]), t[1]) def formatText (self, sText): "returns formatted text" - for sOptName, bVal in lOptRepl: + for sOptName, bVal in dDefaultOptions.items(): if bVal: for zRgx, sRep in dReplTable[sOptName]: sText = zRgx.sub(sRep, sText) return sText + + def getDefaultOptions (self): + "returns default 
options" + return dDefaultOptions.copy() Index: pylintrc ================================================================== --- pylintrc +++ pylintrc @@ -232,11 +232,11 @@ # Naming style matching correct function names function-naming-style=camelCase # Regular expression matching correct function names. Overrides function- # naming-style -function-rgx=^[a-z]\w+|^_* +function-rgx=^[a-z]\w+|^_ # Good variable names which should always be accepted, separated by a comma good-names=i, i1, i2, @@ -243,11 +243,10 @@ j, k, s, s1, s2, - sx, sf, sp, spf, c, c1, @@ -283,11 +282,11 @@ # Naming style matching correct method names method-naming-style=camelCase # Regular expression matching correct method names. Overrides method-naming- # style -#method-rgx= +#method-rgx=^test_ # Naming style matching correct module names module-naming-style=snake_case # Regular expression matching correct module names. Overrides module-naming- @@ -331,11 +330,11 @@ # Maximum number of characters on a single line. max-line-length=200 # Maximum number of lines in a module -max-module-lines=1000 +max-module-lines=5000 # List of optional constructs for which whitespace checking is disabled. `dict- # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. # `trailing-comma` allows a space between comma and closing bracket: (a, ). # `empty-line` allows space-only lines. @@ -490,11 +489,14 @@ # warning. exclude-protected=_asdict, _fields, _replace, _source, - _make + _make, + _getTags, + _hasConjWithTags, + _getConjWithTags # List of valid names for the first argument in a class method. valid-classmethod-first-arg=cls # List of valid names for the first argument in a metaclass class method.