Index: gc_core/py/lang_core/gc_engine.py
==================================================================
--- gc_core/py/lang_core/gc_engine.py
+++ gc_core/py/lang_core/gc_engine.py
@@ -1,7 +1,9 @@
-# Grammalecte
-# Grammar checker engine
+"""
+Grammalecte
+Grammar checker engine
+"""
 
 import re
 import sys
 import os
 import traceback
@@ -55,16 +57,16 @@
 
 #### Initialization
 
 def load (sContext="Python"):
+    "initialization of the grammar checker"
     global _oSpellChecker
     global _sAppContext
     global _dOptions
     global _oTokenizer
     global _createRegexError
-    global _createTokenError
     try:
         _oSpellChecker = SpellChecker("${lang}", "${dic_main_filename_py}", "${dic_extended_filename_py}", "${dic_community_filename_py}", "${dic_personal_filename_py}")
         _sAppContext = sContext
         _dOptions = dict(gc_options.getOptions(sContext))   # duplication necessary, to be able to reset to default
         _oTokenizer = _oSpellChecker.getTokenizer()
@@ -244,14 +246,14 @@
         xErr.aFullComment = sMessage   # sMessage.split("|")[-1]  # in dialog
         if bShowRuleId:
             xErr.aShortComment += " # " + sLineId + " # " + sRuleId
         # URL
         if sURL:
-            p = PropertyValue()
-            p.Name = "FullCommentURL"
-            p.Value = sURL
-            xErr.aProperties = (p,)
+            xProperty = PropertyValue()
+            xProperty.Name = "FullCommentURL"
+            xProperty.Value = sURL
+            xErr.aProperties = (xProperty,)
         else:
             xErr.aProperties = ()
         return xErr
 
@@ -311,18 +313,21 @@
         sNew = sNew + " " * (nLen-len(sNew))
     return sSentence[0:m.start(iGroup)] + sNew + sSentence[m.end(iGroup):]
 
 
 def ignoreRule (sRuleId):
+    "disable rule <sRuleId>"
     _aIgnoredRules.add(sRuleId)
 
 
 def resetIgnoreRules ():
+    "clear all ignored rules"
     _aIgnoredRules.clear()
 
 
 def reactivateRule (sRuleId):
+    "(re)activate rule <sRuleId>"
     _aIgnoredRules.discard(sRuleId)
 
 
 def listRules (sFilter=None):
     "generator: returns tuple (sOption, sLineId, sRuleId)"
@@ -338,50 +343,59 @@
         if not sFilter or zFilter.search(sRuleId):
             yield (sOption, sLineId, sRuleId)
 
 
 def displayRules (sFilter=None):
+    "display the name of rules, with the filter <sFilter>"
     echo("List of rules. Filter: << " + str(sFilter) + " >>")
     for sOption, sLineId, sRuleId in listRules(sFilter):
         echo("{:<10} {:<10} {}".format(sOption, sLineId, sRuleId))
 
 
 def setOption (sOpt, bVal):
+    "set option <sOpt> with <bVal> if it exists"
     if sOpt in _dOptions:
         _dOptions[sOpt] = bVal
 
 
 def setOptions (dOpt):
+    "update the dictionary of options with <dOpt>"
     for sKey, bVal in dOpt.items():
         if sKey in _dOptions:
             _dOptions[sKey] = bVal
 
 
 def getOptions ():
+    "return the dictionary of current options"
     return _dOptions
 
 
 def getDefaultOptions ():
+    "return the dictionary of default options"
     return dict(gc_options.getOptions(_sAppContext))
 
 
 def getOptionsLabels (sLang):
+    "return options labels"
     return gc_options.getUI(sLang)
 
 
 def displayOptions (sLang):
+    "display the list of grammar checking options"
     echo("List of options")
     echo("\n".join( [ k+":\t"+str(v)+"\t"+gc_options.getUI(sLang).get(k, ("?", ""))[0]  for k, v in sorted(_dOptions.items()) ] ))
     echo("")
 
 
 def resetOptions ():
+    "set options to default values"
     global _dOptions
     _dOptions = dict(gc_options.getOptions(_sAppContext))
 
 
 def getSpellChecker ():
+    "return the spellchecker object"
     return _oSpellChecker
 
 
 def _getPath ():
     return os.path.join(os.path.dirname(sys.modules[__name__].__file__), __name__ + ".py")
 
@@ -389,11 +403,11 @@
 
 #### common functions
 
 def option (sOpt):
-    "return True if option sOpt is active"
+    "return True if option <sOpt> is active"
     return _dOptions.get(sOpt, False)
 
 
 def displayInfo (dTokenPos, tWord):
     "for debugging: retrieve info of word"
@@ -415,14 +429,14 @@
     if not tWord:
         return bNoWord
     lMorph = dTokenPos[tWord[0]]["lMorph"]  if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]]  else _oSpellChecker.getMorph(tWord[1])
     if not lMorph:
         return False
-    p = re.compile(sPattern)
+    zPattern = re.compile(sPattern)
     if bStrict:
-        return all(p.search(s)  for s in lMorph)
-    return any(p.search(s)  for s in lMorph)
+        return all(zPattern.search(s)  for s in lMorph)
+    return any(zPattern.search(s)  for s in lMorph)
 
 
 def morphex (dTokenPos, tWord, sPattern, sNegPattern, bNoWord=False):
     "analyse a tuple (position, word), returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on)"
     if not tWord:
@@ -429,41 +443,41 @@
         return bNoWord
     lMorph = dTokenPos[tWord[0]]["lMorph"]  if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]]  else _oSpellChecker.getMorph(tWord[1])
     if not lMorph:
         return False
     # check negative condition
-    np = re.compile(sNegPattern)
-    if any(np.search(s)  for s in lMorph):
+    zNegPattern = re.compile(sNegPattern)
+    if any(zNegPattern.search(s)  for s in lMorph):
         return False
     # search sPattern
-    p = re.compile(sPattern)
-    return any(p.search(s)  for s in lMorph)
+    zPattern = re.compile(sPattern)
+    return any(zPattern.search(s)  for s in lMorph)
 
 
 def analyse (sWord, sPattern, bStrict=True):
     "analyse a word, return True if sPattern in morphologies (disambiguation off)"
     lMorph = _oSpellChecker.getMorph(sWord)
     if not lMorph:
         return False
-    p = re.compile(sPattern)
+    zPattern = re.compile(sPattern)
     if bStrict:
-        return all(p.search(s)  for s in lMorph)
-    return any(p.search(s)  for s in lMorph)
+        return all(zPattern.search(s)  for s in lMorph)
+    return any(zPattern.search(s)  for s in lMorph)
 
 
 def analysex (sWord, sPattern, sNegPattern):
     "analyse a word, returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation off)"
     lMorph = _oSpellChecker.getMorph(sWord)
     if not lMorph:
         return False
     # check negative condition
-    np = re.compile(sNegPattern)
-    if any(np.search(s)  for s in lMorph):
+    zNegPattern = re.compile(sNegPattern)
+    if any(zNegPattern.search(s)  for s in lMorph):
         return False
     # search sPattern
-    p = re.compile(sPattern)
-    return any(p.search(s)  for s in lMorph)
+    zPattern = re.compile(sPattern)
+    return any(zPattern.search(s)  for s in lMorph)
 
 
 ## functions to get text outside pattern scope
 
@@ -529,10 +543,11 @@
 
 #### Disambiguator
 
 def select (dTokenPos, nPos, sWord, sPattern, lDefault=None):
+    "Disambiguation: select morphologies of <sWord> matching <sPattern>"
     if not sWord:
         return True
     if nPos not in dTokenPos:
         print("Error. There should be a token at this position: ", nPos)
         return True
@@ -547,10 +562,11 @@
         dTokenPos[nPos]["lMorph"] = lDefault
     return True
 
 
 def exclude (dTokenPos, nPos, sWord, sPattern, lDefault=None):
+    "Disambiguation: exclude morphologies of <sWord> matching <sPattern>"
     if not sWord:
         return True
     if nPos not in dTokenPos:
         print("Error. There should be a token at this position: ", nPos)
         return True
@@ -565,10 +581,11 @@
         dTokenPos[nPos]["lMorph"] = lDefault
     return True
 
 
 def define (dTokenPos, nPos, lMorph):
+    "Disambiguation: set morphologies of token at <nPos> with <lMorph>"
     if nPos not in dTokenPos:
         print("Error. There should be a token at this position: ", nPos)
         return True
     dTokenPos[nPos]["lMorph"] = lMorph
     return True
@@ -577,10 +594,11 @@
 
 #### TOKEN SENTENCE CHECKER
 
 class TokenSentence:
+    "Text parser"
 
     def __init__ (self, sSentence, sSentence0, nOffset):
         self.sSentence = sSentence
         self.sSentence0 = sSentence0
         self.nOffsetWithinParagraph = nOffset
@@ -589,10 +607,11 @@
         self.dTags = {}
         self.dError = {}
         self.createError = self._createWriterError  if _bWriterError  else self._createDictError
 
     def update (self, sSentence):
+        "update <sSentence> and retokenize"
         self.sSentence = sSentence
         self.lToken = list(_oTokenizer.genTokens(sSentence, True))
 
     def _getNextMatchingNodes (self, dToken, dGraph, dNode, bDebug=False):
         "generator: return nodes where “values” match arcs"
@@ -686,10 +705,11 @@
                     print(" MATCH: *" + sMeta)
                 yield dGraph[dNode["<meta>"][sMeta]]
 
     def parse (self, dGraph, dPriority, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False):
+        "parse tokens from the text and execute actions encountered"
         self.dError = {}
         dPriority = {}  # Key = position; value = priority
         dOpt = _dOptions  if not dOptions  else dOptions
         lPointer = []
         bTagAndRewrite = False
@@ -817,14 +837,14 @@
         xErr.aFullComment = sMessage   # sMessage.split("|")[-1]  # in dialog
         if bShowRuleId:
             xErr.aShortComment += " " + sLineId + " # " + sRuleId
         # URL
         if sURL:
-            p = PropertyValue()
-            p.Name = "FullCommentURL"
-            p.Value = sURL
-            xErr.aProperties = (p,)
+            xProperty = PropertyValue()
+            xProperty.Name = "FullCommentURL"
+            xProperty.Value = sURL
+            xErr.aProperties = (xProperty,)
         else:
             xErr.aProperties = ()
         return xErr
 
     def _createDictError (self, sSugg, nTokenOffset, iFirstToken, nStart, nEnd, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
@@ -933,11 +953,11 @@
                 bKeepToken = False
                 if "nMergeUntil" in dToken:
                     if dToken["i"] > nMergeUntil:  # this token is not already merged with a previous token
                         dTokenMerger = dToken
                     if dToken["nMergeUntil"] > nMergeUntil:
-                        nMergeUntil = dToken["nMergeUntil"] 
+                        nMergeUntil = dToken["nMergeUntil"]
                     del dToken["nMergeUntil"]
                 elif "bToRemove" in dToken:
                     # remove useless token
                     self.sSentence = self.sSentence[:dToken["nStart"]] + " " * (dToken["nEnd"] - dToken["nStart"]) + self.sSentence[dToken["nEnd"]:]
                     if bDebug:

DELETED   gc_core/py/lang_core/gc_sentence.py

Index: gc_core/py/lang_core/gc_sentence.py
==================================================================
--- gc_core/py/lang_core/gc_sentence.py
+++ /dev/null
@@ -1,237 +0,0 @@
-# Sentence checker
-
-from ..graphspell.tokenizer import Tokenizer
-from .gc_rules_graph import dGraph
-
-
-oTokenizer = Tokenizer("${lang}")
-
-
-class TokenSentence:
-
-    def __init__ (self, sSentence, sSentence0, nOffset):
-        self.sSentence = sSentence
-        self.sSentence0 = sSentence0
-        self.nOffset = nOffset
-        self.lToken = list(oTokenizer.genTokens())
-
-    def parse (self):
-        dErr = {}
-        lPointer = []
-        for dToken in self.lToken:
-            for i, dPointer in enumerate(lPointer):
-                bValid = False
-                for dNode in self._getNextMatchingNodes(dToken, dPointer["dNode"]):
-                    dPointer["nOffset"] = dToken["i"]
-                    dPointer["dNode"] = dNode
-                    bValid = True
-                if not bValid:
-                    del lPointer[i]
-            for dNode in self._getNextMatchingNodes(dToken, dGraph):
-                lPointer.append({"nOffset": 0, "dNode": dNode})
-        for dPointer in lPointer:
-            if "<end>" in dPointer["dNode"]:
-                for dNode in dGraph[dPointer["dNode"]["<end>"]]:
-                    dErr = self._executeActions(dNode, nOffset)
-        return dErr
-
-    def _getNextMatchingNodes (self, dToken, dNode):
-        # token value
-        if dToken["sValue"] in dNode:
-            yield dGraph[dNode[dToken["sValue"]]]
-        # token lemmas
-        for sLemma in dToken["lLemma"]:
-            if sLemma in dNode:
-                yield dGraph[dNode[sLemma]]
-        # universal arc
-        if "*" in dNode:
-            yield dGraph[dNode["*"]]
-        # regex arcs
-        if "~" in dNode:
-            for sRegex in dNode["~"]:
-                for sMorph in dToken["lMorph"]:
-                    if re.search(sRegex, sMorph):
-                        yield dGraph[dNode["~"][sRegex]]
-
-    def _executeActions (self, dNode, nOffset):
-        for sLineId, nextNodeKey in dNode.items():
-            for sArc in dGraph[nextNodeKey]:
-                bCondMemo = None
-                sFuncCond, cActionType, sWhat, *eAct = dRule[sArc]
-                # action in lActions: [ condition, action type, replacement/suggestion/action[, iGroupStart, iGroupEnd[, message, URL]] ]
-                try:
-                    bCondMemo = not sFuncCond or globals()[sFuncCond](self, sCountry, bCondMemo)
-                    if bCondMemo:
-                        if cActionType == "-":
-                            # grammar error
-                            nErrorStart = nSentenceOffset + m.start(eAct[0])
-                            nErrorEnd = nSentenceOffset + m.start(eAct[1])
-                            if nErrorStart not in dErrs or nPriority > dPriority[nErrorStart]:
-                                dErrs[nErrorStart] = _createError(self, sWhat, nErrorStart, nErrorEnd, sLineId, bUppercase, eAct[2], eAct[3], bIdRule, sOption, bContext)
-                                dPriority[nErrorStart] = nPriority
-                        elif cActionType == "~":
-                            # text processor
-                            self._rewrite(sWhat, nErrorStart, nErrorEnd)
-                        elif cActionType == "@":
-                            # jump
-                            self._jump(sWhat)
-                        elif cActionType == "=":
-                            # disambiguation
-                            globals()[sWhat](self.lToken)
-                        elif cActionType == ">":
-                            # we do nothing, this test is just a condition to apply all following actions
-                            pass
-                        else:
-                            print("# error: unknown action at " + sLineId)
-                    elif cActionType == ">":
-                        break
-                except Exception as e:
-                    raise Exception(str(e), "# " + sLineId + " # " + sRuleId)
-
-    def _createWriterError (self):
-        d = {}
-        return d
-
-    def _createDictError (self):
-        d = {}
-        return d
-
-    def _rewrite (self, sWhat, nErrorStart, nErrorEnd):
-        "text processor: rewrite tokens between <nErrorStart> and <nErrorEnd> position"
-        lTokenValue = sWhat.split("|")
-        if len(lTokenValue) != (nErrorEnd - nErrorStart + 1):
-            print("Error. Text processor: number of replacements != number of tokens.")
-            return
-        for i, sValue in zip(range(nErrorStart, nErrorEnd+1), lTokenValue):
-            self.lToken[i]["sValue"] = sValue
-
-    def _jump (self, sWhat):
-        try:
-            nFrom, nTo = sWhat.split(">")
-            self.lToken[int(nFrom)]["iJump"] = int(nTo)
-        except:
-            print("# Error. Jump failed: ", sWhat)
-            traceback.print_exc()
-            return
-
-
-#### Analyse tokens
-
-def g_morph (dToken, sPattern, bStrict=True):
-    "analyse a token, return True if <sPattern> in morphologies"
-    if "lMorph" in dToken:
-        lMorph = dToken["lMorph"]
-    else:
-        if dToken["sValue"] not in _dAnalyses and not _storeMorphFromFSA(dToken["sValue"]):
-            return False
-        if not _dAnalyses[dToken["sValue"]]:
-            return False
-        lMorph = _dAnalyses[dToken["sValue"]]
-    zPattern = re.compile(sPattern)
-    if bStrict:
-        return all(zPattern.search(sMorph)  for sMorph in lMorph)
-    return any(zPattern.search(sMorph)  for sMorph in lMorph)
-
-def g_morphex (dToken, sPattern, sNegPattern):
-    "analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies"
-    if "lMorph" in dToken:
-        lMorph = dToken["lMorph"]
-    else:
-        if dToken["sValue"] not in _dAnalyses and not _storeMorphFromFSA(dToken["sValue"]):
-            return False
-        if not _dAnalyses[dToken["sValue"]]:
-            return False
-        lMorph = _dAnalyses[dToken["sValue"]]
-    # check negative condition
-    zNegPattern = re.compile(sNegPattern)
-    if any(zNegPattern.search(sMorph)  for sMorph in lMorph):
-        return False
-    # search sPattern
-    zPattern = re.compile(sPattern)
-    return any(zPattern.search(sMorph)  for sMorph in lMorph)
-
-def g_analyse (dToken, sPattern, bStrict=True):
-    "analyse a token, return True if <sPattern> in morphologies (disambiguation off)"
-    if dToken["sValue"] not in _dAnalyses and not _storeMorphFromFSA(dToken["sValue"]):
-        return False
-    if not _dAnalyses[dToken["sValue"]]:
-        return False
-    zPattern = re.compile(sPattern)
-    if bStrict:
-        return all(zPattern.search(sMorph)  for sMorph in _dAnalyses[dToken["sValue"]])
-    return any(zPattern.search(sMorph)  for sMorph in _dAnalyses[dToken["sValue"]])
-
-
-def g_analysex (dToken, sPattern, sNegPattern):
-    "analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies (disambiguation off)"
-    if dToken["sValue"] not in _dAnalyses and not _storeMorphFromFSA(dToken["sValue"]):
-        return False
-    if not _dAnalyses[dToken["sValue"]]:
-        return False
-    # check negative condition
-    zNegPattern = re.compile(sNegPattern)
-    if any(zNegPattern.search(sMorph)  for sMorph in _dAnalyses[dToken["sValue"]]):
-        return False
-    # search sPattern
-    zPattern = re.compile(sPattern)
-    return any(zPattern.search(sMorph)  for sMorph in _dAnalyses[dToken["sValue"]])
-
-
-#### Go outside the rule scope
-
-def g_nextToken (i):
-    pass
-
-def g_prevToken (i):
-    pass
-
-def g_look ():
-    pass
-
-def g_lookAndCheck ():
-    pass
-
-
-#### Disambiguator
-
-def g_select (dToken, sPattern, lDefault=None):
-    "select morphologies for <dToken> according to <sPattern>, always return True"
-    if dToken["sValue"] not in _dAnalyses and not _storeMorphFromFSA(dToken["sValue"]):
-        return True
-    if len(_dAnalyses[dToken["sValue"]]) == 1:
-        return True
-    lMorph = dToken["lMorph"] or _dAnalyses[dToken["sValue"]]
-    lSelect = [ sMorph  for sMorph in lMorph  if re.search(sPattern, sMorph) ]
-    if lSelect:
-        if len(lSelect) != len(lMorph):
-            dToken["lMorph"] = lSelect
-    elif lDefault:
-        dToken["lMorph"] = lDefault
-    return True
-
-
-def g_exclude (dToken, sPattern, lDefault=None):
-    "exclude morphologies for <dToken> according to <sPattern>, always return True"
-    if dToken["sValue"] not in _dAnalyses and not _storeMorphFromFSA(dToken["sValue"]):
-        return True
-    if len(_dAnalyses[dToken["sValue"]]) == 1:
-        return True
-    lMorph = dToken["lMorph"] or _dAnalyses[dToken["sValue"]]
-    lSelect = [ sMorph  for sMorph in lMorph  if not re.search(sPattern, sMorph) ]
-    if lSelect:
-        if len(lSelect) != len(lMorph):
-            dToken["lMorph"] = lSelect
-    elif lDefault:
-        dToken["lMorph"] = lDefault
-    return True
-
-
-def g_define (dToken, lMorph):
-    "set morphologies of <dToken>, always return True"
-    dToken["lMorph"] = lMorph
-    return True
-
-
-#### CALLABLES (generated code)
-
-${graph_callables}

Index: gc_lang/fr/modules/conj.py
==================================================================
--- gc_lang/fr/modules/conj.py
+++ gc_lang/fr/modules/conj.py
@@ -1,6 +1,9 @@
-# Grammalecte - Conjugueur
+"""
+Grammalecte - Conjugueur
+"""
+
 # License: GPL 3
 
 import re
 import traceback
@@ -27,10 +30,11 @@
 
 _dTenseIdx = { ":PQ": 0, ":Ip": 1, ":Iq": 2, ":Is": 3, ":If": 4, ":K": 5, ":Sp": 6, ":Sq": 7, ":E": 8 }
 
 
 def isVerb (sVerb):
+    "return True if it’s an existing verb"
     return sVerb in _dVerb
 
 
 def getConj (sVerb, sTense, sWho):
     "returns conjugation (can be an empty string)"
@@ -54,10 +58,11 @@
         return None
     return _lVtyp[_dVerb[sVerb][0]]
 
 
 def getSimil (sWord, sMorph, bSubst=False):
+    "returns a set of verbal forms similar to <sWord>, according to <sMorph>"
     if ":V" not in sMorph:
         return set()
     sInfi = sMorph[1:sMorph.find("/")]
     aSugg = set()
     tTags = _getTags(sInfi)
@@ -98,10 +103,11 @@
             aSugg.clear()
     return aSugg
 
 
 def getConjSimilInfiV1 (sInfi):
+    "returns verbal forms phonetically similar to infinitive form <sInfi> (for verb in group 1)"
     if sInfi not in _dVerb:
         return set()
     aSugg = set()
     tTags = _getTags(sInfi)
     if tTags:
@@ -140,16 +146,18 @@
         return ""
     if sSfx == "0":
         return sWord
     try:
         return sWord[:-(ord(sSfx[0])-48)] + sSfx[1:]  if sSfx[0] != '0'  else sWord + sSfx[1:]   # 48 is the ASCII code for "0"
-    except:
+    except (IndexError, TypeError):
         return "## erreur, code : " + str(sSfx) + " ##"
-    
+
 
 class Verb ():
+    "Verb and its conjugation"
+
     def __init__ (self, sVerb, sVerbPattern=""):
         # conjugate an unknown verb with rules from sVerbPattern
         if not isinstance(sVerb, str):
             raise TypeError("sVerb should be a string")
         if not sVerb:

Index: gc_lang/fr/modules/conj_generator.py
==================================================================
--- gc_lang/fr/modules/conj_generator.py
+++ gc_lang/fr/modules/conj_generator.py
@@ -1,22 +1,26 @@
-# Conjugation generator
-# beta stage, unfinished, the root for a new way to generate flexions…
+"""
+Conjugation generator
+beta stage, unfinished, the root for a new way to generate flexions…
+"""
 
 import re
 
 
 def conjugate (sVerb, sVerbTag="i_____a", bVarPpas=True):
+    "conjugate <sVerb> and returns a list of tuples (conjugation form, tags)"
     lConj = []
     cGroup = getVerbGroupChar(sVerb)
     for nCut, sAdd, sFlexTags, sPattern in getConjRules(sVerb, bVarPpas):
         if not sPattern or re.search(sPattern, sVerb):
             sFlexion = sVerb[0:-nCut] + sAdd  if nCut  else sVerb + sAdd
             lConj.append((sFlexion, ":V" + cGroup + "_" + sVerbTag + sFlexTags))
     return lConj
 
 
-def getVerbGroupChar (sVerb, ):
+def getVerbGroupChar (sVerb):
+    "returns the group number of <sVerb>, guessing on its ending"
     sVerb = sVerb.lower()
     if sVerb.endswith("er"):
         return "1"
     if sVerb.endswith("ir"):
         return "2"
@@ -26,10 +30,11 @@
         return "3"
     return "4"
 
 
 def getConjRules (sVerb, bVarPpas=True, nGroup=2):
+    "returns a list of lists to conjugate a verb, guessing on its ending"
     if sVerb.endswith("er"):
         # premier groupe, conjugaison en fonction de la terminaison du lemme
         # 5 lettres
         if sVerb[-5:] in oConj["V1"]:
             lConj = list(oConj["V1"][sVerb[-5:]])
@@ -115,11 +120,11 @@
         [2, "ît", ":Sq:3s/*", False],
         [2, "is", ":E:2s/*", False],
         [2, "issons", ":E:1p/*", False],
         [2, "issez", ":E:2p/*", False]
     ],
-    
+
     # premier groupe (bien plus irrégulier que prétendu)
     "V1": {
         # a
         # verbes en -er, -ger, -yer, -cer
         "er": [
Index: gc_lang/fr/modules/cregex.py
==================================================================
--- gc_lang/fr/modules/cregex.py
+++ gc_lang/fr/modules/cregex.py
@@ -1,11 +1,13 @@
-# Grammalecte - Compiled regular expressions
+"""
+Grammalecte - Compiled regular expressions
+"""
 
 import re
 
 #### Lemme
-Lemma = re.compile("^>(\w[\w-]*)")
+Lemma = re.compile(r"^>(\w[\w-]*)")
 
 #### Analyses
 Gender = re.compile(":[mfe]")
 Number = re.compile(":[spi]")
@@ -78,13 +80,15 @@
 
 #### FONCTIONS
 
 def getLemmaOfMorph (s):
+    "return lemma in morphology <s>"
     return Lemma.search(s).group(1)
 
 def checkAgreement (l1, l2):
+    "returns True if agreement in gender and number is possible between morphologies <l1> and <l2>"
     # check number agreement
     if not mbInv(l1) and not mbInv(l2):
         if mbSg(l1) and not mbSg(l2):
             return False
         if mbPl(l1) and not mbPl(l2):
@@ -115,11 +119,11 @@
 def getNumber (lMorph):
     "returns number of word (':s', ':p', ':i' or empty string)."
     sNumber = ""
     for sMorph in lMorph:
-        m = Number.search(sWord)
+        m = Number.search(sMorph)
         if m:
             if not sNumber:
                 sNumber = m.group(0)
             elif sNumber != m.group(0):
                 return ":i"
@@ -129,98 +133,126 @@
 # mbWhat (lMorph)  returns True if lMorph contains What at least once
 
 ## isXXX = it’s certain
 
 def isNom (lMorph):
+    "returns True if all morphologies are “nom”"
     return all(":N" in s  for s in lMorph)
 
 def isNomNotAdj (lMorph):
+    "returns True if all morphologies are “nom”, but not “adjectif”"
     return all(NnotA.search(s)  for s in lMorph)
 
 def isAdj (lMorph):
+    "returns True if all morphologies are “adjectif”"
     return all(":A" in s  for s in lMorph)
 
 def isNomAdj (lMorph):
+    "returns True if all morphologies are “nom” or “adjectif”"
     return all(NA.search(s)  for s in lMorph)
 
 def isNomVconj (lMorph):
+    "returns True if all morphologies are “nom” or “verbe conjugué”"
     return all(NVconj.search(s)  for s in lMorph)
 
 def isInv (lMorph):
+    "returns True if all morphologies are “invariable”"
     return all(":i" in s  for s in lMorph)
 
 def isSg (lMorph):
+    "returns True if all morphologies are “singulier”"
     return all(":s" in s  for s in lMorph)
 
 def isPl (lMorph):
+    "returns True if all morphologies are “pluriel”"
     return all(":p" in s  for s in lMorph)
 
 def isEpi (lMorph):
+    "returns True if all morphologies are “épicène”"
     return all(":e" in s  for s in lMorph)
 
 def isMas (lMorph):
+    "returns True if all morphologies are “masculin”"
     return all(":m" in s  for s in lMorph)
 
 def isFem (lMorph):
+    "returns True if all morphologies are “féminin”"
     return all(":f" in s  for s in lMorph)
 
 
 ## mbXXX = MAYBE XXX
 
 def mbNom (lMorph):
+    "returns True if one morphology is “nom”"
    return any(":N" in s  for s in lMorph)
 
 def mbAdj (lMorph):
+    "returns True if one morphology is “adjectif”"
     return any(":A" in s  for s in lMorph)
 
 def mbAdjNb (lMorph):
+    "returns True if one morphology is “adjectif” or “nombre”"
     return any(AD.search(s)  for s in lMorph)
 
 def mbNomAdj (lMorph):
+    "returns True if one morphology is “nom” or “adjectif”"
     return any(NA.search(s)  for s in lMorph)
 
 def mbNomNotAdj (lMorph):
+    "returns True if one morphology is “nom”, but not “adjectif”"
     b = False
     for s in lMorph:
         if ":A" in s:
             return False
         if ":N" in s:
             b = True
     return b
 
 def mbPpasNomNotAdj (lMorph):
+    "returns True if one morphology is “nom” or “participe passé”, but not “adjectif”"
     return any(PNnotA.search(s)  for s in lMorph)
 
 def mbVconj (lMorph):
+    "returns True if one morphology is “verbe conjugué”"
     return any(Vconj.search(s)  for s in lMorph)
 
 def mbVconj123 (lMorph):
+    "returns True if one morphology is “verbe conjugué” (but not “avoir” or “être”)"
     return any(Vconj123.search(s)  for s in lMorph)
 
 def mbMG (lMorph):
+    "returns True if one morphology is “mot grammatical”"
     return any(":G" in s  for s in lMorph)
 
 def mbInv (lMorph):
+    "returns True if one morphology is “invariable”"
     return any(":i" in s  for s in lMorph)
 
 def mbSg (lMorph):
+    "returns True if one morphology is “singulier”"
     return any(":s" in s  for s in lMorph)
 
 def mbPl (lMorph):
+    "returns True if one morphology is “pluriel”"
     return any(":p" in s  for s in lMorph)
 
 def mbEpi (lMorph):
+    "returns True if one morphology is “épicène”"
     return any(":e" in s  for s in lMorph)
 
 def mbMas (lMorph):
+    "returns True if one morphology is “masculin”"
     return any(":m" in s  for s in lMorph)
 
 def mbFem (lMorph):
+    "returns True if one morphology is “féminin”"
     return any(":f" in s  for s in lMorph)
 
 def mbNpr (lMorph):
+    "returns True if one morphology is “nom propre” or “titre de civilité”"
     return any(NP.search(s)  for s in lMorph)
 
 def mbNprMasNotFem (lMorph):
+    "returns True if one morphology is “nom propre masculin” but not “féminin”"
     if any(NPf.search(s)  for s in lMorph):
         return False
     return any(NPm.search(s)  for s in lMorph)

Index: gc_lang/fr/modules/gce_analyseur.py
==================================================================
--- gc_lang/fr/modules/gce_analyseur.py
+++ gc_lang/fr/modules/gce_analyseur.py
@@ -2,11 +2,11 @@
 from . import cregex as cr
 
 
 def rewriteSubject (s1, s2):
-    # s1 is supposed to be prn/patr/npr (M[12P])
+    "rewrite complex subject: <s1> a prn/patr/npr (M[12P]) followed by “et” and <s2>"
     if s2 == "lui":
         return "ils"
     if s2 == "moi":
         return "nous"
     if s2 == "toi":
@@ -15,11 +15,11 @@
         return "nous"
     if s2 == "vous":
         return "vous"
     if s2 == "eux":
         return "ils"
-    if s2 == "elle" or s2 == "elles": 
+    if s2 == "elle" or s2 == "elles":
         if cr.mbNprMasNotFem(_oSpellChecker.getMorph(s1)):
             return "ils"
         # si épicène, indéterminable, mais OSEF, le féminin l’emporte
         return "elles"
     return s1 + " et " + s2
@@ -39,11 +39,11 @@
             return True
     return False
 
 
 def isAmbiguousAndWrong (sWord1, sWord2, sReqMorphNA, sReqMorphConj):
-    "use it if sWord1 won’t be a verb; word2 is assumed to be True via isAmbiguousNAV"
+    "use it if <sWord1> won’t be a verb; <sWord2> is assumed to be True via isAmbiguousNAV"
     a2 = _oSpellChecker.getMorph(sWord2)
     if not a2:
         return False
     if cr.checkConjVerb(a2, sReqMorphConj):
         # verb word2 is ok
@@ -55,11 +55,11 @@
             return False
     return True
 
 
 def isVeryAmbiguousAndWrong (sWord1, sWord2, sReqMorphNA, sReqMorphConj, bLastHopeCond):
-    "use it if sWord1 can be also a verb; word2 is assumed to be True via isAmbiguousNAV"
+    "use it if <sWord1> can be also a verb; <sWord2> is assumed to be True via isAmbiguousNAV"
     a2 = _oSpellChecker.getMorph(sWord2)
     if not a2:
         return False
     if cr.checkConjVerb(a2, sReqMorphConj):
         # verb word2 is ok
@@ -77,10 +77,11 @@
             return True
     return False
 
 
 def checkAgreement (sWord1, sWord2):
+    "check agreement between <sWord1> and <sWord2>"
     a2 = _oSpellChecker.getMorph(sWord2)
     if not a2:
         return True
     a1 = _oSpellChecker.getMorph(sWord1)
     if not a1:
@@ -90,10 +91,11 @@
 
 _zUnitSpecial = re.compile("[µ/⁰¹²³⁴⁵⁶⁷⁸⁹Ωℓ·]")
 _zUnitNumbers = re.compile("[0-9]")
 
 def mbUnit (s):
+    "returns True if <s> can be a measurement unit"
     if _zUnitSpecial.search(s):
         return True
     if 1 < len(s) < 16 and s[0:1].islower() and (not s[1:].islower() or _zUnitNumbers.search(s)):
         return True
     return False
@@ -104,10 +106,11 @@
 
 _zEndOfNG1 = re.compile(" *$| +(?:, +|)(?:n(?:’|e |o(?:u?s|tre) )|l(?:’|e(?:urs?|s|) |a )|j(?:’|e )|m(?:’|es? |a |on )|t(?:’|es? |a |u )|s(?:’|es? |a )|c(?:’|e(?:t|tte|s|) )|ç(?:a |’)|ils? |vo(?:u?s|tre) )")
 _zEndOfNG2 = re.compile(r" +(\w[\w-]+)")
 _zEndOfNG3 = re.compile(r" *, +(\w[\w-]+)")
 
 def isEndOfNG (dDA, s, iOffset):
+    "returns True if next word doesn’t belong to a noun group"
     if _zEndOfNG1.match(s):
         return True
     m = _zEndOfNG2.match(s)
     if m and morphex(dDA, (iOffset+m.start(1), m.group(1)), ":[VR]", ":[NAQP]"):
         return True
@@ -120,10 +123,11 @@
 
 _zNextIsNotCOD1 = re.compile(" *,")
 _zNextIsNotCOD2 = re.compile(" +(?:[mtsnj](e +|’)|[nv]ous |tu |ils? |elles? )")
 _zNextIsNotCOD3 = re.compile(r" +([a-zéèî][\w-]+)")
 
 def isNextNotCOD (dDA, s, iOffset):
+    "returns True if next word is not a COD"
     if _zNextIsNotCOD1.match(s) or _zNextIsNotCOD2.match(s):
         return True
     m = _zNextIsNotCOD3.match(s)
     if m and morphex(dDA, (iOffset+m.start(1), m.group(1)), ":[123][sp]", ":[DM]"):
         return True
@@ -132,10 +136,11 @@
 
 _zNextIsVerb1 = re.compile(" +[nmts](?:e |’)")
 _zNextIsVerb2 = re.compile(r" +(\w[\w-]+)")
 
 def isNextVerb (dDA, s, iOffset):
+    "returns True if next word is a verb"
     if _zNextIsVerb1.match(s):
         return True
     m = _zNextIsVerb2.match(s)
     if m and morph(dDA, (iOffset+m.start(1), m.group(1)), ":[123][sp]", False):
         return True
@@ -145,6 +150,6 @@
 
 #### Exceptions
 
 aREGULARPLURAL = frozenset(["abricot", "amarante", "aubergine", "acajou", "anthracite", "brique", "caca", "café", \
                             "carotte", "cerise", "chataigne", "corail", "citron", "crème", "grave", "groseille", \
                             "jonquille", "marron", "olive", "pervenche", "prune", "sable"])
-aSHOULDBEVERB = frozenset(["aller", "manger"]) 
+aSHOULDBEVERB = frozenset(["aller", "manger"])

Index: gc_lang/fr/modules/gce_suggestions.py
==================================================================
--- gc_lang/fr/modules/gce_suggestions.py
+++ gc_lang/fr/modules/gce_suggestions.py
@@ -6,10 +6,11 @@
 
 ## Verbs
 
 def suggVerb (sFlex, sWho, funcSugg2=None):
+    "change <sFlex> conjugation according to <sWho>"
     aSugg = set()
     for sStem in _oSpellChecker.getLemma(sFlex):
         tTags = conj._getTags(sStem)
         if tTags:
             # we get the tense
@@ -38,43 +39,44 @@
     if aSugg:
         return "|".join(aSugg)
     return ""
 
 
-def suggVerbPpas (sFlex, sWhat=None):
+def suggVerbPpas (sFlex, sPattern=None):
+    "suggest past participles for <sFlex>"
     aSugg = set()
     for sStem in _oSpellChecker.getLemma(sFlex):
         tTags = conj._getTags(sStem)
         if tTags:
-            if not sWhat:
+            if not sPattern:
                 aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q1"))
                 aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q2"))
                 aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q3"))
                 aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q4"))
                 aSugg.discard("")
-            elif sWhat == ":m:s":
+            elif sPattern == ":m:s":
                 aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q1"))
-            elif sWhat == ":m:p":
+            elif sPattern == ":m:p":
                 if conj._hasConjWithTags(tTags, ":PQ", ":Q2"):
                     aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q2"))
                 else:
                     aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q1"))
-            elif sWhat == ":f:s":
+            elif sPattern == ":f:s":
                 if conj._hasConjWithTags(tTags, ":PQ", ":Q3"):
                     aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q3"))
                 else:
                     aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q1"))
-            elif sWhat == ":f:p":
+            elif sPattern == ":f:p":
                 if conj._hasConjWithTags(tTags, ":PQ", ":Q4"):
                     aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q4"))
                 else:
                     aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q1"))
-            elif sWhat == ":s":
+            elif sPattern == ":s":
                 aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q1"))
                 aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q3"))
                 aSugg.discard("")
-            elif sWhat == ":p":
== ":p": aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q2")) aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q4")) aSugg.discard("") else: aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q1")) @@ -82,10 +84,11 @@ return "|".join(aSugg) return "" def suggVerbTense (sFlex, sTense, sWho): + "change to a verb according to and " aSugg = set() for sStem in _oSpellChecker.getLemma(sFlex): if conj.hasConj(sStem, sTense, sWho): aSugg.add(conj.getConj(sStem, sTense, sWho)) if aSugg: @@ -92,10 +95,11 @@ return "|".join(aSugg) return "" def suggVerbImpe (sFlex): + "change to a verb at imperative form" aSugg = set() for sStem in _oSpellChecker.getLemma(sFlex): tTags = conj._getTags(sStem) if tTags: if conj._hasConjWithTags(tTags, ":E", ":2s"): @@ -108,19 +112,21 @@ return "|".join(aSugg) return "" def suggVerbInfi (sFlex): + "returns infinitive forms of " return "|".join([ sStem for sStem in _oSpellChecker.getLemma(sFlex) if conj.isVerb(sStem) ]) _dQuiEst = { "je": ":1s", "j’": ":1s", "j’en": ":1s", "j’y": ":1s", \ "tu": ":2s", "il": ":3s", "on": ":3s", "elle": ":3s", "nous": ":1p", "vous": ":2p", "ils": ":3p", "elles": ":3p" } _lIndicatif = [":Ip", ":Iq", ":Is", ":If"] _lSubjonctif = [":Sp", ":Sq"] def suggVerbMode (sFlex, cMode, sSuj): + "returns other conjugations of acconding to and " if cMode == ":I": lMode = _lIndicatif elif cMode == ":S": lMode = _lSubjonctif elif cMode.startswith((":I", ":S")): @@ -298,19 +304,21 @@ return "|".join(aSugg) return "" def hasFemForm (sFlex): + "return True if there is a feminine form of " for sStem in _oSpellChecker.getLemma(sFlex): if mfsp.isFemForm(sStem) or conj.hasConj(sStem, ":PQ", ":Q3"): return True if phonet.hasSimil(sFlex, ":f"): return True return False def hasMasForm (sFlex): + "return True if there is a masculine form of " for sStem in _oSpellChecker.getLemma(sFlex): if mfsp.isFemForm(sStem) or conj.hasConj(sStem, ":PQ", ":Q1"): # what has a feminine form also has a masculine form return True if phonet.hasSimil(sFlex, ":m"): @@ -317,10 +325,11 @@ return True return False def switchGender (sFlex, bPlur=None): + "return feminine or masculine form(s) of " aSugg = set() if bPlur == None: for sMorph in _oSpellChecker.getMorph(sFlex): if ":f" in sMorph: if ":s" in sMorph: @@ -351,10 +360,11 @@ return "|".join(aSugg) return "" def switchPlural (sFlex): + "return plural or singular form(s) of " aSugg = set() for sMorph in _oSpellChecker.getMorph(sFlex): if ":s" in sMorph: aSugg.add(suggPlur(sFlex)) elif ":p" in sMorph: @@ -363,10 +373,11 @@ return "|".join(aSugg) return "" def hasSimil (sWord, sPattern=None): + "return True if there is words phonetically similar to (according to if required)" return phonet.hasSimil(sWord, sPattern) def suggSimil (sWord, sPattern=None, bSubst=False): "return list of words phonetically similar to sWord and whom POS is matching sPattern" @@ -378,26 +389,29 @@ return "|".join(aSugg) return "" def suggCeOrCet (sWord): + "suggest “ce” or “cet” or both according to the first letter of " if re.match("(?i)[aeéèêiouyâîï]", sWord): return "cet" if sWord[0:1] == "h" or sWord[0:1] == "H": return "ce|cet" return "ce" def suggLesLa (sWord): + "suggest “les” or “la” according to " if any( ":p" in sMorph for sMorph in _oSpellChecker.getMorph(sWord) ): return "les|la" return "la" _zBinary = re.compile("^[01]+$") def formatNumber (s): + "add spaces or hyphens to big numbers" nLen = len(s) if nLen < 4: return s sRes = "" # nombre ordinaire @@ -428,10 +442,11 @@ sRes += "|" + s[0:2] + " " + s[2:5] + " " + s[5:7] + " " + 
     return sRes
 
 
 def formatNF (s):
+    "typography: format NF reference (norme française)"
     try:
         m = re.match("NF[  -]?(C|E|P|Q|S|X|Z|EN(?:[  -]ISO|))[  -]?([0-9]+(?:[/‑-][0-9]+|))", s)
         if not m:
             return ""
         return "NF " + m.group(1).upper().replace(" ", " ").replace("-", " ") + " " + m.group(2).replace("/", "‑").replace("-", "‑")
@@ -439,10 +454,11 @@
         traceback.print_exc()
         return "# erreur #"
 
 
 def undoLigature (c):
+    "typography: split ligature character <c> into several chars"
     if c == "fi":
         return "fi"
     elif c == "fl":
         return "fl"
     elif c == "ff":
@@ -463,10 +479,11 @@
 
 _xNormalizedCharsForInclusiveWriting = str.maketrans({
     '(': '_',  ')': '_',
     '.': '_',  '·': '_',
     '–': '_',  '—': '_',
     '/': '_'
-    })
+})
 
 
 def normalizeInclusiveWriting (sToken):
+    "typography: replace word separators used in inclusive writing by underscore (_)"
     return sToken.translate(_xNormalizedCharsForInclusiveWriting)

Index: gc_lang/fr/modules/lexicographe.py
==================================================================
--- gc_lang/fr/modules/lexicographe.py
+++ gc_lang/fr/modules/lexicographe.py
@@ -1,14 +1,17 @@
-# Grammalecte - Lexicographe
+"""
+Grammalecte - Lexicographe
+"""
+
 # License: MPL 2
 
 import re
 import traceback
 
 
-_dTAGS = {  
+_dTAGS = {
     ':N': (" nom,", "Nom"),
     ':A': (" adjectif,", "Adjectif"),
     ':M1': (" prénom,", "Prénom"),
     ':M2': (" patronyme,", "Patronyme, matronyme, nom de famille…"),
     ':MP': (" nom propre,", "Nom propre"),
@@ -78,11 +81,11 @@
     ':C': (" conjonction,", "Conjonction"),
     ':Ĉ': (" conjonction (él.),", "Conjonction (élément)"),
     ':Cc': (" conjonction de coordination,", "Conjonction de coordination"),
     ':Cs': (" conjonction de subordination,", "Conjonction de subordination"),
     ':Ĉs': (" conjonction de subordination (él.),", "Conjonction de subordination (élément)"),
-    
+
     ':Ñ': (" locution nominale (él.),", "Locution nominale (élément)"),
     ':Â': (" locution adjectivale (él.),", "Locution adjectivale (élément)"),
     ':Ṽ': (" locution verbale (él.),", "Locution verbale (élément)"),
     ':Ŵ': (" locution adverbiale (él.),", "Locution adverbiale (élément)"),
     ':Ŕ': (" locution prépositive (él.),", "Locution prépositive (élément)"),
@@ -125,18 +128,18 @@
     'elle': " pronom personnel sujet, 3ᵉ pers. fém. sing.",
     'nous': " pronom personnel sujet/objet, 1ʳᵉ pers. plur.",
     'vous': " pronom personnel sujet/objet, 2ᵉ pers. plur.",
     'ils': " pronom personnel sujet, 3ᵉ pers. masc. plur.",
     'elles': " pronom personnel sujet, 3ᵉ pers. fém. plur.",
-    
+
     "là": " particule démonstrative",
     "ci": " particule démonstrative",
-    
+
     'le': " COD, masc. sing.",
     'la': " COD, fém. sing.",
sing.", 'les': " COD, plur.", - + 'moi': " COI (à moi), sing.", 'toi': " COI (à toi), sing.", 'lui': " COI (à lui ou à elle), sing.", 'nous2': " COI (à nous), plur.", 'vous2': " COI (à vous), plur.", Index: gc_lang/fr/modules/mfsp.py ================================================================== --- gc_lang/fr/modules/mfsp.py +++ gc_lang/fr/modules/mfsp.py @@ -1,6 +1,8 @@ -# Masculins, féminins, singuliers et pluriels +""" +Masculins, féminins, singuliers et pluriels +""" from .mfsp_data import lTagMiscPlur as _lTagMiscPlur from .mfsp_data import lTagMasForm as _lTagMasForm from .mfsp_data import dMiscPlur as _dMiscPlur from .mfsp_data import dMasForm as _dMasForm Index: gc_lang/fr/modules/phonet.py ================================================================== --- gc_lang/fr/modules/phonet.py +++ gc_lang/fr/modules/phonet.py @@ -1,6 +1,9 @@ -# Grammalecte - Suggestion phonétique +""" +Grammalecte - Suggestion phonétique +""" + # License: GPL 3 import re from .phonet_data import dWord as _dWord Index: gc_lang/fr/modules/textformatter.py ================================================================== --- gc_lang/fr/modules/textformatter.py +++ gc_lang/fr/modules/textformatter.py @@ -1,6 +1,10 @@ #!python3 + +""" +Text formatter +""" import re dReplTable = { @@ -65,11 +69,11 @@ "ts_apostrophe": [ ("(?i)\\b([ldnjmtscç])['´‘′`](?=\\w)", "\\1’"), ("(?i)(qu|jusqu|lorsqu|puisqu|quoiqu|quelqu|presqu|entr|aujourd|prud)['´‘′`]", "\\1’") ], "ts_ellipsis": [ ("\\.\\.\\.", "…"), ("(?<=…)[.][.]", "…"), ("…[.](?![.])", "…") ], - "ts_n_dash_middle": [ (" [-—] ", " – "), + "ts_n_dash_middle": [ (" [-—] ", " – "), (" [-—],", " –,") ], "ts_m_dash_middle": [ (" [-–] ", " — "), (" [-–],", " —,") ], "ts_n_dash_start": [ ("^[-—][  ]", "– "), ("^– ", "– "),