@@ -1,7 +1,9 @@ -# Grammalecte -# Grammar checker engine +""" +Grammalecte +Grammar checker engine +""" import re import sys import os import traceback @@ -55,16 +57,16 @@ #### Initialization def load (sContext="Python"): + "initialization of the grammar checker" global _oSpellChecker global _sAppContext global _dOptions global _oTokenizer global _createRegexError - global _createTokenError try: _oSpellChecker = SpellChecker("${lang}", "${dic_main_filename_py}", "${dic_extended_filename_py}", "${dic_community_filename_py}", "${dic_personal_filename_py}") _sAppContext = sContext _dOptions = dict(gc_options.getOptions(sContext)) # duplication necessary, to be able to reset to default _oTokenizer = _oSpellChecker.getTokenizer() @@ -244,14 +246,14 @@ xErr.aFullComment = sMessage # sMessage.split("|")[-1] # in dialog if bShowRuleId: xErr.aShortComment += " # " + sLineId + " # " + sRuleId # URL if sURL: - p = PropertyValue() - p.Name = "FullCommentURL" - p.Value = sURL - xErr.aProperties = (p,) + xProperty = PropertyValue() + xProperty.Name = "FullCommentURL" + xProperty.Value = sURL + xErr.aProperties = (xProperty,) else: xErr.aProperties = () return xErr @@ -311,18 +313,21 @@ sNew = sNew + " " * (nLen-len(sNew)) return sSentence[0:m.start(iGroup)] + sNew + sSentence[m.end(iGroup):] def ignoreRule (sRuleId): + "disable rule " _aIgnoredRules.add(sRuleId) def resetIgnoreRules (): + "clear all ignored rules" _aIgnoredRules.clear() def reactivateRule (sRuleId): + "(re)activate rule " _aIgnoredRules.discard(sRuleId) def listRules (sFilter=None): "generator: returns typle (sOption, sLineId, sRuleId)" @@ -338,50 +343,59 @@ if not sFilter or zFilter.search(sRuleId): yield (sOption, sLineId, sRuleId) def displayRules (sFilter=None): + "display the name of rules, with the filter " echo("List of rules. Filter: << " + str(sFilter) + " >>") for sOption, sLineId, sRuleId in listRules(sFilter): echo("{:<10} {:<10} {}".format(sOption, sLineId, sRuleId)) def setOption (sOpt, bVal): + "set option with if it exists" if sOpt in _dOptions: _dOptions[sOpt] = bVal def setOptions (dOpt): + "update the dictionary of options with " for sKey, bVal in dOpt.items(): if sKey in _dOptions: _dOptions[sKey] = bVal def getOptions (): + "return the dictionary of current options" return _dOptions def getDefaultOptions (): + "return the dictionary of default options" return dict(gc_options.getOptions(_sAppContext)) def getOptionsLabels (sLang): + "return options labels" return gc_options.getUI(sLang) def displayOptions (sLang): + "display the list of grammar checking options" echo("List of options") echo("\n".join( [ k+":\t"+str(v)+"\t"+gc_options.getUI(sLang).get(k, ("?", ""))[0] for k, v in sorted(_dOptions.items()) ] )) echo("") def resetOptions (): + "set options to default values" global _dOptions _dOptions = dict(gc_options.getOptions(_sAppContext)) def getSpellChecker (): + "return the spellchecker object" return _oSpellChecker def _getPath (): return os.path.join(os.path.dirname(sys.modules[__name__].__file__), __name__ + ".py") @@ -389,11 +403,11 @@ #### common functions def option (sOpt): - "return True if option sOpt is active" + "return True if option is active" return _dOptions.get(sOpt, False) def displayInfo (dTokenPos, tWord): "for debugging: retrieve info of word" @@ -415,14 +429,14 @@ if not tWord: return bNoWord lMorph = dTokenPos[tWord[0]]["lMorph"] if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]] else _oSpellChecker.getMorph(tWord[1]) if not lMorph: return False - p = re.compile(sPattern) + zPattern = re.compile(sPattern) if bStrict: - return all(p.search(s) for s in lMorph) - return any(p.search(s) for s in lMorph) + return all(zPattern.search(s) for s in lMorph) + return any(zPattern.search(s) for s in lMorph) def morphex (dTokenPos, tWord, sPattern, sNegPattern, bNoWord=False): "analyse a tuple (position, word), returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on)" if not tWord: @@ -429,41 +443,41 @@ return bNoWord lMorph = dTokenPos[tWord[0]]["lMorph"] if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]] else _oSpellChecker.getMorph(tWord[1]) if not lMorph: return False # check negative condition - np = re.compile(sNegPattern) - if any(np.search(s) for s in lMorph): + zNegPattern = re.compile(sNegPattern) + if any(zNegPattern.search(s) for s in lMorph): return False # search sPattern - p = re.compile(sPattern) - return any(p.search(s) for s in lMorph) + zPattern = re.compile(sPattern) + return any(zPattern.search(s) for s in lMorph) def analyse (sWord, sPattern, bStrict=True): "analyse a word, return True if sPattern in morphologies (disambiguation off)" lMorph = _oSpellChecker.getMorph(sWord) if not lMorph: return False - p = re.compile(sPattern) + zPattern = re.compile(sPattern) if bStrict: - return all(p.search(s) for s in lMorph) - return any(p.search(s) for s in lMorph) + return all(zPattern.search(s) for s in lMorph) + return any(zPattern.search(s) for s in lMorph) def analysex (sWord, sPattern, sNegPattern): "analyse a word, returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation off)" lMorph = _oSpellChecker.getMorph(sWord) if not lMorph: return False # check negative condition - np = re.compile(sNegPattern) - if any(np.search(s) for s in lMorph): + zNegPattern = re.compile(sNegPattern) + if any(zNegPattern.search(s) for s in lMorph): return False # search sPattern - p = re.compile(sPattern) - return any(p.search(s) for s in lMorph) + zPattern = re.compile(sPattern) + return any(zPattern.search(s) for s in lMorph) ## functions to get text outside pattern scope @@ -529,10 +543,11 @@ #### Disambiguator def select (dTokenPos, nPos, sWord, sPattern, lDefault=None): + "Disambiguation: select morphologies of matching " if not sWord: return True if nPos not in dTokenPos: print("Error. There should be a token at this position: ", nPos) return True @@ -547,10 +562,11 @@ dTokenPos[nPos]["lMorph"] = lDefault return True def exclude (dTokenPos, nPos, sWord, sPattern, lDefault=None): + "Disambiguation: exclude morphologies of matching " if not sWord: return True if nPos not in dTokenPos: print("Error. There should be a token at this position: ", nPos) return True @@ -565,10 +581,11 @@ dTokenPos[nPos]["lMorph"] = lDefault return True def define (dTokenPos, nPos, lMorph): + "Disambiguation: set morphologies of token at with " if nPos not in dTokenPos: print("Error. There should be a token at this position: ", nPos) return True dTokenPos[nPos]["lMorph"] = lMorph return True @@ -577,10 +594,11 @@ #### TOKEN SENTENCE CHECKER class TokenSentence: + "Text parser" def __init__ (self, sSentence, sSentence0, nOffset): self.sSentence = sSentence self.sSentence0 = sSentence0 self.nOffsetWithinParagraph = nOffset @@ -589,10 +607,11 @@ self.dTags = {} self.dError = {} self.createError = self._createWriterError if _bWriterError else self._createDictError def update (self, sSentence): + "update and retokenize" self.sSentence = sSentence self.lToken = list(_oTokenizer.genTokens(sSentence, True)) def _getNextMatchingNodes (self, dToken, dGraph, dNode, bDebug=False): "generator: return nodes where “values” match arcs" @@ -686,10 +705,11 @@ print(" MATCH: *" + sMeta) yield dGraph[dNode[""][sMeta]] def parse (self, dGraph, dPriority, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False): + "parse tokens from the text and execute actions encountered" self.dError = {} dPriority = {} # Key = position; value = priority dOpt = _dOptions if not dOptions else dOptions lPointer = [] bTagAndRewrite = False @@ -817,14 +837,14 @@ xErr.aFullComment = sMessage # sMessage.split("|")[-1] # in dialog if bShowRuleId: xErr.aShortComment += " " + sLineId + " # " + sRuleId # URL if sURL: - p = PropertyValue() - p.Name = "FullCommentURL" - p.Value = sURL - xErr.aProperties = (p,) + xProperty = PropertyValue() + xProperty.Name = "FullCommentURL" + xProperty.Value = sURL + xErr.aProperties = (xProperty,) else: xErr.aProperties = () return xErr def _createDictError (self, sSugg, nTokenOffset, iFirstToken, nStart, nEnd, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext): @@ -933,11 +953,11 @@ bKeepToken = False if "nMergeUntil" in dToken: if dToken["i"] > nMergeUntil: # this token is not already merged with a previous token dTokenMerger = dToken if dToken["nMergeUntil"] > nMergeUntil: - nMergeUntil = dToken["nMergeUntil"] + nMergeUntil = dToken["nMergeUntil"] del dToken["nMergeUntil"] elif "bToRemove" in dToken: # remove useless token self.sSentence = self.sSentence[:dToken["nStart"]] + " " * (dToken["nEnd"] - dToken["nStart"]) + self.sSentence[dToken["nEnd"]:] if bDebug: