Comment: | [core] code cleaning (pylint) |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | core | rg |
Files: | files | file ages | folders |
SHA3-256: |
48056be413279a67e3aeca763b0e4a3a |
User & Date: | olr on 2018-06-24 14:16:22 |
Other Links: | branch diff | manifest | tags |
2018-06-24
| ||
14:49 | [core] code cleaning (pylint) check-in: f1726135a3 user: olr tags: core, rg | |
14:16 | [core] code cleaning (pylint) check-in: 48056be413 user: olr tags: core, rg | |
12:28 | [build] compile rules: generated functions are now private check-in: ec15ccafa3 user: olr tags: build, rg | |
Modified gc_core/py/lang_core/gc_engine.py from [a4200b43e6] to [4a05ed73f6].
|
| > | | > | 1 2 3 4 5 6 7 8 9 10 11 | """ Grammalecte Grammar checker engine """ import re import sys import os import traceback #import unicodedata from itertools import chain |
︙ | ︙ | |||
53 54 55 56 57 58 59 60 61 62 63 64 | # functions _createRegexError = None #### Initialization def load (sContext="Python"): global _oSpellChecker global _sAppContext global _dOptions global _oTokenizer global _createRegexError | > < | 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 | # functions _createRegexError = None #### Initialization def load (sContext="Python"): "initialization of the grammar checker" global _oSpellChecker global _sAppContext global _dOptions global _oTokenizer global _createRegexError try: _oSpellChecker = SpellChecker("${lang}", "${dic_main_filename_py}", "${dic_extended_filename_py}", "${dic_community_filename_py}", "${dic_personal_filename_py}") _sAppContext = sContext _dOptions = dict(gc_options.getOptions(sContext)) # duplication necessary, to be able to reset to default _oTokenizer = _oSpellChecker.getTokenizer() _oSpellChecker.activateStorage() _createRegexError = _createRegexWriterError if _bWriterError else _createRegexDictError |
︙ | ︙ | |||
242 243 244 245 246 247 248 | sMessage = globals()[sMsg[1:]](s, m) if sMsg[0:1] == "=" else m.expand(sMsg) xErr.aShortComment = sMessage # sMessage.split("|")[0] # in context menu xErr.aFullComment = sMessage # sMessage.split("|")[-1] # in dialog if bShowRuleId: xErr.aShortComment += " # " + sLineId + " # " + sRuleId # URL if sURL: | | | | | | 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 | sMessage = globals()[sMsg[1:]](s, m) if sMsg[0:1] == "=" else m.expand(sMsg) xErr.aShortComment = sMessage # sMessage.split("|")[0] # in context menu xErr.aFullComment = sMessage # sMessage.split("|")[-1] # in dialog if bShowRuleId: xErr.aShortComment += " # " + sLineId + " # " + sRuleId # URL if sURL: xProperty = PropertyValue() xProperty.Name = "FullCommentURL" xProperty.Value = sURL xErr.aProperties = (xProperty,) else: xErr.aProperties = () return xErr def _createRegexDictError (s, sx, sRepl, nOffset, m, iGroup, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext): "error as a dictionary" |
︙ | ︙ | |||
309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 | else: sNew = m.expand(sRepl) sNew = sNew + " " * (nLen-len(sNew)) return sSentence[0:m.start(iGroup)] + sNew + sSentence[m.end(iGroup):] def ignoreRule (sRuleId): _aIgnoredRules.add(sRuleId) def resetIgnoreRules (): _aIgnoredRules.clear() def reactivateRule (sRuleId): _aIgnoredRules.discard(sRuleId) def listRules (sFilter=None): "generator: returns typle (sOption, sLineId, sRuleId)" if sFilter: try: zFilter = re.compile(sFilter) except: echo("# Error. List rules: wrong regex.") sFilter = None for sOption, lRuleGroup in chain(_getRules(True), _getRules(False)): if sOption != "@@@@": for _, _, sLineId, sRuleId, _, _ in lRuleGroup: if not sFilter or zFilter.search(sRuleId): yield (sOption, sLineId, sRuleId) def displayRules (sFilter=None): echo("List of rules. Filter: << " + str(sFilter) + " >>") for sOption, sLineId, sRuleId in listRules(sFilter): echo("{:<10} {:<10} {}".format(sOption, sLineId, sRuleId)) def setOption (sOpt, bVal): if sOpt in _dOptions: _dOptions[sOpt] = bVal def setOptions (dOpt): for sKey, bVal in dOpt.items(): if sKey in _dOptions: _dOptions[sKey] = bVal def getOptions (): return _dOptions def getDefaultOptions (): return dict(gc_options.getOptions(_sAppContext)) def getOptionsLabels (sLang): return gc_options.getUI(sLang) def displayOptions (sLang): echo("List of options") echo("\n".join( [ k+":\t"+str(v)+"\t"+gc_options.getUI(sLang).get(k, ("?", ""))[0] for k, v in sorted(_dOptions.items()) ] )) echo("") def resetOptions (): global _dOptions _dOptions = dict(gc_options.getOptions(_sAppContext)) def getSpellChecker (): return _oSpellChecker def _getPath (): return os.path.join(os.path.dirname(sys.modules[__name__].__file__), __name__ + ".py") #### common functions def option (sOpt): | > > > > > > > > > > > > | | 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 | else: sNew = m.expand(sRepl) sNew = sNew + " " * (nLen-len(sNew)) return sSentence[0:m.start(iGroup)] + sNew + sSentence[m.end(iGroup):] def ignoreRule (sRuleId): "disable rule <sRuleId>" _aIgnoredRules.add(sRuleId) def resetIgnoreRules (): "clear all ignored rules" _aIgnoredRules.clear() def reactivateRule (sRuleId): "(re)activate rule <sRuleId>" _aIgnoredRules.discard(sRuleId) def listRules (sFilter=None): "generator: returns typle (sOption, sLineId, sRuleId)" if sFilter: try: zFilter = re.compile(sFilter) except: echo("# Error. List rules: wrong regex.") sFilter = None for sOption, lRuleGroup in chain(_getRules(True), _getRules(False)): if sOption != "@@@@": for _, _, sLineId, sRuleId, _, _ in lRuleGroup: if not sFilter or zFilter.search(sRuleId): yield (sOption, sLineId, sRuleId) def displayRules (sFilter=None): "display the name of rules, with the filter <sFilter>" echo("List of rules. Filter: << " + str(sFilter) + " >>") for sOption, sLineId, sRuleId in listRules(sFilter): echo("{:<10} {:<10} {}".format(sOption, sLineId, sRuleId)) def setOption (sOpt, bVal): "set option <sOpt> with <bVal> if it exists" if sOpt in _dOptions: _dOptions[sOpt] = bVal def setOptions (dOpt): "update the dictionary of options with <dOpt>" for sKey, bVal in dOpt.items(): if sKey in _dOptions: _dOptions[sKey] = bVal def getOptions (): "return the dictionary of current options" return _dOptions def getDefaultOptions (): "return the dictionary of default options" return dict(gc_options.getOptions(_sAppContext)) def getOptionsLabels (sLang): "return options labels" return gc_options.getUI(sLang) def displayOptions (sLang): "display the list of grammar checking options" echo("List of options") echo("\n".join( [ k+":\t"+str(v)+"\t"+gc_options.getUI(sLang).get(k, ("?", ""))[0] for k, v in sorted(_dOptions.items()) ] )) echo("") def resetOptions (): "set options to default values" global _dOptions _dOptions = dict(gc_options.getOptions(_sAppContext)) def getSpellChecker (): "return the spellchecker object" return _oSpellChecker def _getPath (): return os.path.join(os.path.dirname(sys.modules[__name__].__file__), __name__ + ".py") #### common functions def option (sOpt): "return True if option <sOpt> is active" return _dOptions.get(sOpt, False) def displayInfo (dTokenPos, tWord): "for debugging: retrieve info of word" if not tWord: echo("> nothing to find") |
︙ | ︙ | |||
413 414 415 416 417 418 419 | def morph (dTokenPos, tWord, sPattern, bStrict=True, bNoWord=False): "analyse a tuple (position, word), return True if sPattern in morphologies (disambiguation on)" if not tWord: return bNoWord lMorph = dTokenPos[tWord[0]]["lMorph"] if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]] else _oSpellChecker.getMorph(tWord[1]) if not lMorph: return False | | | | | | | | | | | | | | | | 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 | def morph (dTokenPos, tWord, sPattern, bStrict=True, bNoWord=False): "analyse a tuple (position, word), return True if sPattern in morphologies (disambiguation on)" if not tWord: return bNoWord lMorph = dTokenPos[tWord[0]]["lMorph"] if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]] else _oSpellChecker.getMorph(tWord[1]) if not lMorph: return False zPattern = re.compile(sPattern) if bStrict: return all(zPattern.search(s) for s in lMorph) return any(zPattern.search(s) for s in lMorph) def morphex (dTokenPos, tWord, sPattern, sNegPattern, bNoWord=False): "analyse a tuple (position, word), returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on)" if not tWord: return bNoWord lMorph = dTokenPos[tWord[0]]["lMorph"] if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]] else _oSpellChecker.getMorph(tWord[1]) if not lMorph: return False # check negative condition zNegPattern = re.compile(sNegPattern) if any(zNegPattern.search(s) for s in lMorph): return False # search sPattern zPattern = re.compile(sPattern) return any(zPattern.search(s) for s in lMorph) def analyse (sWord, sPattern, bStrict=True): "analyse a word, return True if sPattern in morphologies (disambiguation off)" lMorph = _oSpellChecker.getMorph(sWord) if not lMorph: return False zPattern = re.compile(sPattern) if bStrict: return all(zPattern.search(s) for s in lMorph) return any(zPattern.search(s) for s in lMorph) def analysex (sWord, sPattern, sNegPattern): "analyse a word, returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation off)" lMorph = _oSpellChecker.getMorph(sWord) if not lMorph: return False # check negative condition zNegPattern = re.compile(sNegPattern) if any(zNegPattern.search(s) for s in lMorph): return False # search sPattern zPattern = re.compile(sPattern) return any(zPattern.search(s) for s in lMorph) ## functions to get text outside pattern scope # warning: check compile_rules.py to understand how it works |
︙ | ︙ | |||
527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 | return morphex(dTokenPos, (nPos, sWord), sPatternGroup1, sNegPatternGroup1) return morph(dTokenPos, (nPos, sWord), sPatternGroup1, False) #### Disambiguator def select (dTokenPos, nPos, sWord, sPattern, lDefault=None): if not sWord: return True if nPos not in dTokenPos: print("Error. There should be a token at this position: ", nPos) return True lMorph = _oSpellChecker.getMorph(sWord) if not lMorph or len(lMorph) == 1: return True lSelect = [ sMorph for sMorph in lMorph if re.search(sPattern, sMorph) ] if lSelect: if len(lSelect) != len(lMorph): dTokenPos[nPos]["lMorph"] = lSelect elif lDefault: dTokenPos[nPos]["lMorph"] = lDefault return True def exclude (dTokenPos, nPos, sWord, sPattern, lDefault=None): if not sWord: return True if nPos not in dTokenPos: print("Error. There should be a token at this position: ", nPos) return True lMorph = _oSpellChecker.getMorph(sWord) if not lMorph or len(lMorph) == 1: return True lSelect = [ sMorph for sMorph in lMorph if not re.search(sPattern, sMorph) ] if lSelect: if len(lSelect) != len(lMorph): dTokenPos[nPos]["lMorph"] = lSelect elif lDefault: dTokenPos[nPos]["lMorph"] = lDefault return True def define (dTokenPos, nPos, lMorph): if nPos not in dTokenPos: print("Error. There should be a token at this position: ", nPos) return True dTokenPos[nPos]["lMorph"] = lMorph return True #### TOKEN SENTENCE CHECKER class TokenSentence: def __init__ (self, sSentence, sSentence0, nOffset): self.sSentence = sSentence self.sSentence0 = sSentence0 self.nOffsetWithinParagraph = nOffset self.lToken = list(_oTokenizer.genTokens(sSentence, True)) self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken } self.dTags = {} self.dError = {} self.createError = self._createWriterError if _bWriterError else self._createDictError def update (self, sSentence): self.sSentence = sSentence self.lToken = list(_oTokenizer.genTokens(sSentence, True)) def _getNextMatchingNodes (self, dToken, dGraph, dNode, bDebug=False): "generator: return nodes where <dToken> “values” match <dNode> arcs" # token value if dToken["sValue"] in dNode: | > > > > > | 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 | return morphex(dTokenPos, (nPos, sWord), sPatternGroup1, sNegPatternGroup1) return morph(dTokenPos, (nPos, sWord), sPatternGroup1, False) #### Disambiguator def select (dTokenPos, nPos, sWord, sPattern, lDefault=None): "Disambiguation: select morphologies of <sWord> matching <sPattern>" if not sWord: return True if nPos not in dTokenPos: print("Error. There should be a token at this position: ", nPos) return True lMorph = _oSpellChecker.getMorph(sWord) if not lMorph or len(lMorph) == 1: return True lSelect = [ sMorph for sMorph in lMorph if re.search(sPattern, sMorph) ] if lSelect: if len(lSelect) != len(lMorph): dTokenPos[nPos]["lMorph"] = lSelect elif lDefault: dTokenPos[nPos]["lMorph"] = lDefault return True def exclude (dTokenPos, nPos, sWord, sPattern, lDefault=None): "Disambiguation: exclude morphologies of <sWord> matching <sPattern>" if not sWord: return True if nPos not in dTokenPos: print("Error. There should be a token at this position: ", nPos) return True lMorph = _oSpellChecker.getMorph(sWord) if not lMorph or len(lMorph) == 1: return True lSelect = [ sMorph for sMorph in lMorph if not re.search(sPattern, sMorph) ] if lSelect: if len(lSelect) != len(lMorph): dTokenPos[nPos]["lMorph"] = lSelect elif lDefault: dTokenPos[nPos]["lMorph"] = lDefault return True def define (dTokenPos, nPos, lMorph): "Disambiguation: set morphologies of token at <nPos> with <lMorph>" if nPos not in dTokenPos: print("Error. There should be a token at this position: ", nPos) return True dTokenPos[nPos]["lMorph"] = lMorph return True #### TOKEN SENTENCE CHECKER class TokenSentence: "Text parser" def __init__ (self, sSentence, sSentence0, nOffset): self.sSentence = sSentence self.sSentence0 = sSentence0 self.nOffsetWithinParagraph = nOffset self.lToken = list(_oTokenizer.genTokens(sSentence, True)) self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken } self.dTags = {} self.dError = {} self.createError = self._createWriterError if _bWriterError else self._createDictError def update (self, sSentence): "update <sSentence> and retokenize" self.sSentence = sSentence self.lToken = list(_oTokenizer.genTokens(sSentence, True)) def _getNextMatchingNodes (self, dToken, dGraph, dNode, bDebug=False): "generator: return nodes where <dToken> “values” match <dNode> arcs" # token value if dToken["sValue"] in dNode: |
︙ | ︙ | |||
684 685 686 687 688 689 690 691 692 693 694 695 696 697 | elif dNode["sType"] in sMeta: if bDebug: print(" MATCH: *" + sMeta) yield dGraph[dNode["<meta>"][sMeta]] def parse (self, dGraph, dPriority, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False): self.dError = {} dPriority = {} # Key = position; value = priority dOpt = _dOptions if not dOptions else dOptions lPointer = [] bTagAndRewrite = False for dToken in self.lToken: if bDebug: | > | 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 | elif dNode["sType"] in sMeta: if bDebug: print(" MATCH: *" + sMeta) yield dGraph[dNode["<meta>"][sMeta]] def parse (self, dGraph, dPriority, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False): "parse tokens from the text and execute actions encountered" self.dError = {} dPriority = {} # Key = position; value = priority dOpt = _dOptions if not dOptions else dOptions lPointer = [] bTagAndRewrite = False for dToken in self.lToken: if bDebug: |
︙ | ︙ | |||
815 816 817 818 819 820 821 | sMessage = globals()[sMsg[1:]](self.lToken) if sMsg[0:1] == "=" else self._expand(sMsg, nTokenOffset) xErr.aShortComment = sMessage # sMessage.split("|")[0] # in context menu xErr.aFullComment = sMessage # sMessage.split("|")[-1] # in dialog if bShowRuleId: xErr.aShortComment += " " + sLineId + " # " + sRuleId # URL if sURL: | | | | | | 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 | sMessage = globals()[sMsg[1:]](self.lToken) if sMsg[0:1] == "=" else self._expand(sMsg, nTokenOffset) xErr.aShortComment = sMessage # sMessage.split("|")[0] # in context menu xErr.aFullComment = sMessage # sMessage.split("|")[-1] # in dialog if bShowRuleId: xErr.aShortComment += " " + sLineId + " # " + sRuleId # URL if sURL: xProperty = PropertyValue() xProperty.Name = "FullCommentURL" xProperty.Value = sURL xErr.aProperties = (xProperty,) else: xErr.aProperties = () return xErr def _createDictError (self, sSugg, nTokenOffset, iFirstToken, nStart, nEnd, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext): "error as a dictionary" dErr = {} |
︙ | ︙ | |||
931 932 933 934 935 936 937 | if bDebug: print("Merged token:", dTokenMerger["sValue"]) bKeepToken = False if "nMergeUntil" in dToken: if dToken["i"] > nMergeUntil: # this token is not already merged with a previous token dTokenMerger = dToken if dToken["nMergeUntil"] > nMergeUntil: | | | 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 | if bDebug: print("Merged token:", dTokenMerger["sValue"]) bKeepToken = False if "nMergeUntil" in dToken: if dToken["i"] > nMergeUntil: # this token is not already merged with a previous token dTokenMerger = dToken if dToken["nMergeUntil"] > nMergeUntil: nMergeUntil = dToken["nMergeUntil"] del dToken["nMergeUntil"] elif "bToRemove" in dToken: # remove useless token self.sSentence = self.sSentence[:dToken["nStart"]] + " " * (dToken["nEnd"] - dToken["nStart"]) + self.sSentence[dToken["nEnd"]:] if bDebug: print("removed:", dToken["sValue"]) bKeepToken = False |
︙ | ︙ |
Deleted gc_core/py/lang_core/gc_sentence.py version [c68dc1622f].
|
| < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < |
Modified gc_lang/fr/modules/conj.py from [96ddf4a9dc] to [2a91e3ad80].
|
| > | > > | 1 2 3 4 5 6 7 8 9 10 11 | """ Grammalecte - Conjugueur """ # License: GPL 3 import re import traceback from .conj_data import lVtyp as _lVtyp from .conj_data import lTags as _lTags |
︙ | ︙ | |||
25 26 27 28 29 30 31 32 33 34 35 36 37 38 | _dGroup = { "0": "auxiliaire", "1": "1ᵉʳ groupe", "2": "2ᵉ groupe", "3": "3ᵉ groupe" } _dTenseIdx = { ":PQ": 0, ":Ip": 1, ":Iq": 2, ":Is": 3, ":If": 4, ":K": 5, ":Sp": 6, ":Sq": 7, ":E": 8 } def isVerb (sVerb): return sVerb in _dVerb def getConj (sVerb, sTense, sWho): "returns conjugation (can be an empty string)" if sVerb not in _dVerb: return None | > | 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 | _dGroup = { "0": "auxiliaire", "1": "1ᵉʳ groupe", "2": "2ᵉ groupe", "3": "3ᵉ groupe" } _dTenseIdx = { ":PQ": 0, ":Ip": 1, ":Iq": 2, ":Is": 3, ":If": 4, ":K": 5, ":Sp": 6, ":Sq": 7, ":E": 8 } def isVerb (sVerb): "return True if it’s a existing verb" return sVerb in _dVerb def getConj (sVerb, sTense, sWho): "returns conjugation (can be an empty string)" if sVerb not in _dVerb: return None |
︙ | ︙ | |||
52 53 54 55 56 57 58 59 60 61 62 63 64 65 | "returns raw informations about sVerb" if sVerb not in _dVerb: return None return _lVtyp[_dVerb[sVerb][0]] def getSimil (sWord, sMorph, bSubst=False): if ":V" not in sMorph: return set() sInfi = sMorph[1:sMorph.find("/")] aSugg = set() tTags = _getTags(sInfi) if tTags: if not bSubst: | > | 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 | "returns raw informations about sVerb" if sVerb not in _dVerb: return None return _lVtyp[_dVerb[sVerb][0]] def getSimil (sWord, sMorph, bSubst=False): "returns a set of verbal forms similar to <sWord>, according to <sMorph>" if ":V" not in sMorph: return set() sInfi = sMorph[1:sMorph.find("/")] aSugg = set() tTags = _getTags(sInfi) if tTags: if not bSubst: |
︙ | ︙ | |||
96 97 98 99 100 101 102 103 104 105 106 107 108 109 | # if there is only one past participle (epi inv), unreliable. if len(aSugg) == 1: aSugg.clear() return aSugg def getConjSimilInfiV1 (sInfi): if sInfi not in _dVerb: return set() aSugg = set() tTags = _getTags(sInfi) if tTags: aSugg.add(_getConjWithTags(sInfi, tTags, ":Iq", ":2s")) aSugg.add(_getConjWithTags(sInfi, tTags, ":Iq", ":3s")) | > | 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 | # if there is only one past participle (epi inv), unreliable. if len(aSugg) == 1: aSugg.clear() return aSugg def getConjSimilInfiV1 (sInfi): "returns verbal forms phonetically similar to infinitive form (for verb in group 1)" if sInfi not in _dVerb: return set() aSugg = set() tTags = _getTags(sInfi) if tTags: aSugg.add(_getConjWithTags(sInfi, tTags, ":Iq", ":2s")) aSugg.add(_getConjWithTags(sInfi, tTags, ":Iq", ":3s")) |
︙ | ︙ | |||
138 139 140 141 142 143 144 | "returns sWord modified by sSfx" if not sSfx: return "" if sSfx == "0": return sWord try: return sWord[:-(ord(sSfx[0])-48)] + sSfx[1:] if sSfx[0] != '0' else sWord + sSfx[1:] # 48 is the ASCII code for "0" | | | > > | 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 | "returns sWord modified by sSfx" if not sSfx: return "" if sSfx == "0": return sWord try: return sWord[:-(ord(sSfx[0])-48)] + sSfx[1:] if sSfx[0] != '0' else sWord + sSfx[1:] # 48 is the ASCII code for "0" except (IndexError, TypeError): return "## erreur, code : " + str(sSfx) + " ##" class Verb (): "Verb and its conjugation" def __init__ (self, sVerb, sVerbPattern=""): # conjugate a unknown verb with rules from sVerbPattern if not isinstance(sVerb, str): raise TypeError("sVerb should be a string") if not sVerb: raise ValueError("Empty string.") |
︙ | ︙ |
Modified gc_lang/fr/modules/conj_generator.py from [2e696a65e3] to [ee0a228497].
|
| > | | > > | > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 | """ Conjugation generator beta stage, unfinished, the root for a new way to generate flexions… """ import re def conjugate (sVerb, sVerbTag="i_____a", bVarPpas=True): "conjugate <sVerb> and returns a list of tuples (conjugation form, tags)" lConj = [] cGroup = getVerbGroupChar(sVerb) for nCut, sAdd, sFlexTags, sPattern in getConjRules(sVerb, bVarPpas): if not sPattern or re.search(sPattern, sVerb): sFlexion = sVerb[0:-nCut] + sAdd if nCut else sVerb + sAdd lConj.append((sFlexion, ":V" + cGroup + "_" + sVerbTag + sFlexTags)) return lConj def getVerbGroupChar (sVerb): "returns the group number of <sVerb> guessing on its ending" sVerb = sVerb.lower() if sVerb.endswith("er"): return "1" if sVerb.endswith("ir"): return "2" if sVerb == "être" or sVerb == "avoir": return "0" if sVerb.endswith("re"): return "3" return "4" def getConjRules (sVerb, bVarPpas=True, nGroup=2): "returns a list of lists to conjugate a verb, guessing on its ending" if sVerb.endswith("er"): # premier groupe, conjugaison en fonction de la terminaison du lemme # 5 lettres if sVerb[-5:] in oConj["V1"]: lConj = list(oConj["V1"][sVerb[-5:]]) # 4 lettres elif sVerb[-4:] in oConj["V1"]: |
︙ | ︙ | |||
113 114 115 116 117 118 119 | [2, "isses", ":Sp:Sq:2s/*", False], [2, "isse", ":Sp:3s/*", False], [2, "ît", ":Sq:3s/*", False], [2, "is", ":E:2s/*", False], [2, "issons", ":E:1p/*", False], [2, "issez", ":E:2p/*", False] ], | | | 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 | [2, "isses", ":Sp:Sq:2s/*", False], [2, "isse", ":Sp:3s/*", False], [2, "ît", ":Sq:3s/*", False], [2, "is", ":E:2s/*", False], [2, "issons", ":E:1p/*", False], [2, "issez", ":E:2p/*", False] ], # premier groupe (bien plus irrégulier que prétendu) "V1": { # a # verbes en -er, -ger, -yer, -cer "er": [ [2, "er", ":Y/*", False], [2, "ant", ":P/*", False], |
︙ | ︙ |
Modified gc_lang/fr/modules/cregex.py from [a0df0d1397] to [f9eb3691d3].
|
| > | > | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 | """ Grammalecte - Compiled regular expressions """ import re #### Lemme Lemma = re.compile(r"^>(\w[\w-]*)") #### Analyses Gender = re.compile(":[mfe]") Number = re.compile(":[spi]") #### Nom et adjectif NA = re.compile(":[NA]") |
︙ | ︙ | |||
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 | NPf = re.compile(":(?:M[12P]|T):f") NPe = re.compile(":(?:M[12P]|T):e") #### FONCTIONS def getLemmaOfMorph (s): return Lemma.search(s).group(1) def checkAgreement (l1, l2): # check number agreement if not mbInv(l1) and not mbInv(l2): if mbSg(l1) and not mbSg(l2): return False if mbPl(l1) and not mbPl(l2): return False # check gender agreement | > > | 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 | NPf = re.compile(":(?:M[12P]|T):f") NPe = re.compile(":(?:M[12P]|T):e") #### FONCTIONS def getLemmaOfMorph (s): "return lemma in morphology <s>" return Lemma.search(s).group(1) def checkAgreement (l1, l2): "returns True if agreement in gender and number is possible between morphologies <l1> and <l2>" # check number agreement if not mbInv(l1) and not mbInv(l2): if mbSg(l1) and not mbSg(l2): return False if mbPl(l1) and not mbPl(l2): return False # check gender agreement |
︙ | ︙ | |||
113 114 115 116 117 118 119 | return ":e" return sGender def getNumber (lMorph): "returns number of word (':s', ':p', ':i' or empty string)." sNumber = "" for sMorph in lMorph: | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 | return ":e" return sGender def getNumber (lMorph): "returns number of word (':s', ':p', ':i' or empty string)." sNumber = "" for sMorph in lMorph: m = Number.search(sMorph) if m: if not sNumber: sNumber = m.group(0) elif sNumber != m.group(0): return ":i" return sNumber # NOTE : isWhat (lMorph) returns True if lMorph contains nothing else than What # mbWhat (lMorph) returns True if lMorph contains What at least once ## isXXX = it’s certain def isNom (lMorph): "returns True if all morphologies are “nom”" return all(":N" in s for s in lMorph) def isNomNotAdj (lMorph): "returns True if all morphologies are “nom”, but not “adjectif”" return all(NnotA.search(s) for s in lMorph) def isAdj (lMorph): "returns True if all morphologies are “adjectif”" return all(":A" in s for s in lMorph) def isNomAdj (lMorph): "returns True if all morphologies are “nom” or “adjectif”" return all(NA.search(s) for s in lMorph) def isNomVconj (lMorph): "returns True if all morphologies are “nom” or “verbe conjugué”" return all(NVconj.search(s) for s in lMorph) def isInv (lMorph): "returns True if all morphologies are “invariable”" return all(":i" in s for s in lMorph) def isSg (lMorph): "returns True if all morphologies are “singulier”" return all(":s" in s for s in lMorph) def isPl (lMorph): "returns True if all morphologies are “pluriel”" return all(":p" in s for s in lMorph) def isEpi (lMorph): "returns True if all morphologies are “épicène”" return all(":e" in s for s in lMorph) def isMas (lMorph): "returns True if all morphologies are “masculin”" return all(":m" in s for s in lMorph) def isFem (lMorph): "returns True if all morphologies are “féminin”" return all(":f" in s for s in lMorph) ## mbXXX = MAYBE XXX def mbNom (lMorph): "returns True if one morphology is “nom”" return any(":N" in s for s in lMorph) def mbAdj (lMorph): "returns True if one morphology is “adjectif”" return any(":A" in s for s in lMorph) def mbAdjNb (lMorph): "returns True if one morphology is “adjectif” or “nombre”" return any(AD.search(s) for s in lMorph) def mbNomAdj (lMorph): "returns True if one morphology is “nom” or “adjectif”" return any(NA.search(s) for s in lMorph) def mbNomNotAdj (lMorph): "returns True if one morphology is “nom”, but not “adjectif”" b = False for s in lMorph: if ":A" in s: return False if ":N" in s: b = True return b def mbPpasNomNotAdj (lMorph): "returns True if one morphology is “nom” or “participe passé”, but not “adjectif”" return any(PNnotA.search(s) for s in lMorph) def mbVconj (lMorph): "returns True if one morphology is “nom” or “verbe conjugué”" return any(Vconj.search(s) for s in lMorph) def mbVconj123 (lMorph): "returns True if one morphology is “nom” or “verbe conjugué” (but not “avoir” or “être”)" return any(Vconj123.search(s) for s in lMorph) def mbMG (lMorph): "returns True if one morphology is “mot grammatical”" return any(":G" in s for s in lMorph) def mbInv (lMorph): "returns True if one morphology is “invariable”" return any(":i" in s for s in lMorph) def mbSg (lMorph): "returns True if one morphology is “singulier”" return any(":s" in s for s in lMorph) def mbPl (lMorph): "returns True if one morphology is “pluriel”" return any(":p" in s for s in lMorph) def mbEpi (lMorph): "returns True if one morphology is “épicène”" return any(":e" in s for s in lMorph) def mbMas (lMorph): "returns True if one morphology is “masculin”" return any(":m" in s for s in lMorph) def mbFem (lMorph): "returns True if one morphology is “féminin”" return any(":f" in s for s in lMorph) def mbNpr (lMorph): "returns True if one morphology is “nom propre” or “titre de civilité”" return any(NP.search(s) for s in lMorph) def mbNprMasNotFem (lMorph): "returns True if one morphology is “nom propre masculin” but not “féminin”" if any(NPf.search(s) for s in lMorph): return False return any(NPm.search(s) for s in lMorph) |
Modified gc_lang/fr/modules/gce_analyseur.py from [50ac148025] to [57b5310cdc].
1 2 3 4 5 6 | #### GRAMMAR CHECKING ENGINE PLUGIN: Parsing functions for French language from . import cregex as cr def rewriteSubject (s1, s2): | | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 | #### GRAMMAR CHECKING ENGINE PLUGIN: Parsing functions for French language from . import cregex as cr def rewriteSubject (s1, s2): "rewrite complex subject: <s1> a prn/patr/npr (M[12P]) followed by “et” and <s2>" if s2 == "lui": return "ils" if s2 == "moi": return "nous" if s2 == "toi": return "vous" if s2 == "nous": return "nous" if s2 == "vous": return "vous" if s2 == "eux": return "ils" if s2 == "elle" or s2 == "elles": if cr.mbNprMasNotFem(_oSpellChecker.getMorph(s1)): return "ils" # si épicène, indéterminable, mais OSEF, le féminin l’emporte return "elles" return s1 + " et " + s2 |
︙ | ︙ | |||
37 38 39 40 41 42 43 | return False if cr.mbVconj(lMorph) and not cr.mbMG(lMorph): return True return False def isAmbiguousAndWrong (sWord1, sWord2, sReqMorphNA, sReqMorphConj): | | | | 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 | return False if cr.mbVconj(lMorph) and not cr.mbMG(lMorph): return True return False def isAmbiguousAndWrong (sWord1, sWord2, sReqMorphNA, sReqMorphConj): "use it if <sWord1> won’t be a verb; <sWord2> is assumed to be True via isAmbiguousNAV" a2 = _oSpellChecker.getMorph(sWord2) if not a2: return False if cr.checkConjVerb(a2, sReqMorphConj): # verb word2 is ok return False a1 = _oSpellChecker.getMorph(sWord1) if not a1: return False if cr.checkAgreement(a1, a2) and (cr.mbAdj(a2) or cr.mbAdj(a1)): return False return True def isVeryAmbiguousAndWrong (sWord1, sWord2, sReqMorphNA, sReqMorphConj, bLastHopeCond): "use it if <sWord1> can be also a verb; <sWord2> is assumed to be True via isAmbiguousNAV" a2 = _oSpellChecker.getMorph(sWord2) if not a2: return False if cr.checkConjVerb(a2, sReqMorphConj): # verb word2 is ok return False a1 = _oSpellChecker.getMorph(sWord1) |
︙ | ︙ | |||
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 | #if cr.isNomAdjVerb(a1): # considered True if bLastHopeCond: return True return False def checkAgreement (sWord1, sWord2): a2 = _oSpellChecker.getMorph(sWord2) if not a2: return True a1 = _oSpellChecker.getMorph(sWord1) if not a1: return True return cr.checkAgreement(a1, a2) _zUnitSpecial = re.compile("[µ/⁰¹²³⁴⁵⁶⁷⁸⁹Ωℓ·]") _zUnitNumbers = re.compile("[0-9]") def mbUnit (s): if _zUnitSpecial.search(s): return True if 1 < len(s) < 16 and s[0:1].islower() and (not s[1:].islower() or _zUnitNumbers.search(s)): return True return False #### Syntagmes _zEndOfNG1 = re.compile(" *$| +(?:, +|)(?:n(?:’|e |o(?:u?s|tre) )|l(?:’|e(?:urs?|s|) |a )|j(?:’|e )|m(?:’|es? |a |on )|t(?:’|es? |a |u )|s(?:’|es? |a )|c(?:’|e(?:t|tte|s|) )|ç(?:a |’)|ils? |vo(?:u?s|tre) )") _zEndOfNG2 = re.compile(r" +(\w[\w-]+)") _zEndOfNG3 = re.compile(r" *, +(\w[\w-]+)") def isEndOfNG (dDA, s, iOffset): if _zEndOfNG1.match(s): return True m = _zEndOfNG2.match(s) if m and morphex(dDA, (iOffset+m.start(1), m.group(1)), ":[VR]", ":[NAQP]"): return True m = _zEndOfNG3.match(s) if m and not morph(dDA, (iOffset+m.start(1), m.group(1)), ":[NA]", False): return True return False _zNextIsNotCOD1 = re.compile(" *,") _zNextIsNotCOD2 = re.compile(" +(?:[mtsnj](e +|’)|[nv]ous |tu |ils? |elles? )") _zNextIsNotCOD3 = re.compile(r" +([a-zéèî][\w-]+)") def isNextNotCOD (dDA, s, iOffset): if _zNextIsNotCOD1.match(s) or _zNextIsNotCOD2.match(s): return True m = _zNextIsNotCOD3.match(s) if m and morphex(dDA, (iOffset+m.start(1), m.group(1)), ":[123][sp]", ":[DM]"): return True return False _zNextIsVerb1 = re.compile(" +[nmts](?:e |’)") _zNextIsVerb2 = re.compile(r" +(\w[\w-]+)") def isNextVerb (dDA, s, iOffset): if _zNextIsVerb1.match(s): return True m = _zNextIsVerb2.match(s) if m and morph(dDA, (iOffset+m.start(1), m.group(1)), ":[123][sp]", False): return True return False #### Exceptions aREGULARPLURAL = frozenset(["abricot", "amarante", "aubergine", "acajou", "anthracite", "brique", "caca", "café", \ "carotte", "cerise", "chataigne", "corail", "citron", "crème", "grave", "groseille", \ "jonquille", "marron", "olive", "pervenche", "prune", "sable"]) | > > > > > | | 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 | #if cr.isNomAdjVerb(a1): # considered True if bLastHopeCond: return True return False def checkAgreement (sWord1, sWord2): "check agreement between <sWord1> and <sWord1>" a2 = _oSpellChecker.getMorph(sWord2) if not a2: return True a1 = _oSpellChecker.getMorph(sWord1) if not a1: return True return cr.checkAgreement(a1, a2) _zUnitSpecial = re.compile("[µ/⁰¹²³⁴⁵⁶⁷⁸⁹Ωℓ·]") _zUnitNumbers = re.compile("[0-9]") def mbUnit (s): "returns True it can be a measurement unit" if _zUnitSpecial.search(s): return True if 1 < len(s) < 16 and s[0:1].islower() and (not s[1:].islower() or _zUnitNumbers.search(s)): return True return False #### Syntagmes _zEndOfNG1 = re.compile(" *$| +(?:, +|)(?:n(?:’|e |o(?:u?s|tre) )|l(?:’|e(?:urs?|s|) |a )|j(?:’|e )|m(?:’|es? |a |on )|t(?:’|es? |a |u )|s(?:’|es? |a )|c(?:’|e(?:t|tte|s|) )|ç(?:a |’)|ils? |vo(?:u?s|tre) )") _zEndOfNG2 = re.compile(r" +(\w[\w-]+)") _zEndOfNG3 = re.compile(r" *, +(\w[\w-]+)") def isEndOfNG (dDA, s, iOffset): "returns True if next word doesn’t belong to a noun group" if _zEndOfNG1.match(s): return True m = _zEndOfNG2.match(s) if m and morphex(dDA, (iOffset+m.start(1), m.group(1)), ":[VR]", ":[NAQP]"): return True m = _zEndOfNG3.match(s) if m and not morph(dDA, (iOffset+m.start(1), m.group(1)), ":[NA]", False): return True return False _zNextIsNotCOD1 = re.compile(" *,") _zNextIsNotCOD2 = re.compile(" +(?:[mtsnj](e +|’)|[nv]ous |tu |ils? |elles? )") _zNextIsNotCOD3 = re.compile(r" +([a-zéèî][\w-]+)") def isNextNotCOD (dDA, s, iOffset): "returns True if next word is not a COD" if _zNextIsNotCOD1.match(s) or _zNextIsNotCOD2.match(s): return True m = _zNextIsNotCOD3.match(s) if m and morphex(dDA, (iOffset+m.start(1), m.group(1)), ":[123][sp]", ":[DM]"): return True return False _zNextIsVerb1 = re.compile(" +[nmts](?:e |’)") _zNextIsVerb2 = re.compile(r" +(\w[\w-]+)") def isNextVerb (dDA, s, iOffset): "returns True if next word is a verb" if _zNextIsVerb1.match(s): return True m = _zNextIsVerb2.match(s) if m and morph(dDA, (iOffset+m.start(1), m.group(1)), ":[123][sp]", False): return True return False #### Exceptions aREGULARPLURAL = frozenset(["abricot", "amarante", "aubergine", "acajou", "anthracite", "brique", "caca", "café", \ "carotte", "cerise", "chataigne", "corail", "citron", "crème", "grave", "groseille", \ "jonquille", "marron", "olive", "pervenche", "prune", "sable"]) aSHOULDBEVERB = frozenset(["aller", "manger"]) |
Modified gc_lang/fr/modules/gce_suggestions.py from [a5e2b1d398] to [2926468975].
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 | #### GRAMMAR CHECKING ENGINE PLUGIN: Suggestion mechanisms from . import conj from . import mfsp from . import phonet ## Verbs def suggVerb (sFlex, sWho, funcSugg2=None): aSugg = set() for sStem in _oSpellChecker.getLemma(sFlex): tTags = conj._getTags(sStem) if tTags: # we get the tense aTense = set() for sMorph in _oSpellChecker.getMorph(sFlex): | > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 | #### GRAMMAR CHECKING ENGINE PLUGIN: Suggestion mechanisms from . import conj from . import mfsp from . import phonet ## Verbs def suggVerb (sFlex, sWho, funcSugg2=None): "change <sFlex> conjugation according to <sWho>" aSugg = set() for sStem in _oSpellChecker.getLemma(sFlex): tTags = conj._getTags(sStem) if tTags: # we get the tense aTense = set() for sMorph in _oSpellChecker.getMorph(sFlex): |
︙ | ︙ | |||
36 37 38 39 40 41 42 | if aSugg2: aSugg.add(aSugg2) if aSugg: return "|".join(aSugg) return "" | | > | | | | | | | > > > > | 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 | if aSugg2: aSugg.add(aSugg2) if aSugg: return "|".join(aSugg) return "" def suggVerbPpas (sFlex, sPattern=None): "suggest past participles for <sFlex>" aSugg = set() for sStem in _oSpellChecker.getLemma(sFlex): tTags = conj._getTags(sStem) if tTags: if not sPattern: aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q1")) aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q2")) aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q3")) aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q4")) aSugg.discard("") elif sPattern == ":m:s": aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q1")) elif sPattern == ":m:p": if conj._hasConjWithTags(tTags, ":PQ", ":Q2"): aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q2")) else: aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q1")) elif sPattern == ":f:s": if conj._hasConjWithTags(tTags, ":PQ", ":Q3"): aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q3")) else: aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q1")) elif sPattern == ":f:p": if conj._hasConjWithTags(tTags, ":PQ", ":Q4"): aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q4")) else: aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q1")) elif sPattern == ":s": aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q1")) aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q3")) aSugg.discard("") elif sPattern == ":p": aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q2")) aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q4")) aSugg.discard("") else: aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q1")) if aSugg: return "|".join(aSugg) return "" def suggVerbTense (sFlex, sTense, sWho): "change <sFlex> to a verb according to <sTense> and <sWho>" aSugg = set() for sStem in _oSpellChecker.getLemma(sFlex): if conj.hasConj(sStem, sTense, sWho): aSugg.add(conj.getConj(sStem, sTense, sWho)) if aSugg: return "|".join(aSugg) return "" def suggVerbImpe (sFlex): "change <sFlex> to a verb at imperative form" aSugg = set() for sStem in _oSpellChecker.getLemma(sFlex): tTags = conj._getTags(sStem) if tTags: if conj._hasConjWithTags(tTags, ":E", ":2s"): aSugg.add(conj._getConjWithTags(sStem, tTags, ":E", ":2s")) if conj._hasConjWithTags(tTags, ":E", ":1p"): aSugg.add(conj._getConjWithTags(sStem, tTags, ":E", ":1p")) if conj._hasConjWithTags(tTags, ":E", ":2p"): aSugg.add(conj._getConjWithTags(sStem, tTags, ":E", ":2p")) if aSugg: return "|".join(aSugg) return "" def suggVerbInfi (sFlex): "returns infinitive forms of <sFlex>" return "|".join([ sStem for sStem in _oSpellChecker.getLemma(sFlex) if conj.isVerb(sStem) ]) _dQuiEst = { "je": ":1s", "j’": ":1s", "j’en": ":1s", "j’y": ":1s", \ "tu": ":2s", "il": ":3s", "on": ":3s", "elle": ":3s", "nous": ":1p", "vous": ":2p", "ils": ":3p", "elles": ":3p" } _lIndicatif = [":Ip", ":Iq", ":Is", ":If"] _lSubjonctif = [":Sp", ":Sq"] def suggVerbMode (sFlex, cMode, sSuj): "returns other conjugations of <sFlex> acconding to <cMode> and <sSuj>" if cMode == ":I": lMode = _lIndicatif elif cMode == ":S": lMode = _lSubjonctif elif cMode.startswith((":I", ":S")): lMode = [cMode] else: |
︙ | ︙ | |||
296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 | aSugg.add(e) if aSugg: return "|".join(aSugg) return "" def hasFemForm (sFlex): for sStem in _oSpellChecker.getLemma(sFlex): if mfsp.isFemForm(sStem) or conj.hasConj(sStem, ":PQ", ":Q3"): return True if phonet.hasSimil(sFlex, ":f"): return True return False def hasMasForm (sFlex): for sStem in _oSpellChecker.getLemma(sFlex): if mfsp.isFemForm(sStem) or conj.hasConj(sStem, ":PQ", ":Q1"): # what has a feminine form also has a masculine form return True if phonet.hasSimil(sFlex, ":m"): return True return False def switchGender (sFlex, bPlur=None): aSugg = set() if bPlur == None: for sMorph in _oSpellChecker.getMorph(sFlex): if ":f" in sMorph: if ":s" in sMorph: aSugg.add(suggMasSing(sFlex)) elif ":p" in sMorph: | > > > | 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 | aSugg.add(e) if aSugg: return "|".join(aSugg) return "" def hasFemForm (sFlex): "return True if there is a feminine form of <sFlex>" for sStem in _oSpellChecker.getLemma(sFlex): if mfsp.isFemForm(sStem) or conj.hasConj(sStem, ":PQ", ":Q3"): return True if phonet.hasSimil(sFlex, ":f"): return True return False def hasMasForm (sFlex): "return True if there is a masculine form of <sFlex>" for sStem in _oSpellChecker.getLemma(sFlex): if mfsp.isFemForm(sStem) or conj.hasConj(sStem, ":PQ", ":Q1"): # what has a feminine form also has a masculine form return True if phonet.hasSimil(sFlex, ":m"): return True return False def switchGender (sFlex, bPlur=None): "return feminine or masculine form(s) of <sFlex>" aSugg = set() if bPlur == None: for sMorph in _oSpellChecker.getMorph(sFlex): if ":f" in sMorph: if ":s" in sMorph: aSugg.add(suggMasSing(sFlex)) elif ":p" in sMorph: |
︙ | ︙ | |||
349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 | aSugg.add(suggFemSing(sFlex)) if aSugg: return "|".join(aSugg) return "" def switchPlural (sFlex): aSugg = set() for sMorph in _oSpellChecker.getMorph(sFlex): if ":s" in sMorph: aSugg.add(suggPlur(sFlex)) elif ":p" in sMorph: aSugg.add(suggSing(sFlex)) if aSugg: return "|".join(aSugg) return "" def hasSimil (sWord, sPattern=None): return phonet.hasSimil(sWord, sPattern) def suggSimil (sWord, sPattern=None, bSubst=False): "return list of words phonetically similar to sWord and whom POS is matching sPattern" aSugg = phonet.selectSimil(sWord, sPattern) for sMorph in _oSpellChecker.getMorph(sWord): aSugg.update(conj.getSimil(sWord, sMorph, bSubst)) break if aSugg: return "|".join(aSugg) return "" def suggCeOrCet (sWord): if re.match("(?i)[aeéèêiouyâîï]", sWord): return "cet" if sWord[0:1] == "h" or sWord[0:1] == "H": return "ce|cet" return "ce" def suggLesLa (sWord): if any( ":p" in sMorph for sMorph in _oSpellChecker.getMorph(sWord) ): return "les|la" return "la" _zBinary = re.compile("^[01]+$") def formatNumber (s): nLen = len(s) if nLen < 4: return s sRes = "" # nombre ordinaire nEnd = nLen while nEnd > 0: | > > > > > | 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 | aSugg.add(suggFemSing(sFlex)) if aSugg: return "|".join(aSugg) return "" def switchPlural (sFlex): "return plural or singular form(s) of <sFlex>" aSugg = set() for sMorph in _oSpellChecker.getMorph(sFlex): if ":s" in sMorph: aSugg.add(suggPlur(sFlex)) elif ":p" in sMorph: aSugg.add(suggSing(sFlex)) if aSugg: return "|".join(aSugg) return "" def hasSimil (sWord, sPattern=None): "return True if there is words phonetically similar to <sWord> (according to <sPattern> if required)" return phonet.hasSimil(sWord, sPattern) def suggSimil (sWord, sPattern=None, bSubst=False): "return list of words phonetically similar to sWord and whom POS is matching sPattern" aSugg = phonet.selectSimil(sWord, sPattern) for sMorph in _oSpellChecker.getMorph(sWord): aSugg.update(conj.getSimil(sWord, sMorph, bSubst)) break if aSugg: return "|".join(aSugg) return "" def suggCeOrCet (sWord): "suggest “ce” or “cet” or both according to the first letter of <sWord>" if re.match("(?i)[aeéèêiouyâîï]", sWord): return "cet" if sWord[0:1] == "h" or sWord[0:1] == "H": return "ce|cet" return "ce" def suggLesLa (sWord): "suggest “les” or “la” according to <sWord>" if any( ":p" in sMorph for sMorph in _oSpellChecker.getMorph(sWord) ): return "les|la" return "la" _zBinary = re.compile("^[01]+$") def formatNumber (s): "add spaces or hyphens to big numbers" nLen = len(s) if nLen < 4: return s sRes = "" # nombre ordinaire nEnd = nLen while nEnd > 0: |
︙ | ︙ | |||
426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 | elif nLen == 9 and s.startswith("0"): sRes += "|" + s[0:3] + " " + s[3:5] + " " + s[5:7] + " " + s[7:9] # fixe belge 1 sRes += "|" + s[0:2] + " " + s[2:5] + " " + s[5:7] + " " + s[7:9] # fixe belge 2 return sRes def formatNF (s): try: m = re.match("NF[ -]?(C|E|P|Q|S|X|Z|EN(?:[ -]ISO|))[ -]?([0-9]+(?:[/‑-][0-9]+|))", s) if not m: return "" return "NF " + m.group(1).upper().replace(" ", " ").replace("-", " ") + " " + m.group(2).replace("/", "‑").replace("-", "‑") except: traceback.print_exc() return "# erreur #" def undoLigature (c): if c == "fi": return "fi" elif c == "fl": return "fl" elif c == "ff": return "ff" elif c == "ffi": | > > | 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 | elif nLen == 9 and s.startswith("0"): sRes += "|" + s[0:3] + " " + s[3:5] + " " + s[5:7] + " " + s[7:9] # fixe belge 1 sRes += "|" + s[0:2] + " " + s[2:5] + " " + s[5:7] + " " + s[7:9] # fixe belge 2 return sRes def formatNF (s): "typography: format NF reference (norme française)" try: m = re.match("NF[ -]?(C|E|P|Q|S|X|Z|EN(?:[ -]ISO|))[ -]?([0-9]+(?:[/‑-][0-9]+|))", s) if not m: return "" return "NF " + m.group(1).upper().replace(" ", " ").replace("-", " ") + " " + m.group(2).replace("/", "‑").replace("-", "‑") except: traceback.print_exc() return "# erreur #" def undoLigature (c): "typography: split ligature character <c> in several chars" if c == "fi": return "fi" elif c == "fl": return "fl" elif c == "ff": return "ff" elif c == "ffi": |
︙ | ︙ | |||
461 462 463 464 465 466 467 | _xNormalizedCharsForInclusiveWriting = str.maketrans({ '(': '_', ')': '_', '.': '_', '·': '_', '–': '_', '—': '_', '/': '_' | | > | 477 478 479 480 481 482 483 484 485 486 487 488 489 | _xNormalizedCharsForInclusiveWriting = str.maketrans({ '(': '_', ')': '_', '.': '_', '·': '_', '–': '_', '—': '_', '/': '_' }) def normalizeInclusiveWriting (sToken): "typography: replace word separators used in inclusive writing by underscore (_)" return sToken.translate(_xNormalizedCharsForInclusiveWriting) |
Modified gc_lang/fr/modules/lexicographe.py from [5e53113f51] to [a9b878f391].
|
| > | > > | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 | """ Grammalecte - Lexicographe """ # License: MPL 2 import re import traceback _dTAGS = { ':N': (" nom,", "Nom"), ':A': (" adjectif,", "Adjectif"), ':M1': (" prénom,", "Prénom"), ':M2': (" patronyme,", "Patronyme, matronyme, nom de famille…"), ':MP': (" nom propre,", "Nom propre"), ':W': (" adverbe,", "Adverbe"), ':J': (" interjection,", "Interjection"), |
︙ | ︙ | |||
76 77 78 79 80 81 82 | ':O2': (" 2ᵉ pers.,", "Pronom : 2ᵉ personne"), ':O3': (" 3ᵉ pers.,", "Pronom : 3ᵉ personne"), ':C': (" conjonction,", "Conjonction"), ':Ĉ': (" conjonction (él.),", "Conjonction (élément)"), ':Cc': (" conjonction de coordination,", "Conjonction de coordination"), ':Cs': (" conjonction de subordination,", "Conjonction de subordination"), ':Ĉs': (" conjonction de subordination (él.),", "Conjonction de subordination (élément)"), | | | 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 | ':O2': (" 2ᵉ pers.,", "Pronom : 2ᵉ personne"), ':O3': (" 3ᵉ pers.,", "Pronom : 3ᵉ personne"), ':C': (" conjonction,", "Conjonction"), ':Ĉ': (" conjonction (él.),", "Conjonction (élément)"), ':Cc': (" conjonction de coordination,", "Conjonction de coordination"), ':Cs': (" conjonction de subordination,", "Conjonction de subordination"), ':Ĉs': (" conjonction de subordination (él.),", "Conjonction de subordination (élément)"), ':Ñ': (" locution nominale (él.),", "Locution nominale (élément)"), ':Â': (" locution adjectivale (él.),", "Locution adjectivale (élément)"), ':Ṽ': (" locution verbale (él.),", "Locution verbale (élément)"), ':Ŵ': (" locution adverbiale (él.),", "Locution adverbiale (élément)"), ':Ŕ': (" locution prépositive (él.),", "Locution prépositive (élément)"), ':Ĵ': (" locution interjective (él.),", "Locution interjective (élément)"), |
︙ | ︙ | |||
123 124 125 126 127 128 129 | 'il': " pronom personnel sujet, 3ᵉ pers. masc. sing.", 'on': " pronom personnel sujet, 3ᵉ pers. sing. ou plur.", 'elle': " pronom personnel sujet, 3ᵉ pers. fém. sing.", 'nous': " pronom personnel sujet/objet, 1ʳᵉ pers. plur.", 'vous': " pronom personnel sujet/objet, 2ᵉ pers. plur.", 'ils': " pronom personnel sujet, 3ᵉ pers. masc. plur.", 'elles': " pronom personnel sujet, 3ᵉ pers. masc. plur.", | | | | | 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 | 'il': " pronom personnel sujet, 3ᵉ pers. masc. sing.", 'on': " pronom personnel sujet, 3ᵉ pers. sing. ou plur.", 'elle': " pronom personnel sujet, 3ᵉ pers. fém. sing.", 'nous': " pronom personnel sujet/objet, 1ʳᵉ pers. plur.", 'vous': " pronom personnel sujet/objet, 2ᵉ pers. plur.", 'ils': " pronom personnel sujet, 3ᵉ pers. masc. plur.", 'elles': " pronom personnel sujet, 3ᵉ pers. masc. plur.", "là": " particule démonstrative", "ci": " particule démonstrative", 'le': " COD, masc. sing.", 'la': " COD, fém. sing.", 'les': " COD, plur.", 'moi': " COI (à moi), sing.", 'toi': " COI (à toi), sing.", 'lui': " COI (à lui ou à elle), sing.", 'nous2': " COI (à nous), plur.", 'vous2': " COI (à vous), plur.", 'leur': " COI (à eux ou à elles), plur.", |
︙ | ︙ |
Modified gc_lang/fr/modules/mfsp.py from [3f4814b5d6] to [8b7759e076].
|
| > | > | 1 2 3 4 5 6 7 8 9 10 | """ Masculins, féminins, singuliers et pluriels """ from .mfsp_data import lTagMiscPlur as _lTagMiscPlur from .mfsp_data import lTagMasForm as _lTagMasForm from .mfsp_data import dMiscPlur as _dMiscPlur from .mfsp_data import dMasForm as _dMasForm |
︙ | ︙ |
Modified gc_lang/fr/modules/phonet.py from [cc107e0763] to [df9f884192].
|
| > | > > | 1 2 3 4 5 6 7 8 9 10 11 | """ Grammalecte - Suggestion phonétique """ # License: GPL 3 import re from .phonet_data import dWord as _dWord from .phonet_data import lSet as _lSet from .phonet_data import dMorph as _dMorph |
︙ | ︙ |
Modified gc_lang/fr/modules/textformatter.py from [8fb9ec33bf] to [d3e695233d].
1 2 3 4 5 6 7 8 | #!python3 import re dReplTable = { # surnumerary_spaces "start_of_paragraph": [("^[ ]+", "")], | > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 | #!python3 """ Text formatter """ import re dReplTable = { # surnumerary_spaces "start_of_paragraph": [("^[ ]+", "")], |
︙ | ︙ | |||
63 64 65 66 67 68 69 | "erase_non_breaking_hyphens": [("", "")], ## typographic signs "ts_apostrophe": [ ("(?i)\\b([ldnjmtscç])['´‘′`](?=\\w)", "\\1’"), ("(?i)(qu|jusqu|lorsqu|puisqu|quoiqu|quelqu|presqu|entr|aujourd|prud)['´‘′`]", "\\1’") ], "ts_ellipsis": [ ("\\.\\.\\.", "…"), ("(?<=…)[.][.]", "…"), ("…[.](?![.])", "…") ], | | | 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 | "erase_non_breaking_hyphens": [("", "")], ## typographic signs "ts_apostrophe": [ ("(?i)\\b([ldnjmtscç])['´‘′`](?=\\w)", "\\1’"), ("(?i)(qu|jusqu|lorsqu|puisqu|quoiqu|quelqu|presqu|entr|aujourd|prud)['´‘′`]", "\\1’") ], "ts_ellipsis": [ ("\\.\\.\\.", "…"), ("(?<=…)[.][.]", "…"), ("…[.](?![.])", "…") ], "ts_n_dash_middle": [ (" [-—] ", " – "), (" [-—],", " –,") ], "ts_m_dash_middle": [ (" [-–] ", " — "), (" [-–],", " —,") ], "ts_n_dash_start": [ ("^[-—][ ]", "– "), ("^– ", "– "), ("^[-–—](?=[\\w.…])", "– ") ], "ts_m_dash_start": [ ("^[-–][ ]", "— "), |
︙ | ︙ |