Overview
Comment:      [core][fr] immunity rules to prevent false positives
SHA3-256:     2cc4bc018d1604e2082f0131424121a5
User & Date:  olr on 2018-06-23 13:05:50
Context
2018-06-23
16:28  [core] merge tokens (check-in: c051cc6ca9, user: olr, tags: core, rg)
13:05  [core][fr] immunity rules to prevent false positives (check-in: 2cc4bc018d, user: olr, tags: fr, core, rg)
11:58  [fr] restructuring of the rules for verbs with no findable subject (check-in: e94e2b3b84, user: olr, tags: fr, rg)
Changes
Modified gc_core/py/lang_core/gc_engine.py from [3fb4a33065] to [51362c8a05].
︙

            # graph rules
            if not bParagraph and bSentenceChange:
                oSentence.update(s)
                bSentenceChange = False
            for sGraphName, sLineId in lRuleGroup:
                if bDebug:
                    print("\n>>>> GRAPH:", sGraphName, sLineId)
                bParagraphChange, s = oSentence.parse(dAllGraph[sGraphName], dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext)
                dErrs.update(oSentence.dError)
        elif not sOption or dOptions.get(sOption, False):
            # regex rules
            for zRegex, bUppercase, sLineId, sRuleId, nPriority, lActions in lRuleGroup:
                if sRuleId not in _aIgnoredRules:
                    for m in zRegex.finditer(s):
                        bCondMemo = None
                        for sFuncCond, cActionType, sWhat, *eAct in lActions:
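With this change, errors found by graph rules are no longer returned by parse() but are collected from the sentence object afterwards. A minimal sketch of that call-site pattern, with a hypothetical SentenceChecker standing in for TokenSentence (not Grammalecte's actual API):

    class SentenceChecker:
        "hypothetical stand-in for TokenSentence"
        def __init__ (self):
            self.dError = {}

        def parse (self, dGraph):
            # a real implementation would walk the rule graph;
            # here we just record one fake error
            self.dError[0] = "error from graph: " + dGraph["sName"]
            return (False, "sentence unchanged")

    dErrs = {}
    oSentence = SentenceChecker()
    for dGraph in ({"sName": "graphe1"}, {"sName": "graphe2"}):
        bParagraphChange, s = oSentence.parse(dGraph)
        dErrs.update(oSentence.dError)
    print(dErrs)   # -> {0: 'error from graph: graphe2'}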
︙

            return True
    dTokenPos[nPos]["lMorph"] = lMorph
    return True


#### TOKEN SENTENCE CHECKER

class TokenSentence:

    def __init__ (self, sSentence, sSentence0, nOffset):
        self.sSentence = sSentence
        self.sSentence0 = sSentence0
        self.nOffsetWithinParagraph = nOffset
        self.lToken = list(_oTokenizer.genTokens(sSentence, True))
        self.dTokenPos = { dToken["nStart"]: dToken  for dToken in self.lToken }
        self.dTags = {}
        self.dError = {}
        self.createError = self._createWriterError  if _bWriterError  else self._createDictError

    def update (self, sSentence):
        self.sSentence = sSentence
        self.lToken = list(_oTokenizer.genTokens(sSentence, True))

    def _getNextMatchingNodes (self, dToken, dGraph, dNode, bDebug=False):
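Each token here is a plain dict carrying its character offsets, and dTokenPos indexes tokens by nStart so that positional rules can reach a token directly. A toy illustration, assuming a naive regex tokenizer in place of the real _oTokenizer:

    import re

    def genTokens (sSentence):
        "yield word tokens with their character offsets (simplistic, for illustration only)"
        for i, m in enumerate(re.finditer(r"\w+", sSentence)):
            yield {"i": i, "sValue": m.group(), "nStart": m.start(), "nEnd": m.end()}

    lToken = list(genTokens("notre père qui es aux cieux"))
    dTokenPos = { dToken["nStart"]: dToken  for dToken in lToken }
    print(dTokenPos[11]["sValue"])   # -> "qui"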
︙

            elif dNode["sType"] in sMeta:
                if bDebug:
                    print("  MATCH: *" + sMeta)
                yield dGraph[dNode["<meta>"][sMeta]]

    def parse (self, dGraph, dPriority, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False):
        self.dError = {}
        dPriority = {}  # Key = position; value = priority
        dOpt = _dOptions  if not dOptions  else dOptions
        lPointer = []
        bTagAndRewrite = False
        for dToken in self.lToken:
            if bDebug:
                print("TOKEN:", dToken["sValue"])
            # check arcs for each existing pointer
            lNextPointer = []
            for dPointer in lPointer:
                for dNode in self._getNextMatchingNodes(dToken, dGraph, dPointer["dNode"], bDebug):
                    lNextPointer.append({"iToken": dPointer["iToken"], "dNode": dNode})
            lPointer = lNextPointer
            # check arcs of first nodes
            for dNode in self._getNextMatchingNodes(dToken, dGraph, dGraph[0], bDebug):
                lPointer.append({"iToken": dToken["i"], "dNode": dNode})
            # check if there are rules to execute for each pointer
            for dPointer in lPointer:
                #if bDebug:
                #    print("+", dPointer)
                if "<rules>" in dPointer["dNode"]:
                    bChange, dErr = self._executeActions(dGraph, dPointer["dNode"]["<rules>"], dPointer["iToken"]-1, dToken["i"], dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext)
                    self.dError.update(dErr)
                    if bChange:
                        bTagAndRewrite = True
        if bTagAndRewrite:
            self.rewrite(bDebug)
        return (bTagAndRewrite, self.sSentence)

    def _executeActions (self, dGraph, dNode, nTokenOffset, nLastToken, dPriority, dOptions, sCountry, bShowRuleId, bDebug, bContext):
        "execute actions found in the DARG"
        dError = {}
        bChange = False
        for sLineId, nextNodeKey in dNode.items():
            bCondMemo = None
            for sRuleId in dGraph[nextNodeKey]:
                try:
                    if bDebug:
                        print("ACTION:", sRuleId)
                        print(dRule[sRuleId])
                    sOption, sFuncCond, cActionType, sWhat, *eAct = dRule[sRuleId]
                    # Suggestion    [ option, condition, "-", replacement/suggestion/action, iTokenStart, iTokenEnd, nPriority, message, URL ]
                    # TextProcessor [ option, condition, "~", replacement/suggestion/action, iTokenStart, iTokenEnd ]
                    # Disambiguator [ option, condition, "=", replacement/suggestion/action ]
                    # Sentence Tag  [ option, condition, "/", replacement/suggestion/action, iTokenStart, iTokenEnd ]
                    # Test          [ option, condition, ">", "" ]
                    if not sOption or dOptions.get(sOption, False):
                        bCondMemo = not sFuncCond or globals()[sFuncCond](self.lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, self.dTags, self.sSentence, self.sSentence0)
                        if bCondMemo:
                            if cActionType == "-":
                                # grammar error
                                nTokenErrorStart = nTokenOffset + eAct[0]
                                if "bImmune" not in self.lToken[nTokenErrorStart]:
                                    nTokenErrorEnd = (nTokenOffset + eAct[1])  if eAct[1]  else nLastToken
                                    nErrorStart = self.nOffsetWithinParagraph + self.lToken[nTokenErrorStart]["nStart"]
                                    nErrorEnd = self.nOffsetWithinParagraph + self.lToken[nTokenErrorEnd]["nEnd"]
                                    if nErrorStart not in dError or eAct[2] > dPriority.get(nErrorStart, -1):
                                        dError[nErrorStart] = self.createError(sWhat, nTokenOffset, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, True, eAct[3], eAct[4], bShowRuleId, "notype", bContext)
                                        dPriority[nErrorStart] = eAct[2]
                                        if bDebug:
                                            print("-", sRuleId, dError[nErrorStart])
                            elif cActionType == "~":
                                # text processor
                                nEndToken = (nTokenOffset + eAct[1])  if eAct[1]  else nLastToken
                                self._tagAndPrepareTokenForRewriting(sWhat, nTokenOffset + eAct[0], nEndToken, bDebug)
                                if bDebug:
                                    print("~", sRuleId)
                                bChange = True
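parse() implements the graph traversal behind all token rules: a list of pointers tracks every partial match through the DARG, each pointer advancing node by node as tokens are consumed, while a fresh pointer is started at the root for every token. A condensed sketch of the mechanism, with assumed and simplified node structures (token values as arc labels, a "<rules>" key marking action nodes):

    dGraph = {
        0: {"notre": 1},            # root node
        1: {"père": 2},
        2: {"<rules>": "ruleXYZ"},  # node carrying actions to execute
    }

    def parse (lToken):
        lPointer = []
        for iToken, sToken in enumerate(lToken):
            # advance every existing pointer, dropping those that do not match
            lNextPointer = []
            for dPointer in lPointer:
                dNode = dGraph[dPointer["nNode"]]
                if sToken in dNode:
                    lNextPointer.append({"iStart": dPointer["iStart"], "nNode": dNode[sToken]})
            lPointer = lNextPointer
            # try to start a new match at the root on every token
            if sToken in dGraph[0]:
                lPointer.append({"iStart": iToken, "nNode": dGraph[0][sToken]})
            # report pointers that reached a rule node
            for dPointer in lPointer:
                if "<rules>" in dGraph[dPointer["nNode"]]:
                    print("match from token", dPointer["iStart"], "to token", iToken)

    parse(["notre", "père", "qui", "es", "aux", "cieux"])   # -> match from token 0 to token 1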
︙

                            print("# error: unknown action at " + sLineId)
                    elif cActionType == ">":
                        if bDebug:
                            print(">!", sRuleId)
                        break
                except Exception as e:
                    raise Exception(str(e), sLineId, sRuleId, self.sSentence)
        return bChange, dError

    def _createWriterError (self, sSugg, nTokenOffset, iFirstToken, nStart, nEnd, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
        "error for Writer (LO/OO)"
        xErr = SingleProofreadingError()
        #xErr = uno.createUnoStruct( "com.sun.star.linguistic2.SingleProofreadingError" )
        xErr.nErrorStart = nStart
        xErr.nErrorLength = nEnd - nStart
︙

        if sWhat == "*":
            # purge text
            if nTokenRewriteEnd - nTokenRewriteStart == 0:
                self.lToken[nTokenRewriteStart]["bToRemove"] = True
            else:
                for i in range(nTokenRewriteStart, nTokenRewriteEnd+1):
                    self.lToken[i]["bToRemove"] = True
        elif sWhat == "!":
            # immunity
            if nTokenRewriteEnd - nTokenRewriteStart == 0:
                self.lToken[nTokenRewriteStart]["bImmune"] = True
            else:
                for i in range(nTokenRewriteStart, nTokenRewriteEnd+1):
                    self.lToken[i]["bImmune"] = True
        else:
            if sWhat.startswith("="):
                sWhat = globals()[sWhat[1:]](self.lToken)
            bUppercase = bUppercase and self.lToken[nTokenRewriteStart]["sValue"][0:1].isupper()
            if nTokenRewriteEnd - nTokenRewriteStart == 0:
                sWhat = sWhat + " " * (len(self.lToken[nTokenRewriteStart]["sValue"])-len(sWhat))
                if bUppercase:
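The space padding seen above is what keeps in-place rewriting offset-stable: a replacement shorter than the original token is padded to the token's length, so the sentence keeps its size and every other token's nStart/nEnd stays valid. A simplified sketch of the idea (hypothetical values; the engine itself stores the padded string in sNewValue and applies it later in rewrite()):

    sSentence = "ceux-ci sont arrivés"
    nStart, nEnd = 0, 7                                   # offsets of the token "ceux-ci"
    sWhat = "ils"
    sWhat = sWhat + " " * ((nEnd - nStart) - len(sWhat))  # pad to the original token length
    sSentence = sSentence[:nStart] + sWhat + sSentence[nEnd:]
    print(len(sSentence), repr(sSentence))                # length unchanged: 20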
︙

                    sValue = sValue[0:1].upper() + sValue[1:]
                self.lToken[i]["sNewValue"] = sValue

    def rewrite (self, bDebug=False):
        "rewrite the sentence, modify tokens, purge the token list"
        lNewToken = []
        for i, dToken in enumerate(self.lToken):
            if "bImmune" in dToken:
                nErrorStart = self.nOffsetWithinParagraph + dToken["nStart"]
                if nErrorStart in self.dError:
                    del self.dError[nErrorStart]
            if "bToRemove" in dToken:
                # remove useless token
                self.sSentence = self.sSentence[:dToken["nStart"]] + " " * (dToken["nEnd"] - dToken["nStart"]) + self.sSentence[dToken["nEnd"]:]
                if bDebug:
                    print("removed:", dToken["sValue"])
            else:
                lNewToken.append(dToken)
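Taken together, these hunks implement the immunity mechanism named in the check-in comment: a "!" text-processor action sets bImmune on the matched tokens, _executeActions skips new grammar errors on such tokens, and rewrite() additionally deletes any error already recorded at an immune token's position. A self-contained sketch of that last step, with hand-made tokens and errors (not the engine's real data):

    lToken = [
        {"sValue": "es",  "nStart": 15, "bImmune": True},
        {"sValue": "aux", "nStart": 18},
    ]
    dError = {15: "conjugation error on “es”", 30: "some other error"}
    nOffsetWithinParagraph = 0

    # what rewrite() does for immune tokens
    for dToken in lToken:
        if "bImmune" in dToken:
            nErrorStart = nOffsetWithinParagraph + dToken["nStart"]
            if nErrorStart in dError:
                del dError[nErrorStart]

    print(dError)   # -> {30: 'some other error'}: the false positive is gone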
︙

                sNewRepl = (dToken["sNewValue"] + " " * nDiffLen)  if nDiffLen >= 0  else dToken["sNewValue"][:len(dToken["sRealValue"])]
                self.sSentence = self.sSentence[:dToken["nStart"]] + sNewRepl + self.sSentence[dToken["nEnd"]:]
                del dToken["sNewValue"]
        if bDebug:
            print(self.sSentence)
        self.lToken.clear()
        self.lToken = lNewToken


#### Analyse tokens

def g_morph (dToken, sPattern, sNegPattern=""):
    "analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies"
︙
Modified gc_lang/fr/rules.grx from [cc9949070e] to [7874208805].
︙

TEST: il est normal de ne presque pas payer des gens qui effectuent un travail
TEST: j’ai l’impression de ne même pas savoir ce qu’est un « juif français ».
TEST: C’que j’comprends, c’est qu’il y a des limites à ce qu’on peut supporter.
TEST: la tentation pour certains médias de ne tout simplement pas rémunérer notre travail si celui-ci n’est finalement pas publié.
TEST: Ne parfois pas être celui qui sabote l’ambiance.


#__[i](p_notre_père_qui_es_au_cieux)__ notre père (qui est? aux cieux) @@11 <<- ~1>> *

@@@@
@@@@
@@@@
@@@@
@@@@GRAPH: graphe1
@@@@
@@@@
@@@@
@@@@

__p_notre_père_qui_es_au_cieux__
    notre père qui [es|est] aux cieux
        <<- ~4>> !
        <<- ~3:0>> *

!!
!!
!!!! Formes verbales sans sujet  !!
!!
!!
︙

TEST: {{j'}}ai signalé que {{j'}}essayais de regrouper les paragraphes. (Tests avec apostrophe droite.)
TEST: je me questionne, {{exiges}} des réponses mais n’en obtiens pas…
TEST: dans tous les cas de figure imaginés par cette bande de nuls, il n’y en a pas un qui les sauvera.
TEST: ces gens qui vont par monts et par vaux.
TEST: pour ne justement pas donner l’impression de s’être trompé.


## Incohérences avec formes verbales 1pl et 2pl sans sujet
__conj_xxxons_sans_sujet!3__
    @:1p¬:[EGMNAJ]  ~¬[nN]ous
        <<- /conj/ not (\1.istitle() and before0(r"\w")) and not tag_before(\1, "1p")
        -1>> =suggVerb(\1, ":3p")      # Ceci est un verbe à la 1ʳᵉ personne du pluriel. Sujet (“nous” ou équivalent) introuvable.

__conj_xxxez_sans_sujet!3__
    @:2p¬:[EGMNAJ]  ~¬[vV]ous
        <<- /conj/ not (\1.istitle() and before0(r"\w")) and not tag_before(\2, "2p")
        -1>> =suggVerb(\1, ":3p")      # Ceci est un verbe à la 2ᵉ personne du pluriel. Sujet (“vous” ou équivalent) introuvable.
︙
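The pattern @:1p¬:[EGMNAJ] in the rules above matches a token by its morphology: the morphology must match the pattern after "@" and must not match the anti-pattern after "¬". A rough illustration of that two-sided test (invented morphology strings; the real tags come from Grammalecte's dictionary):

    import re

    def matches (lMorph, sPattern, sNegPattern=""):
        "True if some morphology matches sPattern and none matches sNegPattern"
        if sNegPattern and any(re.search(sNegPattern, sMorph) for sMorph in lMorph):
            return False
        return any(re.search(sPattern, sMorph) for sMorph in lMorph)

    lMorph = [">chanter/:V:1p"]                   # invented tag format
    print(matches(lMorph, ":1p", ":[EGMNAJ]"))    # -> True: the rule may fire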