Overview
Comment:     [core] gc engine: better debugging info
Downloads:   Tarball | ZIP archive | SQL archive
Timelines:   family | ancestors | descendants | both | core | rg
Files:       files | file ages | folders
SHA3-256:    2c534c0712d50384ab9aacffe66f5799
User & Date: olr on 2018-06-27 18:30:19
Other Links: branch diff | manifest | tags
Context
2018-06-27
18:51  [core] gc_engine: remove text merged in another token + code clarification
       check-in: acaa2bad83  user: olr  tags: core, rg
18:30  [core] gc engine: better debugging info
       check-in: 2c534c0712  user: olr  tags: core, rg
17:05  [fr] rewrite rule rewriting subjets
       check-in: 345078f4f4  user: olr  tags: fr, rg
Changes
Modified gc_core/py/lang_core/gc_engine.py from [f29cb1d51a] to [62d0f14ee9].
︙
        dError = {}
        bChange = False
        for sLineId, nextNodeKey in dNode.items():
            bCondMemo = None
            for sRuleId in dGraph[nextNodeKey]:
                try:
                    if bDebug:
                        print("   TRY:", sRuleId)
                    sOption, sFuncCond, cActionType, sWhat, *eAct = dRule[sRuleId]
                    # Suggestion    [ option, condition, "-", replacement/suggestion/action, iTokenStart, iTokenEnd, nPriority, message, URL ]
                    # TextProcessor [ option, condition, "~", replacement/suggestion/action, iTokenStart, iTokenEnd ]
                    # Disambiguator [ option, condition, "=", replacement/suggestion/action ]
                    # Sentence Tag  [ option, condition, "/", replacement/suggestion/action, iTokenStart, iTokenEnd ]
                    # Test          [ option, condition, ">", "" ]
                    if not sOption or dOptions.get(sOption, False):
                        bCondMemo = not sFuncCond or globals()[sFuncCond](self.lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, self.dTags, self.sSentence, self.sSentence0)
                        if bCondMemo:
                            if cActionType == "-":
                                # grammar error
                                nTokenErrorStart = nTokenOffset + eAct[0]
                                if "bImmune" not in self.lToken[nTokenErrorStart]:
                                    nTokenErrorEnd = (nTokenOffset + eAct[1])  if eAct[1]  else nLastToken
                                    nErrorStart = self.nOffsetWithinParagraph + self.lToken[nTokenErrorStart]["nStart"]
                                    nErrorEnd = self.nOffsetWithinParagraph + self.lToken[nTokenErrorEnd]["nEnd"]
                                    if nErrorStart not in dError or eAct[2] > dPriority.get(nErrorStart, -1):
                                        dError[nErrorStart] = self.createError(sWhat, nTokenOffset, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, True, eAct[3], eAct[4], bShowRuleId, "notype", bContext)
                                        dPriority[nErrorStart] = eAct[2]
                                        if bDebug:
                                            print("   NEW_ERROR:", dError[nErrorStart], "\n  ", dRule[sRuleId])
                            elif cActionType == "~":
                                # text processor
                                if bDebug:
                                    print("   TAG_PREPARE:\n  ", dRule[sRuleId])
                                nEndToken = (nTokenOffset + eAct[1])  if eAct[1]  else nLastToken
                                self._tagAndPrepareTokenForRewriting(sWhat, nTokenOffset + eAct[0], nEndToken, nTokenOffset, True, bDebug)
                                bChange = True
                            elif cActionType == "=":
                                # disambiguation
                                if bDebug:
                                    print("   DISAMBIGUATOR:\n  ", dRule[sRuleId])
                                globals()[sWhat](self.lToken, nTokenOffset)
                            elif cActionType == ">":
                                # we do nothing, this test is just a condition to apply all following actions
                                if bDebug:
                                    print("   COND_OK")
                                pass
                            elif cActionType == "/":
                                # sentence tags
                                if bDebug:
                                    print("   SENTENCE_TAG:\n  ", dRule[sRuleId])
                                nTokenTag = nTokenOffset + eAct[0]
                                if sWhat not in self.dTags:
                                    self.dTags[sWhat] = (nTokenTag, nTokenTag)
                                elif nTokenTag > self.dTags[sWhat][1]:
                                    self.dTags[sWhat] = (self.dTags[sWhat][0], nTokenTag)
                            else:
                                print("# error: unknown action at " + sLineId)
                        elif cActionType == ">":
                            if bDebug:
                                print("   COND_BREAK")
                            break
                except Exception as e:
                    raise Exception(str(e), sLineId, sRuleId, self.sSentence)
        return bChange, dError

    def _createWriterError (self, sSugg, nTokenOffset, iFirstToken, nStart, nEnd, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
        "error for Writer (LO/OO)"
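The dispatch above keys on the third field of each compiled rule, a one-character action code whose tuple layouts are documented in the inline comments. The following is a minimal standalone sketch of that dispatch pattern; the dRule entries and the describeAction helper are hypothetical illustrations for this page, not engine code.

    # Hypothetical rule store: only the tuple layout follows the comments
    # documented in the hunk above: [ option, condition, actionType, what, ... ]
    dRule = {
        "rule_example_1": [None, "", "-", "suggestion", 1, 2, 4, "message", ""],  # grammar error
        "rule_example_2": [None, "", "~", "*", 1, 1],                             # text processor
        "rule_example_3": [None, "", ">", ""],                                    # condition-only test
    }

    def describeAction (sRuleId):
        "return a human-readable label for a rule's one-character action code"
        sOption, sFuncCond, cActionType, sWhat, *eAct = dRule[sRuleId]
        dLabel = { "-": "grammar error", "~": "text processor", "=": "disambiguator",
                   "/": "sentence tag", ">": "condition test" }
        return dLabel.get(cActionType, "unknown action")

    for sRuleId in dRule:
        print(sRuleId, "->", describeAction(sRuleId))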
︙
            sText = sText.replace(m.group(0), self.lToken[int(m.group(1))+nTokenOffset]["sValue"])
        #print(">", sText)
        return sText

    def _tagAndPrepareTokenForRewriting (self, sWhat, nTokenRewriteStart, nTokenRewriteEnd, nTokenOffset, bUppercase=True, bDebug=False):
        "text processor: rewrite tokens between <nTokenRewriteStart> and <nTokenRewriteEnd> position"
        if bDebug:
            print("   REWRITING:", nTokenRewriteStart, nTokenRewriteEnd)
        if sWhat == "*":
            # purge text
            if nTokenRewriteEnd - nTokenRewriteStart == 0:
                self.lToken[nTokenRewriteStart]["bToRemove"] = True
            else:
                for i in range(nTokenRewriteStart, nTokenRewriteEnd+1):
                    self.lToken[i]["bToRemove"] = True
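Note that when sWhat is "*", the tokens in the range are only flagged with bToRemove here; the actual purge happens later in rewrite(). A minimal sketch of that two-phase idea, on a hand-built token list (the tokens and the flagForRemoval helper are hypothetical, not engine code):

    # Hypothetical token list for "Hello useless world"
    lToken = [
        {"i": 0, "sValue": "Hello",   "nStart": 0,  "nEnd": 5},
        {"i": 1, "sValue": "useless", "nStart": 6,  "nEnd": 13},
        {"i": 2, "sValue": "world",   "nStart": 14, "nEnd": 19},
    ]

    def flagForRemoval (lToken, nTokenRewriteStart, nTokenRewriteEnd):
        "flag tokens from <nTokenRewriteStart> to <nTokenRewriteEnd> (inclusive) for later removal"
        if nTokenRewriteEnd - nTokenRewriteStart == 0:
            lToken[nTokenRewriteStart]["bToRemove"] = True
        else:
            for i in range(nTokenRewriteStart, nTokenRewriteEnd+1):
                lToken[i]["bToRemove"] = True

    flagForRemoval(lToken, 1, 1)
    print([dToken["sValue"] for dToken in lToken if "bToRemove" not in dToken])
    # ['Hello', 'world']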
︙
        else:
            if bUppercase:
                sValue = sValue[0:1].upper() + sValue[1:]
            self.lToken[i]["sNewValue"] = sValue

    def rewrite (self, bDebug=False):
        "rewrite the sentence, modify tokens, purge the token list"
        if bDebug:
            print("REWRITE")
        lNewToken = []
        nMergeUntil = 0
        dTokenMerger = None
        for dToken in self.lToken:
            bKeepToken = True
            if "bImmune" in dToken:
                nErrorStart = self.nOffsetWithinParagraph + dToken["nStart"]
                if nErrorStart in self.dError:
                    if bDebug:
                        print("immunity -> error removed:", self.dError[nErrorStart])
                    del self.dError[nErrorStart]
            if nMergeUntil and dToken["i"] <= nMergeUntil:
                dTokenMerger["sValue"] += " " * (dToken["nStart"] - dTokenMerger["nEnd"]) + dToken["sValue"]
                dTokenMerger["nEnd"] = dToken["nEnd"]
                if bDebug:
                    print("  MERGED TOKEN:", dTokenMerger["sValue"])
                bKeepToken = False
            if "nMergeUntil" in dToken:
                if dToken["i"] > nMergeUntil: # this token is not already merged with a previous token
                    dTokenMerger = dToken
                if dToken["nMergeUntil"] > nMergeUntil:
                    nMergeUntil = dToken["nMergeUntil"]
                del dToken["nMergeUntil"]
            elif "bToRemove" in dToken:
                # remove useless token
                self.sSentence = self.sSentence[:dToken["nStart"]] + " " * (dToken["nEnd"] - dToken["nStart"]) + self.sSentence[dToken["nEnd"]:]
                if bDebug:
                    print("  REMOVED:", dToken["sValue"])
                bKeepToken = False
            #
            if bKeepToken:
                lNewToken.append(dToken)
                if "sNewValue" in dToken:
                    # rewrite token and sentence
                    if bDebug:
                        print(dToken["sValue"], "->", dToken["sNewValue"])
                    dToken["sRealValue"] = dToken["sValue"]
                    dToken["sValue"] = dToken["sNewValue"]
                    nDiffLen = len(dToken["sRealValue"]) - len(dToken["sNewValue"])
                    sNewRepl = (dToken["sNewValue"] + " " * nDiffLen)  if nDiffLen >= 0  else dToken["sNewValue"][:len(dToken["sRealValue"])]
                    self.sSentence = self.sSentence[:dToken["nStart"]] + sNewRepl + self.sSentence[dToken["nEnd"]:]
                    del dToken["sNewValue"]
        if bDebug:
            print("  REWRITED:", self.sSentence)
        self.lToken.clear()
        self.lToken = lNewToken

    #### Analyse tokens
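The nDiffLen arithmetic in rewrite() keeps the sentence the same length after every substitution: a shorter replacement is padded with spaces and a longer one is truncated to the original span, so the nStart/nEnd offsets of all remaining tokens stay valid. A minimal sketch of that invariant as a standalone function (replaceKeepingOffsets is a hypothetical name; the engine does this inline on self.sSentence):

    def replaceKeepingOffsets (sSentence, nStart, nEnd, sNewValue):
        "replace sSentence[nStart:nEnd] by sNewValue without shifting character offsets"
        nDiffLen = (nEnd - nStart) - len(sNewValue)
        # pad with spaces if the replacement is shorter, truncate if longer
        sNewRepl = (sNewValue + " " * nDiffLen)  if nDiffLen >= 0  else sNewValue[:nEnd-nStart]
        return sSentence[:nStart] + sNewRepl + sSentence[nEnd:]

    sSentence = "The colour is red."
    print(replaceKeepingOffsets(sSentence, 4, 10, "color"))
    # "The color  is red."  (padded with one space; total length unchanged)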
︙