Overview
Comment: [core][bug] gc engine: use ad hoc token index
SHA3-256: 28025b7ef5aea0cf70ab7cf9e3f492e4
User & Date: olr on 2018-07-24 22:14:18
Context
2018-07-25
  15:04  [build] get token outside secure scope  (check-in: 245941cc07, user: olr, tags: build, rg)
2018-07-24
  22:14  [core][bug] gc engine: use ad hoc token index  (check-in: 28025b7ef5, user: olr, tags: core, rg)
  22:13  [fr] conversion: regex rules -> graph rules  (check-in: 2b52a5e3d7, user: olr, tags: fr, rg)
Changes
Modified gc_core/py/lang_core/gc_engine.py from [903e91d939] to [a408aa043b].
︙
Hunk 1, new lines 709-740 (the parse method):

                    yield dGraph[dNode["<meta>"][sMeta]]

    def parse (self, dGraph, dPriority, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False):
        "parse tokens from the text and execute actions encountered"
        dOpt = _dOptions if not dOptions else dOptions
        lPointer = []
        bTagAndRewrite = False
        for i, dToken in enumerate(self.lToken):
            if bDebug:
                print("TOKEN:", dToken["sValue"])
            # check arcs for each existing pointer
            lNextPointer = []
            for dPointer in lPointer:
                for dNode in self._getNextMatchingNodes(dToken, dGraph, dPointer["dNode"], bDebug):
                    lNextPointer.append({"iToken": dPointer["iToken"], "dNode": dNode})
            lPointer = lNextPointer
            # check arcs of first nodes
            for dNode in self._getNextMatchingNodes(dToken, dGraph, dGraph[0], bDebug):
                lPointer.append({"iToken": i, "dNode": dNode})
            # check if there is rules to check for each pointer
            for dPointer in lPointer:
                #if bDebug:
                #    print("+", dPointer)
                if "<rules>" in dPointer["dNode"]:
                    bChange = self._executeActions(dGraph, dPointer["dNode"]["<rules>"], dPointer["iToken"]-1, i, dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext)
                    if bChange:
                        bTagAndRewrite = True
        if bTagAndRewrite:
            self.rewrite(bDebug)
        if bDebug:
            print(self)
        return (bTagAndRewrite, self.sSentence)
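The heart of the change is the loop header: the token index i now comes from enumerate(self.lToken), matching the check-in comment about an ad hoc token index, rather than from state stored elsewhere that could go stale once tokens are merged or removed. Below is a minimal, self-contained sketch of the same pointer-walking pattern over a toy rule graph; the graph layout and node values are made up for illustration and are not Grammalecte's real node format:

    # Toy graph: match the token sequence "very" -> "good".
    # Reaching the "<match>" value means the rule applies.
    dGraph = {
        0: {"very": 1},              # arcs out of the start node
        1: {"good": "<match>"},
    }

    def parse(lToken):
        lPointer = []
        for i, dToken in enumerate(lToken):
            lNextPointer = []
            # advance every open pointer along a matching arc, if any
            for dPointer in lPointer:
                dArcs = dGraph.get(dPointer["dNode"], {})
                if dToken["sValue"] in dArcs:
                    lNextPointer.append({"iToken": dPointer["iToken"], "dNode": dArcs[dToken["sValue"]]})
            lPointer = lNextPointer
            # every token may also start a fresh match from the root node
            if dToken["sValue"] in dGraph[0]:
                lPointer.append({"iToken": i, "dNode": dGraph[0][dToken["sValue"]]})
            # report completed matches
            for dPointer in lPointer:
                if dPointer["dNode"] == "<match>":
                    print("match from token", dPointer["iToken"], "to token", i)

    parse([{"sValue": s} for s in "a very good idea".split()])
    # -> match from token 1 to token 2

Each open pointer remembers where its match started (iToken) and which node it has reached (dNode); a pointer that cannot advance simply drops out of lNextPointer.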
︙
Hunk 2, new lines 930-960 (the rewrite method):

    def rewrite (self, bDebug=False):
        "rewrite the sentence, modify tokens, purge the token list"
        if bDebug:
            print("REWRITE")
        lNewToken = []
        nMergeUntil = 0
        dTokenMerger = None
        for iToken, dToken in enumerate(self.lToken):
            bKeepToken = True
            if dToken["sType"] != "INFO":
                if "bImmune" in dToken:
                    nErrorStart = self.nOffsetWithinParagraph + dToken["nStart"]
                    if nErrorStart in self.dError:
                        if bDebug:
                            print("immunity -> error removed:", self.dError[nErrorStart])
                        del self.dError[nErrorStart]
                if nMergeUntil and iToken <= nMergeUntil:
                    dTokenMerger["sValue"] += " " * (dToken["nStart"] - dTokenMerger["nEnd"]) + dToken["sValue"]
                    dTokenMerger["nEnd"] = dToken["nEnd"]
                    if bDebug:
                        print(" MERGED TOKEN:", dTokenMerger["sValue"])
                    bKeepToken = False
                if "nMergeUntil" in dToken:
                    if iToken > nMergeUntil: # this token is not already merged with a previous token
                        dTokenMerger = dToken
                    if dToken["nMergeUntil"] > nMergeUntil:
                        nMergeUntil = dToken["nMergeUntil"]
                    del dToken["nMergeUntil"]
                elif "bToRemove" in dToken:
                    if bDebug:
                        print(" REMOVED:", dToken["sValue"])
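rewrite() likewise takes its index from enumerate() and uses it to fold runs of tokens into one merged token: a token carrying nMergeUntil absorbs every following token up to that index, padding with spaces so that the nStart/nEnd offsets stay consistent with the sentence text. Here is a small runnable sketch of just that merge bookkeeping, with invented token data and a placeholder first token so real tokens start at index 1; the field names mirror the engine's style, but nothing else is taken from the real code:

    lToken = [
        {"sValue": "<start>", "nStart": 0, "nEnd": 0},                   # placeholder head token
        {"sValue": "New",  "nStart": 0, "nEnd": 3, "nMergeUntil": 2},    # merge up to token 2
        {"sValue": "York", "nStart": 4, "nEnd": 8},
        {"sValue": "is",   "nStart": 9, "nEnd": 11},
    ]

    lNewToken = []
    nMergeUntil = 0
    dTokenMerger = None
    for iToken, dToken in enumerate(lToken):
        bKeepToken = True
        if nMergeUntil and iToken <= nMergeUntil:
            # pad with as many spaces as the gap between the two tokens,
            # so offsets into the sentence remain valid
            dTokenMerger["sValue"] += " " * (dToken["nStart"] - dTokenMerger["nEnd"]) + dToken["sValue"]
            dTokenMerger["nEnd"] = dToken["nEnd"]
            bKeepToken = False
        if "nMergeUntil" in dToken:
            if iToken > nMergeUntil:   # token not already merged into a previous one
                dTokenMerger = dToken
            if dToken["nMergeUntil"] > nMergeUntil:
                nMergeUntil = dToken["nMergeUntil"]
            del dToken["nMergeUntil"]
        if bKeepToken:
            lNewToken.append(dToken)

    print([d["sValue"] for d in lNewToken])   # ['<start>', 'New York', 'is']

Because lNewToken stores a reference to the merger token, the merged value shows up in the purged list without any extra copying.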
︙
Hunk 3, new lines 977-991 (end of the rewrite method):

                try:
                    del self.dTokenPos[dToken["nStart"]]
                except:
                    print(self)
                    print(dToken)
                    exit()
        if bDebug:
            print(" TEXT REWRITED:", self.sSentence)
        self.lToken.clear()
        self.lToken = lNewToken


    #### Analyse tokens
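This final hunk purges merged and removed tokens from self.dTokenPos so the nStart -> token map cannot hand back a token that no longer exists; the bare except that prints the sentence state and exits reads as debugging scaffolding around that deletion. A short sketch of the same map-and-list synchronization, with hypothetical tokens:

    lToken = [
        {"sValue": "a", "nStart": 0, "nEnd": 1},
        {"sValue": "b", "nStart": 2, "nEnd": 3, "bToRemove": True},
        {"sValue": "c", "nStart": 4, "nEnd": 5},
    ]
    dTokenPos = {d["nStart"]: d for d in lToken}

    lNewToken = []
    for dToken in lToken:
        if "bToRemove" in dToken:
            # the token disappears, so its entry in the position map must go
            # too, otherwise later lookups would resurrect a stale token
            del dTokenPos[dToken["nStart"]]
        else:
            lNewToken.append(dToken)

    lToken = lNewToken
    print(sorted(dTokenPos), [d["sValue"] for d in lToken])   # [0, 4] ['a', 'c']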
︙