Overview
Comment:     [core][build] nTokenOffset necessary for text processor functions
Downloads:   Tarball | ZIP archive | SQL archive
Timelines:   family | ancestors | descendants | both | core | build | rg
Files:       files | file ages | folders
SHA3-256:    3a5a4d302e52d67c4e2f163eb1e682b9
User & Date: olr on 2018-06-27 07:43:53
Other Links: branch diff | manifest | tags
Context
2018-06-27
10:02  [core] gc engine: use expand for text processor too  (check-in: 816624027a, user: olr, tags: core, rg)
07:43  [core][build] nTokenOffset necessary for text processor functions  (check-in: 3a5a4d302e, user: olr, tags: core, build, rg)
07:42  [fr] compound words with -là (use of slice)  (check-in: 3449bb65ee, user: olr, tags: fr, rg)
Changes
Modified compile_rules_graph.py from [19fb543170] to [4e3eba14a0].
︙
Lines 356-370, as modified:

    if sFuncName.startswith("_g_c_"): # condition
        sParams = "lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, dTags, sSentence, sSentence0"
    elif sFuncName.startswith("g_m_"): # message
        sParams = "lToken, nTokenOffset"
    elif sFuncName.startswith("_g_s_"): # suggestion
        sParams = "lToken, nTokenOffset"
    elif sFuncName.startswith("_g_p_"): # preprocessor
        sParams = "lToken, nTokenOffset"
    elif sFuncName.startswith("_g_d_"): # disambiguator
        sParams = "lToken, nTokenOffset"
    else:
        print("# Unknown function type in [" + sFuncName + "]")
        continue
    sPyCallables += "def {} ({}):\n".format(sFuncName, sParams)
    sPyCallables += "    return " + sReturn + "\n"
︙
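Per the check-in comment, the point of this change is that preprocessor (_g_p_) and disambiguator (_g_d_) callables now receive nTokenOffset, like messages and suggestions already did, so their bodies can address tokens relative to the match. A minimal sketch of what one iteration of the generation loop above could emit; the rule id and return expression are invented for illustration, not taken from the repository:

    # Hypothetical values standing in for one iteration of the loop above.
    sFuncName = "_g_p_1_example"                          # invented rule id
    sParams = "lToken, nTokenOffset"                      # signature after this check-in
    sReturn = 'lToken[nTokenOffset+1]["sValue"].lower()'  # invented function body
    sPyCallables = "def {} ({}):\n".format(sFuncName, sParams)
    sPyCallables += "    return " + sReturn + "\n"
    print(sPyCallables)
    # def _g_p_1_example (lToken, nTokenOffset):
    #     return lToken[nTokenOffset+1]["sValue"].lower()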
Modified gc_core/py/lang_core/gc_engine.py from [ca7f3318b7] to [f1fdc10ea4].
︙
Lines 769-798, as modified:

            dError[nErrorStart] = self.createError(sWhat, nTokenOffset, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, True, eAct[3], eAct[4], bShowRuleId, "notype", bContext)
            dPriority[nErrorStart] = eAct[2]
            if bDebug:
                print("ERROR:", sRuleId, dError[nErrorStart])
        elif cActionType == "~":
            # text processor
            nEndToken = (nTokenOffset + eAct[1])  if eAct[1]  else nLastToken
            self._tagAndPrepareTokenForRewriting(sWhat, nTokenOffset + eAct[0], nEndToken, nTokenOffset, bDebug)
            if bDebug:
                print("RW:", sRuleId)
            bChange = True
        elif cActionType == "=":
            # disambiguation
            globals()[sWhat](self.lToken, nTokenOffset)
            if bDebug:
                print("DA:", sRuleId)
        elif cActionType == ">":
            # we do nothing, this test is just a condition to apply all following actions
            if bDebug:
                print(">>>", sRuleId)
            pass
        elif cActionType == "/":
            # sentence tags
            nTokenTag = nTokenOffset + eAct[0]
            if sWhat not in self.dTags:
                self.dTags[sWhat] = (nTokenTag, nTokenTag)
            elif nTokenTag > self.dTags[sWhat][1]:
                self.dTags[sWhat] = (self.dTags[sWhat][0], nTokenTag)
            if bDebug:
                print("/", sRuleId)
︙
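A note on the rewritten call in this hunk: _tagAndPrepareTokenForRewriting, as defined further down, takes (sWhat, nTokenRewriteStart, nTokenRewriteEnd, nTokenOffset, bUppercase=True, bDebug=False), so the trailing bDebug here binds positionally to bUppercase rather than to bDebug. If that is not intended, a keyword argument would pin it down; a sketch of that variant, not what the check-in actually does:

    # Hypothetical variant: pass bDebug by keyword so it cannot land in bUppercase.
    self._tagAndPrepareTokenForRewriting(sWhat, nTokenOffset + eAct[0], nEndToken, nTokenOffset, bDebug=bDebug)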
Lines 888-902, as modified:

    def _expand (self, sMsg, nTokenOffset):
        #print("*", sMsg)
        for m in re.finditer(r"\\([0-9]+)", sMsg):
            sMsg = sMsg.replace(m.group(0), self.lToken[int(m.group(1))+nTokenOffset]["sValue"])
        #print(">", sMsg)
        return sMsg

    def _tagAndPrepareTokenForRewriting (self, sWhat, nTokenRewriteStart, nTokenRewriteEnd, nTokenOffset, bUppercase=True, bDebug=False):
        "text processor: rewrite tokens between <nTokenRewriteStart> and <nTokenRewriteEnd> position"
        if bDebug:
            print("REWRITING:", nTokenRewriteStart, nTokenRewriteEnd)
        if sWhat == "*":
            # purge text
            if nTokenRewriteEnd - nTokenRewriteStart == 0:
                self.lToken[nTokenRewriteStart]["bToRemove"] = True
︙
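_expand replaces each backslash reference \N in a message with the value of token N+nTokenOffset; this is how text written against match-relative positions gets resolved against the sentence's token list. A self-contained sketch of the same substitution logic, with an invented token list:

    import re

    def expand(sMsg, lToken, nTokenOffset):
        # Replace each "\N" with the value of token N + nTokenOffset.
        for m in re.finditer(r"\\([0-9]+)", sMsg):
            sMsg = sMsg.replace(m.group(0), lToken[int(m.group(1)) + nTokenOffset]["sValue"])
        return sMsg

    lToken = [{"sValue": "<start>"}, {"sValue": "Les"}, {"sValue": "chats"}]
    print(expand("\\1 \\2", lToken, 0))   # -> "Les chats"
    print(expand("\\1", lToken, 1))       # same list, offset 1 -> "chats"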
Lines 918-941, as modified:

            if nTokenRewriteEnd - nTokenRewriteStart == 0:
                self.lToken[nTokenRewriteStart]["sNewValue"] = "_"
            else:
                for i in range(nTokenRewriteStart, nTokenRewriteEnd+1):
                    self.lToken[i]["sNewValue"] = "_"
        else:
            if sWhat.startswith("="):
                sWhat = globals()[sWhat[1:]](self.lToken, nTokenOffset)
            bUppercase = bUppercase and self.lToken[nTokenRewriteStart]["sValue"][0:1].isupper()
            if nTokenRewriteEnd - nTokenRewriteStart == 0:
                # one token
                sWhat = sWhat + " " * (len(self.lToken[nTokenRewriteStart]["sValue"])-len(sWhat))
                if bUppercase:
                    sWhat = sWhat[0:1].upper() + sWhat[1:]
                self.lToken[nTokenRewriteStart]["sNewValue"] = sWhat
            else:
                # several tokens
                lTokenValue = sWhat.split("|")
                if len(lTokenValue) != (nTokenRewriteEnd - nTokenRewriteStart + 1):
                    print("Error. Text processor: number of replacements != number of tokens.")
                    return
                for i, sValue in zip(range(nTokenRewriteStart, nTokenRewriteEnd+1), lTokenValue):
                    if bUppercase:
                        sValue = sValue[0:1].upper() + sValue[1:]
︙
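In the several-tokens branch above, sWhat carries one replacement per token, separated by |, and the count must match the rewrite range exactly; an empty field sets an empty sNewValue for its token. A standalone sketch of that mapping with invented data; it mirrors the branch shown here, it is not the engine's public API:

    def rewrite_range(lToken, nStart, nEnd, sWhat, bUppercase=False):
        # One "|"-separated replacement per token; refuse a count mismatch.
        lTokenValue = sWhat.split("|")
        if len(lTokenValue) != (nEnd - nStart + 1):
            print("Error. Text processor: number of replacements != number of tokens.")
            return
        for i, sValue in zip(range(nStart, nEnd + 1), lTokenValue):
            if bUppercase:
                sValue = sValue[0:1].upper() + sValue[1:]
            lToken[i]["sNewValue"] = sValue

    lToken = [{"sValue": "peut"}, {"sValue": "être"}]
    rewrite_range(lToken, 0, 1, "peut-être|")
    print([d.get("sNewValue") for d in lToken])   # -> ['peut-être', '']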