Overview
Comment: | [build][core] function to calculate spaces between tokens |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | core | build | rg |
Files: | files | file ages | folders |
SHA3-256: |
74ea483d5984d93c43ed4d4493b4fc65 |
User & Date: | olr on 2018-07-11 09:41:59 |
Other Links: | branch diff | manifest | tags |
Context
2018-07-11
| ||
09:49 | [build][core] function to calculate spaces between tokens (update) check-in: 268f752bd3 user: olr tags: core, build, rg | |
09:41 | [build][core] function to calculate spaces between tokens check-in: 74ea483d59 user: olr tags: core, build, rg | |
09:27 | [fr] conversion: regex rules -> graph rules check-in: e9720b3b87 user: olr tags: fr, rg | |
Changes
Modified compile_rules_graph.py from [3490c8ecf9] to [6c5e1bc4a3].
︙ | ︙ | |||
17 18 19 20 21 22 23 24 25 26 27 28 29 30 | def prepareFunction (s, bTokenValue=False): "convert simple rule syntax to a string of Python code" s = s.replace("__also__", "bCondMemo") s = s.replace("__else__", "not bCondMemo") s = re.sub(r"(morph|analyse|value|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s) s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s) s = re.sub(r"(tag_before|tag_after)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset], dTags', s) s = re.sub(r"(switchGender|has(?:(?:Mas|Fem)Form)|Simil)[(]\\(\d+)", '\\1(lToken[\\2+nTokenOffset]["sValue"]', s) s = re.sub(r"(morph|analyse|value)\(>1", 'g_\\1(lToken[nLastToken+1]', s) # next token s = re.sub(r"(morph|analyse|value)\(<1", 'g_\\1(lToken[nTokenOffset]', s) # previous token s = re.sub(r"[\\](\d+)\.is(upper|lower|title)\(\)", 'lToken[\\1+nTokenOffset]["sValue"].is\\2()', s) s = re.sub(r"[\\](\d+)\.(startswith|endswith)\(", 'lToken[\\1+nTokenOffset]["sValue"].\\2(', s) s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s) s = re.sub(r"\bbefore\(\s*", 'look(sSentence[:lToken[1+nTokenOffset]["nStart"]], ', s) # before(s) | > | 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 | def prepareFunction (s, bTokenValue=False): "convert simple rule syntax to a string of Python code" s = s.replace("__also__", "bCondMemo") s = s.replace("__else__", "not bCondMemo") s = re.sub(r"(morph|analyse|value|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s) s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s) s = re.sub(r"(tag_before|tag_after)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset], dTags', s) s = re.sub(r"space[(][\\](\d+)", 'g_space_between_tokens(lToken[\\1+nTokenOffset], lToken[\\1+nTokenOffset+1]', s) s = re.sub(r"(switchGender|has(?:(?:Mas|Fem)Form)|Simil)[(]\\(\d+)", '\\1(lToken[\\2+nTokenOffset]["sValue"]', s) s = re.sub(r"(morph|analyse|value)\(>1", 'g_\\1(lToken[nLastToken+1]', s) # next token s = re.sub(r"(morph|analyse|value)\(<1", 'g_\\1(lToken[nTokenOffset]', s) # previous token s = re.sub(r"[\\](\d+)\.is(upper|lower|title)\(\)", 'lToken[\\1+nTokenOffset]["sValue"].is\\2()', s) s = re.sub(r"[\\](\d+)\.(startswith|endswith)\(", 'lToken[\\1+nTokenOffset]["sValue"].\\2(', s) s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s) s = re.sub(r"\bbefore\(\s*", 'look(sSentence[:lToken[1+nTokenOffset]["nStart"]], ', s) # before(s) |
︙ | ︙ |
Modified gc_core/py/lang_core/gc_engine.py from [8ce717d080] to [697fc96204].
︙ | ︙ | |||
1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 | def g_tag_after (dToken, dTags, sTag): if sTag not in dTags: return False if dToken["i"] < dTags[sTag][1]: return True return False #### Disambiguator def g_select (dToken, sPattern, lDefault=None): "select morphologies for <dToken> according to <sPattern>, always return True" lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"]) if not lMorph or len(lMorph) == 1: | > > > > > > > > > | 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 | def g_tag_after (dToken, dTags, sTag): if sTag not in dTags: return False if dToken["i"] < dTags[sTag][1]: return True return False def g_space_between_token (dToken1, dToken2, nMin, nMax=None): nSpace = dToken2["nStart"] - dToken1["nEnd"] if nSpace < nMin: return False if nMax is not None and nSpace > nMax: return False return True #### Disambiguator def g_select (dToken, sPattern, lDefault=None): "select morphologies for <dToken> according to <sPattern>, always return True" lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"]) if not lMorph or len(lMorph) == 1: |
︙ | ︙ |