Overview
| Comment: | [build][core] function to calculate spaces between tokens |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | core | build | rg |
| Files: | files | file ages | folders |
| SHA3-256: |
74ea483d5984d93c43ed4d4493b4fc65 |
| User & Date: | olr on 2018-07-11 09:41:59 |
| Other Links: | branch diff | manifest | tags |
Context
|
2018-07-11
| ||
| 09:49 | [build][core] function to calculate spaces between tokens (update) check-in: 268f752bd3 user: olr tags: core, build, rg | |
| 09:41 | [build][core] function to calculate spaces between tokens check-in: 74ea483d59 user: olr tags: core, build, rg | |
| 09:27 | [fr] conversion: regex rules -> graph rules check-in: e9720b3b87 user: olr tags: fr, rg | |
Changes
Modified compile_rules_graph.py from [3490c8ecf9] to [6c5e1bc4a3].
| ︙ | ︙ | |||
17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
def prepareFunction (s, bTokenValue=False):
"convert simple rule syntax to a string of Python code"
s = s.replace("__also__", "bCondMemo")
s = s.replace("__else__", "not bCondMemo")
s = re.sub(r"(morph|analyse|value|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
s = re.sub(r"(tag_before|tag_after)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset], dTags', s)
s = re.sub(r"(switchGender|has(?:(?:Mas|Fem)Form)|Simil)[(]\\(\d+)", '\\1(lToken[\\2+nTokenOffset]["sValue"]', s)
s = re.sub(r"(morph|analyse|value)\(>1", 'g_\\1(lToken[nLastToken+1]', s) # next token
s = re.sub(r"(morph|analyse|value)\(<1", 'g_\\1(lToken[nTokenOffset]', s) # previous token
s = re.sub(r"[\\](\d+)\.is(upper|lower|title)\(\)", 'lToken[\\1+nTokenOffset]["sValue"].is\\2()', s)
s = re.sub(r"[\\](\d+)\.(startswith|endswith)\(", 'lToken[\\1+nTokenOffset]["sValue"].\\2(', s)
s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s)
s = re.sub(r"\bbefore\(\s*", 'look(sSentence[:lToken[1+nTokenOffset]["nStart"]], ', s) # before(s)
| > | 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
def prepareFunction (s, bTokenValue=False):
"convert simple rule syntax to a string of Python code"
s = s.replace("__also__", "bCondMemo")
s = s.replace("__else__", "not bCondMemo")
s = re.sub(r"(morph|analyse|value|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
s = re.sub(r"(tag_before|tag_after)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset], dTags', s)
s = re.sub(r"space[(][\\](\d+)", 'g_space_between_tokens(lToken[\\1+nTokenOffset], lToken[\\1+nTokenOffset+1]', s)
s = re.sub(r"(switchGender|has(?:(?:Mas|Fem)Form)|Simil)[(]\\(\d+)", '\\1(lToken[\\2+nTokenOffset]["sValue"]', s)
s = re.sub(r"(morph|analyse|value)\(>1", 'g_\\1(lToken[nLastToken+1]', s) # next token
s = re.sub(r"(morph|analyse|value)\(<1", 'g_\\1(lToken[nTokenOffset]', s) # previous token
s = re.sub(r"[\\](\d+)\.is(upper|lower|title)\(\)", 'lToken[\\1+nTokenOffset]["sValue"].is\\2()', s)
s = re.sub(r"[\\](\d+)\.(startswith|endswith)\(", 'lToken[\\1+nTokenOffset]["sValue"].\\2(', s)
s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s)
s = re.sub(r"\bbefore\(\s*", 'look(sSentence[:lToken[1+nTokenOffset]["nStart"]], ', s) # before(s)
|
| ︙ | ︙ |
Modified gc_core/py/lang_core/gc_engine.py from [8ce717d080] to [697fc96204].
| ︙ | ︙ | |||
1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 |
def g_tag_after (dToken, dTags, sTag):
if sTag not in dTags:
return False
if dToken["i"] < dTags[sTag][1]:
return True
return False
#### Disambiguator
def g_select (dToken, sPattern, lDefault=None):
"select morphologies for <dToken> according to <sPattern>, always return True"
lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"])
if not lMorph or len(lMorph) == 1:
| > > > > > > > > > | 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 |
def g_tag_after (dToken, dTags, sTag):
if sTag not in dTags:
return False
if dToken["i"] < dTags[sTag][1]:
return True
return False
def g_space_between_token (dToken1, dToken2, nMin, nMax=None):
nSpace = dToken2["nStart"] - dToken1["nEnd"]
if nSpace < nMin:
return False
if nMax is not None and nSpace > nMax:
return False
return True
#### Disambiguator
def g_select (dToken, sPattern, lDefault=None):
"select morphologies for <dToken> according to <sPattern>, always return True"
lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"])
if not lMorph or len(lMorph) == 1:
|
| ︙ | ︙ |