Grammalecte  Check-in [74ea483d59]

Overview
Comment:[build][core] function to calculate spaces between tokens
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | build | rg
Files: files | file ages | folders
SHA3-256: 74ea483d5984d93c43ed4d4493b4fc656172605b4972c02111e3d28ed917da34
User & Date: olr on 2018-07-11 09:41:59
Other Links: branch diff | manifest | tags
Context
2018-07-11
09:49
[build][core] function to calculate spaces between tokens (update) check-in: 268f752bd3 user: olr tags: core, build, rg
09:41
[build][core] function to calculate spaces between tokens check-in: 74ea483d59 user: olr tags: core, build, rg
09:27
[fr] conversion: regex rules -> graph rules check-in: e9720b3b87 user: olr tags: fr, rg
Changes

Modified compile_rules_graph.py from [3490c8ecf9] to [6c5e1bc4a3].

17
18
19
20
21
22
23

24
25
26
27
28
29
30
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31







+







def prepareFunction (s, bTokenValue=False):
    "convert simple rule syntax to a string of Python code"
    s = s.replace("__also__", "bCondMemo")
    s = s.replace("__else__", "not bCondMemo")
    s = re.sub(r"(morph|analyse|value|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(tag_before|tag_after)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset], dTags', s)
    s = re.sub(r"space[(][\\](\d+)", 'g_space_between_tokens(lToken[\\1+nTokenOffset], lToken[\\1+nTokenOffset+1]', s)
    s = re.sub(r"(switchGender|has(?:(?:Mas|Fem)Form)|Simil)[(]\\(\d+)", '\\1(lToken[\\2+nTokenOffset]["sValue"]', s)
    s = re.sub(r"(morph|analyse|value)\(>1", 'g_\\1(lToken[nLastToken+1]', s)                       # next token
    s = re.sub(r"(morph|analyse|value)\(<1", 'g_\\1(lToken[nTokenOffset]', s)                       # previous token
    s = re.sub(r"[\\](\d+)\.is(upper|lower|title)\(\)", 'lToken[\\1+nTokenOffset]["sValue"].is\\2()', s)
    s = re.sub(r"[\\](\d+)\.(startswith|endswith)\(", 'lToken[\\1+nTokenOffset]["sValue"].\\2(', s)
    s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s)
    s = re.sub(r"\bbefore\(\s*", 'look(sSentence[:lToken[1+nTokenOffset]["nStart"]], ', s)          # before(s)

Modified gc_core/py/lang_core/gc_engine.py from [8ce717d080] to [697fc96204].

1056
1057
1058
1059
1060
1061
1062









1063
1064
1065
1066
1067
1068
1069
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078







+
+
+
+
+
+
+
+
+







def g_tag_after (dToken, dTags, sTag):
    if sTag not in dTags:
        return False
    if dToken["i"] < dTags[sTag][1]:
        return True
    return False


def g_space_between_token (dToken1, dToken2, nMin, nMax=None):
    nSpace = dToken2["nStart"] - dToken1["nEnd"]
    if nSpace < nMin:
        return False
    if nMax is not None and nSpace > nMax:
        return False
    return True


#### Disambiguator

def g_select (dToken, sPattern, lDefault=None):
    "select morphologies for <dToken> according to <sPattern>, always return True"
    lMorph = dToken["lMorph"]  if "lMorph" in dToken  else _oSpellChecker.getMorph(dToken["sValue"])
    if not lMorph or len(lMorph) == 1: