Grammalecte Check-in [c41af43fe1]

Overview
Comment: [build][core] disambiguation: define tokens according to a slice of their value
SHA3-256: c41af43fe1007d6cce1dcfc0eecee5b3d8b74e6e83701a8e6b68b2252840f0ad
User & Date: olr on 2018-07-14 12:45:13
Context
2018-07-14
12:48 [fr] fix test (check-in: 7d1fce952d, user: olr, tags: fr, rg)
12:45 [build][core] disambiguation: define tokens according to a slice of their value (check-in: c41af43fe1, user: olr, tags: core, build, rg)
12:44 [fr] conversion: regex rules -> graph rules (check-in: d4deec6280, user: olr, tags: fr, rg)
Changes

Modified compile_rules_graph.py from [3db92c7cf8] to [5195133d9c].

def prepareFunction (s, bTokenValue=False):
    "convert simple rule syntax to a string of Python code"
    s = s.replace("__also__", "bCondMemo")
    s = s.replace("__else__", "not bCondMemo")
    s = re.sub(r"(morph|analyse|value|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(select|exclude|define|define_from)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(tag_before|tag_after)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset], dTags', s)
    s = re.sub(r"space_after[(][\\](\d+)", 'g_space_between_tokens(lToken[\\1+nTokenOffset], lToken[\\1+nTokenOffset+1]', s)
    s = re.sub(r"analyse_with_next[(][\\](\d+)", 'g_merged_analyse(lToken[\\1+nTokenOffset], lToken[\\1+nTokenOffset+1]', s)
    s = re.sub(r"(switchGender|has(?:(?:Mas|Fem)Form)|Simil)[(]\\(\d+)", '\\1(lToken[\\2+nTokenOffset]["sValue"]', s)
    s = re.sub(r"(morph|analyse|value)\(>1", 'g_\\1(lToken[nLastToken+1]', s)                       # next token
    s = re.sub(r"(morph|analyse|value)\(<1", 'g_\\1(lToken[nTokenOffset]', s)                       # previous token
    s = re.sub(r"[\\](\d+)\.is(upper|lower|title)\(\)", 'lToken[\\1+nTokenOffset]["sValue"].is\\2()', s)

Modified gc_core/py/lang_core/gc_engine.py from [fb5a2dbac5] to [e5bcf3ef2a].

def g_define (dToken, lMorph):
    "set morphologies of <dToken>, always return True"
    dToken["lMorph"] = lMorph
    #print("DA:", dToken["sValue"], lMorph)
    return True


+def g_define_from (dToken, nLeft, nRight=None):
+    "set morphologies of <dToken> from a slice of its value, always return True"
+    dToken["lMorph"] = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)])
+    return True


#### GRAMMAR CHECKER PLUGINS

${plugins}
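
To show what the new g_define_from does, here is a small self-contained sketch; the spell checker class, the example word and the morphology entry are stand-ins for Grammalecte's real _oSpellChecker and dictionary data, only the slice(nLeft, nRight) lookup mirrors the function added above.

class _FakeSpellChecker:
    "stand-in for the real spell checker: returns hypothetical morphologies"
    def getMorph (self, sWord):
        return { "quatre": [">quatre/:B:e:p"] }.get(sWord, [])  # hypothetical entry

_oSpellChecker = _FakeSpellChecker()

def g_define_from (dToken, nLeft, nRight=None):
    "set morphologies of <dToken> from a slice of its value, always return True"
    dToken["lMorph"] = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)])
    return True

dToken = { "sValue": "quatre-vingts", "lMorph": [] }
g_define_from(dToken, 0, 6)   # look up morphologies of "quatre", the first six characters
print(dToken["lMorph"])       # [">quatre/:B:e:p"]

This is presumably the point of the new define_from action: letting a disambiguation rule assign a token its morphologies from only part of its value.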