Grammalecte  Check-in [268f752bd3]

Overview
Comment:[build][core] function to calculate spaces between tokens (update)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | build | rg
Files: files | file ages | folders
SHA3-256: 268f752bd32813ca26a804a6382d2ddc93ee0be16a54b2ff7bbd7d3ee25295d0
User & Date: olr on 2018-07-11 09:49:23
Other Links: branch diff | manifest | tags
Context
2018-07-11
17:00
[fr] conversion: regex rules -> graph rules check-in: b75a76dc10 user: olr tags: fr, rg
09:49
[build][core] function to calculate spaces between tokens (update) check-in: 268f752bd3 user: olr tags: core, build, rg
09:41
[build][core] function to calculate spaces between tokens check-in: 74ea483d59 user: olr tags: core, build, rg
Changes

Modified compile_rules_graph.py from [6c5e1bc4a3] to [75ffc0f9db].

17
18
19
20
21
22
23
24

25
26
27
28
29
30
31
17
18
19
20
21
22
23

24
25
26
27
28
29
30
31







-
+







def prepareFunction (s, bTokenValue=False):
    "convert simple rule syntax to a string of Python code"
    s = s.replace("__also__", "bCondMemo")
    s = s.replace("__else__", "not bCondMemo")
    s = re.sub(r"(morph|analyse|value|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(tag_before|tag_after)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset], dTags', s)
    s = re.sub(r"space[(][\\](\d+)", 'g_space_between_tokens(lToken[\\1+nTokenOffset], lToken[\\1+nTokenOffset+1]', s)
    s = re.sub(r"space_after[(][\\](\d+)", 'g_space_between_tokens(lToken[\\1+nTokenOffset], lToken[\\1+nTokenOffset+1]', s)
    s = re.sub(r"(switchGender|has(?:(?:Mas|Fem)Form)|Simil)[(]\\(\d+)", '\\1(lToken[\\2+nTokenOffset]["sValue"]', s)
    s = re.sub(r"(morph|analyse|value)\(>1", 'g_\\1(lToken[nLastToken+1]', s)                       # next token
    s = re.sub(r"(morph|analyse|value)\(<1", 'g_\\1(lToken[nTokenOffset]', s)                       # previous token
    s = re.sub(r"[\\](\d+)\.is(upper|lower|title)\(\)", 'lToken[\\1+nTokenOffset]["sValue"].is\\2()', s)
    s = re.sub(r"[\\](\d+)\.(startswith|endswith)\(", 'lToken[\\1+nTokenOffset]["sValue"].\\2(', s)
    s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s)
    s = re.sub(r"\bbefore\(\s*", 'look(sSentence[:lToken[1+nTokenOffset]["nStart"]], ', s)          # before(s)

Modified gc_core/py/lang_core/gc_engine.py from [697fc96204] to [1a91db6e13].

1057
1058
1059
1060
1061
1062
1063
1064

1065
1066
1067
1068
1069
1070
1071
1057
1058
1059
1060
1061
1062
1063

1064
1065
1066
1067
1068
1069
1070
1071







-
+







    if sTag not in dTags:
        return False
    if dToken["i"] < dTags[sTag][1]:
        return True
    return False


def g_space_between_token (dToken1, dToken2, nMin, nMax=None):
def g_space_between_tokens (dToken1, dToken2, nMin, nMax=None):
    """Tell whether the gap between two tokens lies within the allowed bounds.

    The gap is computed as dToken2["nStart"] - dToken1["nEnd"].
    Returns False when the gap is smaller than <nMin>, or when <nMax> is given
    and the gap is larger than <nMax>; returns True otherwise.
    With <nMax> left to None, there is no upper bound.
    """
    nGap = dToken2["nStart"] - dToken1["nEnd"]
    if nGap < nMin:
        # tokens are closer than the required minimum
        return False
    # an upper bound is enforced only when one is provided
    return nMax is None or nGap <= nMax

Modified gc_lang/fr/rules.grx from [30d0ce5f1b] to [fc561263e1].

2632
2633
2634
2635
2636
2637
2638
2639

2640
2641
2642
2643
2644
2645
2646
2632
2633
2634
2635
2636
2637
2638

2639
2640
2641
2642
2643
2644
2645
2646







-
+







@@@@


!!!! Apostrophe manquante (2)                                                                     !!

__typo_apostrophe_manquante_audace__
    <start>  [L|D|S|N|C|J|M|T|Ç]  ~^[aeéiouhAEÉIOUHyîèêôûYÎÈÊÔÛ]
        <<- option("mapos") -2>> =\1+"’"                                                            # Il manque peut-être une apostrophe.
        <<- option("mapos") and space_after(\2, 1, 1) -2>> =\2+"’"                                  # Il manque peut-être une apostrophe.

TEST: __mapos__ {{L }}opinion des gens, elle s’en moquait.



!!!! A / À: accentuation la préposition en début de phrase                                        !!