Grammalecte  Check-in [f7e8fadbf4]

Overview
Comment:[build][core] analyse merged token values
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | build | rg
Files: files | file ages | folders
SHA3-256: f7e8fadbf4c8e3abc73314f8929a68f6dd3a3ceb65a3f287400574a215e4074f
User & Date: olr on 2018-07-14 10:05:05
Other Links: branch diff | manifest | tags
Context
2018-07-14
12:04
[build] use default option priority for graph rules check-in: 58285f596f user: olr tags: build, rg
10:05
[build][core] analyse merged token values check-in: f7e8fadbf4 user: olr tags: core, build, rg
2018-07-13
18:07
[fr] toutes choses égales par ailleurs check-in: 6f56e6203f user: olr tags: fr, rg
Changes

Modified compile_rules_graph.py from [2c76153ce9] to [3fa8a2a061].

18
19
20
21
22
23
24

25
26
27
28
29
30
31
    "convert simple rule syntax to a string of Python code"
    s = s.replace("__also__", "bCondMemo")
    s = s.replace("__else__", "not bCondMemo")
    s = re.sub(r"(morph|analyse|value|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(tag_before|tag_after)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset], dTags', s)
    s = re.sub(r"space_after[(][\\](\d+)", 'g_space_between_tokens(lToken[\\1+nTokenOffset], lToken[\\1+nTokenOffset+1]', s)

    s = re.sub(r"(switchGender|has(?:(?:Mas|Fem)Form)|Simil)[(]\\(\d+)", '\\1(lToken[\\2+nTokenOffset]["sValue"]', s)
    s = re.sub(r"(morph|analyse|value)\(>1", 'g_\\1(lToken[nLastToken+1]', s)                       # next token
    s = re.sub(r"(morph|analyse|value)\(<1", 'g_\\1(lToken[nTokenOffset]', s)                       # previous token
    s = re.sub(r"[\\](\d+)\.is(upper|lower|title)\(\)", 'lToken[\\1+nTokenOffset]["sValue"].is\\2()', s)
    s = re.sub(r"[\\](\d+)\.(startswith|endswith)\(", 'lToken[\\1+nTokenOffset]["sValue"].\\2(', s)
    s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s)
    s = re.sub(r"\bbefore\(\s*", 'look(sSentence[:lToken[1+nTokenOffset]["nStart"]], ', s)          # before(s)







>







18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
    "convert simple rule syntax to a string of Python code"
    s = s.replace("__also__", "bCondMemo")
    s = s.replace("__else__", "not bCondMemo")
    s = re.sub(r"(morph|analyse|value|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(tag_before|tag_after)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset], dTags', s)
    s = re.sub(r"space_after[(][\\](\d+)", 'g_space_between_tokens(lToken[\\1+nTokenOffset], lToken[\\1+nTokenOffset+1]', s)
    s = re.sub(r"analyse_with_next[(][\\](\d+)", 'g_merged_analyse(lToken[\\1+nTokenOffset], lToken[\\1+nTokenOffset+1]', s)
    s = re.sub(r"(switchGender|has(?:(?:Mas|Fem)Form)|Simil)[(]\\(\d+)", '\\1(lToken[\\2+nTokenOffset]["sValue"]', s)
    s = re.sub(r"(morph|analyse|value)\(>1", 'g_\\1(lToken[nLastToken+1]', s)                       # next token
    s = re.sub(r"(morph|analyse|value)\(<1", 'g_\\1(lToken[nTokenOffset]', s)                       # previous token
    s = re.sub(r"[\\](\d+)\.is(upper|lower|title)\(\)", 'lToken[\\1+nTokenOffset]["sValue"].is\\2()', s)
    s = re.sub(r"[\\](\d+)\.(startswith|endswith)\(", 'lToken[\\1+nTokenOffset]["sValue"].\\2(', s)
    s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s)
    s = re.sub(r"\bbefore\(\s*", 'look(sSentence[:lToken[1+nTokenOffset]["nStart"]], ', s)          # before(s)

Modified gc_core/py/lang_core/gc_engine.py from [b5abe0de9e] to [fb5a2dbac5].

1040
1041
1042
1043
1044
1045
1046




























1047
1048
1049
1050
1051
1052
1053
            zNegPattern = re.compile(sNegPattern)
            if any(zNegPattern.search(sMorph)  for sMorph in lMorph):
                return False
    # search sPattern
    zPattern = re.compile(sPattern)
    return any(zPattern.search(sMorph)  for sMorph in lMorph)






























def g_tag_before (dToken, dTags, sTag):
    if sTag not in dTags:
        return False
    if dToken["i"] > dTags[sTag][0]:
        return True
    return False







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
            zNegPattern = re.compile(sNegPattern)
            if any(zNegPattern.search(sMorph)  for sMorph in lMorph):
                return False
    # search sPattern
    zPattern = re.compile(sPattern)
    return any(zPattern.search(sMorph)  for sMorph in lMorph)


def g_merged_analyse (dToken1, dToken2, cMerger, sPattern, sNegPattern="", bSetMorph=True):
    "merge two token values, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies (disambiguation off)"
    lMorph = _oSpellChecker.getMorph(dToken1["sValue"] + cMerger + dToken2["sValue"])
    if not lMorph:
        return False
    # check negative condition
    if sNegPattern:
        if sNegPattern == "*":
            # all morph must match sPattern
            if not lMorph:
                return False
            zPattern = re.compile(sPattern)
            bResult = all(zPattern.search(sMorph)  for sMorph in lMorph)
            if bResult and bSetMorph:
                dToken1["lMorph"] = lMorph
            return bResult
        else:
            zNegPattern = re.compile(sNegPattern)
            if any(zNegPattern.search(sMorph)  for sMorph in lMorph):
                return False
    # search sPattern
    zPattern = re.compile(sPattern)
    bResult = any(zPattern.search(sMorph)  for sMorph in lMorph)
    if bResult and bSetMorph:
        dToken1["lMorph"] = lMorph
    return bResult


def g_tag_before (dToken, dTags, sTag):
    if sTag not in dTags:
        return False
    if dToken["i"] > dTags[sTag][0]:
        return True
    return False

Modified gc_lang/fr/rules.grx from [0215d9178b] to [7fb40ebc09].

2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322










2323
2324
2325
2326
2327
2328
2329
    <<- spell(\1+"-"+\2) and analyse(\1+"-"+\2, ":", False) and morph(word(-1), ":D", False, not bool(re.search("(?i)^(?:s(?:ans|ous)|non)$", \1)))
    ->> \1-\2                                                                                       # Il manque probablement un trait d’union.

TEST: il a pris une balle dans l’{{arrière train}}.
TEST: Ce {{sans gêne}} mérite une bonne leçon


__[i]/tu(tu_mots_composés)__
    (attrape|garde|porte|brise|cache|casse|chauffe|contre|coupe|cure|croque|entre|essuie|lance|lave|lève|marque|pare|passe|perce|pèse|porte|poste|pousse|presse|protège|ramasse|serre|taille|tire|tourne|traîne|vice|vide) ({w1})  @@0,$
    <<- spell(\1+"-"+\2) and analyse(\1+"-"+\2, ":N", False) and morph(word(-1), ":(?:D|V0e)", False, True)
        and not (morph(\1, ":G", False) and morph(\2, ":[GYB]", False))
    ->> \1-\2                                                                                       # Il manque probablement un trait d’union.

TEST: ce sont des {{lève tard}}.



@@@@
@@@@
@@@@
@@@@
@@@@GRAPH: graphe1                                                                                 _
@@@@
@@@@
@@@@
@@@@











__tu_substantifs__
    [à|a] tout [va|vas]
        <<- /tu/ ->> à tout-va                                  # Il manque un trait d’union.

    [à|a] [la|là] [va|vas] vite
        <<- /tu/ ->> à la va-vite                               # Il manque un trait d’union.







<
<
<
<
<
<
<












>
>
>
>
>
>
>
>
>
>







2297
2298
2299
2300
2301
2302
2303







2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
    <<- spell(\1+"-"+\2) and analyse(\1+"-"+\2, ":", False) and morph(word(-1), ":D", False, not bool(re.search("(?i)^(?:s(?:ans|ous)|non)$", \1)))
    ->> \1-\2                                                                                       # Il manque probablement un trait d’union.

TEST: il a pris une balle dans l’{{arrière train}}.
TEST: Ce {{sans gêne}} mérite une bonne leçon












@@@@
@@@@
@@@@
@@@@
@@@@GRAPH: graphe1                                                                                 _
@@@@
@@@@
@@@@
@@@@

__tu_mots_composés_verbe_nom__
    [attrape|garde|porte|brise|cache|casse|chauffe|contre|coupe|cure|croque|entre|essuie|lance|lave|lève|marque|pare|passe|perce|pèse|porte|poste|pousse|presse|protège|ramasse|serre|taille|tire|tourne|traîne|traine|vide]  *WORD
    <<- /tu/ not (morph(\1, ":G") and morph(\2, ":[GYB]")) and morph(<1, ":(?:D|V0e)|<start>") and analyse_with_next(\1, "-", ":N")
    ->> \1-\2                                                                                       # Il manque probablement un trait d’union.

TEST: ce sont des {{lève tard}}.
TEST: un {{brise glace}}
TEST: entre deux chaises…


__tu_substantifs__
    [à|a] tout [va|vas]
        <<- /tu/ ->> à tout-va                                  # Il manque un trait d’union.

    [à|a] [la|là] [va|vas] vite
        <<- /tu/ ->> à la va-vite                               # Il manque un trait d’union.