Grammalecte  Check-in [4134a01a49]

Overview
Comment:[core] darg: merge morph and morphex functions
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | rg
Files: files | file ages | folders
SHA3-256: 4134a01a49e64e156c546934f6174aa95416f4a9c2fb64aa5a8d47b914566355
User & Date: olr on 2018-06-06 09:30:40
Other Links: branch diff | manifest | tags
Context
2018-06-06
09:54
[graphspell] end of lemma is now a slash instead of a space check-in: c5c926760b user: olr tags: graphspell, rg
09:30
[core] darg: merge morph and morphex functions check-in: 4134a01a49 user: olr tags: core, rg
07:47
[core] darg: anti-patterns for morphologies check-in: 47ae72e7f9 user: olr tags: core, rg
Changes

Modified compile_rules_graph.py from [ca6fc181e8] to [184a3e89b8].

19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
    s = re.sub(r"isStart0 *\(\)", 'before0(["<START>", ","])', s)
    s = re.sub(r"isRealStart0 *\(\)", 'before0(["<START>"])', s)
    s = re.sub(r"isEnd *\(\)", 'after(["<END>", ","])', s)
    s = re.sub(r"isRealEnd *\(\)", 'after(["<END>"])', s)
    s = re.sub(r"isEnd0 *\(\)", 'after0(["<END>", ","])', s)
    s = re.sub(r"isRealEnd0 *\(\)", 'after0(["<END>"])', s)
    s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(morph|morphex|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"token\(\s*(\d)", 'nextToken(\\1', s)                                       # token(n)
    s = re.sub(r"token\(\s*-(\d)", 'prevToken(\\1', s)                                      # token(-n)
    s = re.sub(r"before\(\s*", 'look(s[:m.start()], ', s)                                   # before(s)
    s = re.sub(r"after\(\s*", 'look(s[m.end():], ', s)                                      # after(s)
    s = re.sub(r"textarea\(\s*", 'look(s, ', s)                                             # textarea(s)
    s = re.sub(r"before_chk1\(\s*", 'look_chk1(dDA, s[:m.start()], 0, ', s)                 # before_chk1(s)
    s = re.sub(r"after_chk1\(\s*", 'look_chk1(dDA, s[m.end():], m.end(), ', s)              # after_chk1(s)







|







19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
    s = re.sub(r"isStart0 *\(\)", 'before0(["<START>", ","])', s)
    s = re.sub(r"isRealStart0 *\(\)", 'before0(["<START>"])', s)
    s = re.sub(r"isEnd *\(\)", 'after(["<END>", ","])', s)
    s = re.sub(r"isRealEnd *\(\)", 'after(["<END>"])', s)
    s = re.sub(r"isEnd0 *\(\)", 'after0(["<END>", ","])', s)
    s = re.sub(r"isRealEnd0 *\(\)", 'after0(["<END>"])', s)
    s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(morph|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"token\(\s*(\d)", 'nextToken(\\1', s)                                       # token(n)
    s = re.sub(r"token\(\s*-(\d)", 'prevToken(\\1', s)                                      # token(-n)
    s = re.sub(r"before\(\s*", 'look(s[:m.start()], ', s)                                   # before(s)
    s = re.sub(r"after\(\s*", 'look(s[m.end():], ', s)                                      # after(s)
    s = re.sub(r"textarea\(\s*", 'look(s, ', s)                                             # textarea(s)
    s = re.sub(r"before_chk1\(\s*", 'look_chk1(dDA, s[:m.start()], 0, ', s)                 # before_chk1(s)
    s = re.sub(r"after_chk1\(\s*", 'look_chk1(dDA, s[m.end():], m.end(), ', s)              # after_chk1(s)

Modified gc_core/py/lang_core/gc_engine.py from [5975c86814] to [f0f3202267].

815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843






844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867





868
869
870
871
872
873
874
875
876
877
            print("# Error. Jump failed: ", sWhat)
            traceback.print_exc()
            return


#### Analyse tokens

def g_morph (dToken, sPattern, bStrict=True):
    """Analyse a token, return True if <sPattern> is found in its morphologies.

    The morphologies are read from dToken["lMorph"] when that key exists;
    otherwise they are requested from the spell checker for dToken["sValue"].

    dToken    -- token dictionary ("lMorph" and/or "sValue" entries are read)
    sPattern  -- regular expression searched in each morphology
    bStrict   -- if True, every morphology must match; if False, one is enough

    Returns False when there is no morphology at all, whatever <bStrict> is.
    """
    if "lMorph" in dToken:
        lMorph = dToken["lMorph"]
    else:
        lMorph = _oSpellChecker.getMorph(dToken["sValue"])
    # The emptiness check applies to both sources: without it, an empty
    # "lMorph" stored on the token made the strict test vacuously True
    # (all() of an empty sequence), unlike the spell checker branch.
    if not lMorph:
        return False
    zPattern = re.compile(sPattern)
    if bStrict:
        return all(zPattern.search(sMorph)  for sMorph in lMorph)
    return any(zPattern.search(sMorph)  for sMorph in lMorph)

def g_morphex (dToken, sPattern, sNegPattern):
    """Analyse a token with a positive and a negative pattern.

    Returns True if no morphology matches <sNegPattern> and at least one
    morphology matches <sPattern>. The morphologies come from
    dToken["lMorph"] when present, otherwise from the spell checker
    (queried with dToken["sValue"]); an empty spell checker answer gives False.
    """
    if "lMorph" not in dToken:
        lTags = _oSpellChecker.getMorph(dToken["sValue"])
        if not lTags:
            return False
    else:
        lTags = dToken["lMorph"]
    # negative condition: a single hit vetoes the token
    fnReject = re.compile(sNegPattern).search
    for sTag in lTags:
        if fnReject(sTag):
            return False
    # positive condition: one matching morphology is enough
    fnAccept = re.compile(sPattern).search
    for sTag in lTags:
        if fnAccept(sTag):
            return True
    return False

def g_analyse (dToken, sPattern, bStrict=True):
    """Analyse a token, return True if <sPattern> in morphologies (disambiguation off).

    The morphologies are always requested from the spell checker for
    dToken["sValue"]; any "lMorph" entry on the token is not consulted.
    With <bStrict> every morphology must match, otherwise one is enough.
    Returns False when the spell checker gives no morphology.
    """
    lTags = _oSpellChecker.getMorph(dToken["sValue"])
    if lTags:
        fnSearch = re.compile(sPattern).search
        fnQuantifier = all if bStrict else any
        return fnQuantifier(fnSearch(sTag)  for sTag in lTags)
    return False


def g_analysex (dToken, sPattern, sNegPattern):
    """Analyse a token with a positive and a negative pattern (disambiguation off).

    The morphologies are always requested from the spell checker for
    dToken["sValue"]. Returns True if none of them matches <sNegPattern>
    and at least one matches <sPattern>; False when there is no morphology.
    """
    lTags = _oSpellChecker.getMorph(dToken["sValue"])
    if not lTags:
        return False
    # negative condition: a single hit vetoes the token
    fnReject = re.compile(sNegPattern).search
    for sTag in lTags:
        if fnReject(sTag):
            return False
    # positive condition: one matching morphology is enough
    fnAccept = re.compile(sPattern).search
    for sTag in lTags:
        if fnAccept(sTag):
            return True
    return False



#### Disambiguator







|
<
<
<
<
<
<
<
<
<
<
<
<
<








>
>
>
>
>
>
|
|
|




<
<
<
<
<
<
<
<
<

<
|





>
>
>
>
>
|
|
|







815
816
817
818
819
820
821
822













823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843









844

845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
            print("# Error. Jump failed: ", sWhat)
            traceback.print_exc()
            return


#### Analyse tokens

def g_morph (dToken, sPattern, sNegPattern=""):
    """Analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies.

    The morphologies are read from dToken["lMorph"] when that key exists;
    otherwise they are requested from the spell checker for dToken["sValue"].

    dToken      -- token dictionary ("lMorph" and/or "sValue" entries are read)
    sPattern    -- regular expression searched in each morphology
    sNegPattern -- "" : no negative condition, one morphology matching
                        <sPattern> is enough;
                   "*": strict mode, every morphology must match <sPattern>;
                   otherwise a regular expression: if any morphology
                        matches it, the result is False.

    Returns False when there is no morphology at all, whatever the mode.
    """
    if "lMorph" in dToken:
        lMorph = dToken["lMorph"]
    else:
        lMorph = _oSpellChecker.getMorph(dToken["sValue"])
    # The emptiness check applies to both sources: without it, an empty
    # "lMorph" stored on the token made the "*" mode vacuously True
    # (all() of an empty sequence), unlike the spell checker branch.
    if not lMorph:
        return False
    if sNegPattern == "*":
        # all morph must match sPattern
        zPattern = re.compile(sPattern)
        return all(zPattern.search(sMorph)  for sMorph in lMorph)
    if sNegPattern:
        # check negative condition
        zNegPattern = re.compile(sNegPattern)
        if any(zNegPattern.search(sMorph)  for sMorph in lMorph):
            return False
    # search sPattern
    zPattern = re.compile(sPattern)
    return any(zPattern.search(sMorph)  for sMorph in lMorph)












def g_analyse (dToken, sPattern, sNegPattern=""):
    """Analyse a token with a positive and an optional negative pattern (disambiguation off).

    The morphologies are always requested from the spell checker for
    dToken["sValue"]; False is returned when there is none.

    sNegPattern -- "" : one morphology matching <sPattern> is enough;
                   "*": every morphology must match <sPattern>;
                   otherwise a regular expression: if any morphology
                        matches it, the result is False.
    """
    lTags = _oSpellChecker.getMorph(dToken["sValue"])
    if not lTags:
        return False
    if sNegPattern == "*":
        # strict mode: no morphology may escape <sPattern>
        fnSearch = re.compile(sPattern).search
        return all(map(fnSearch, lTags))
    if sNegPattern:
        # negative condition: a single hit vetoes the token
        fnReject = re.compile(sNegPattern).search
        if any(map(fnReject, lTags)):
            return False
    # positive condition: one matching morphology is enough
    fnSearch = re.compile(sPattern).search
    return any(map(fnSearch, lTags))



#### Disambiguator

Modified gc_lang/fr/rules_graph.grx from [1dc1051949] to [02356176f2].

67
68
69
70
71
72
73
74
75
76
        <<- -2>> plaisir                                                                            # Faire plaisir : dans cette locution, “plaisir” doit être au singulier.

TEST: Ça me fait {{plaisirs}}.


__test__
    je  ~préf[éè]r  [que|qu’]  @(?::Os|:M)¬:X  @:I
        <<- morph(\1, ":V", False) and morphex(\4, ":Os|:M", ":X") -5>> SUBJONCTIF                  # SUBJONCTIF.

TEST: je préférerais qu’Isabelle {{est}} partie.







|


67
68
69
70
71
72
73
74
75
76
        <<- -2>> plaisir                                                                            # Faire plaisir : dans cette locution, “plaisir” doit être au singulier.

TEST: Ça me fait {{plaisirs}}.


__test__
    je  ~préf[éè]r  [que|qu’]  @(?::Os|:M)¬:X  @:I
        <<- morph(\1, ":V") and morph(\4, ":Os|:M", ":X") -5>> SUBJONCTIF                  # SUBJONCTIF.

TEST: je préférerais qu’Isabelle {{est}} partie.