Grammalecte  Check-in [8289f6c423]

Overview
Comment:[core] ge engine: function for testing token value
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | rg
Files: files | file ages | folders
SHA3-256: 8289f6c42329b2aa1fea46785604fa62d2c240aebd3682183267700a1a2acba6
User & Date: olr on 2018-06-30 00:19:58
Other Links: branch diff | manifest | tags
Context
2018-06-30
06:30
[graphspell][bug] tokenizer: syntax error check-in: ec92f6e873 user: olr tags: graphspell, rg
00:19
[core] ge engine: function for testing token value check-in: 8289f6c423 user: olr tags: core, rg
2018-06-29
22:46
[graphspell] tokenizer: add lMorph to <start> and <end> tokens check-in: 2dbf497b04 user: olr tags: graphspell, rg
Changes

Modified compile_rules_graph.py from [846da89f7e] to [8e700a8b33].

14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
dFUNCTIONS = {}


def prepareFunction (s, bTokenValue=False):
    "convert simple rule syntax to a string of Python code"
    s = s.replace("__also__", "bCondMemo")
    s = s.replace("__else__", "not bCondMemo")
    s = re.sub(r"(morph|analyse|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(tag_before|tag_after)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset], dTags', s)
    s = re.sub(r"(switchGender|has(?:Mas|Fem)Form)[(]\\(\d+)", '\\1(lToken[\\2+nTokenOffset]["sValue"]', s)
    s = re.sub(r"(morph|analyse)\(>1", 'g_\\1(lToken[nLastToken+1]', s)                     # next token
    s = re.sub(r"(morph|analyse)\(<1", 'g_\\1(lToken[nTokenOffset]', s)                     # previous token
    s = re.sub(r"[\\](\d+)\.is(upper|lower|title)\(\)", 'lToken[\\1+nTokenOffset]["sValue"].is\\2()', s)
    s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s)







|







14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
dFUNCTIONS = {}


def prepareFunction (s, bTokenValue=False):
    "convert simple rule syntax to a string of Python code"
    s = s.replace("__also__", "bCondMemo")
    s = s.replace("__else__", "not bCondMemo")
    s = re.sub(r"(morph|analyse|value|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(tag_before|tag_after)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset], dTags', s)
    s = re.sub(r"(switchGender|has(?:Mas|Fem)Form)[(]\\(\d+)", '\\1(lToken[\\2+nTokenOffset]["sValue"]', s)
    s = re.sub(r"(morph|analyse)\(>1", 'g_\\1(lToken[nLastToken+1]', s)                     # next token
    s = re.sub(r"(morph|analyse)\(<1", 'g_\\1(lToken[nTokenOffset]', s)                     # previous token
    s = re.sub(r"[\\](\d+)\.is(upper|lower|title)\(\)", 'lToken[\\1+nTokenOffset]["sValue"].is\\2()', s)
    s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s)

Modified gc_core/py/lang_core/gc_engine.py from [09d4d7dd58] to [4effed76c8].

969
970
971
972
973
974
975






976
977
978
979
980
981
982
            print("  REWRITED:", self.sSentence)
        self.lToken.clear()
        self.lToken = lNewToken



#### Analyse tokens







def g_morph (dToken, sPattern, sNegPattern="", nLeft=None, nRight=None, bMemorizeMorph=True):
    "analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies"
    if "lMorph" in dToken:
        lMorph = dToken["lMorph"]
    else:
        if nLeft is not None:







>
>
>
>
>
>







969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
            print("  REWRITED:", self.sSentence)
        self.lToken.clear()
        self.lToken = lNewToken



#### Analyse tokens

def g_value (dToken, sValues, nLeft=None, nRight=None):
    "test if <dToken['sValue']> is in sValues (each value should be separated with |)"
    sValue = "|"+dToken["sValue"]+"|"  if nLeft is None  else "|"+dToken["sValue"][slice(nLeft, nRight)]+"|"
    return sValue in sValues


def g_morph (dToken, sPattern, sNegPattern="", nLeft=None, nRight=None, bMemorizeMorph=True):
    "analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies"
    if "lMorph" in dToken:
        lMorph = dToken["lMorph"]
    else:
        if nLeft is not None: