Grammalecte  Check-in [8289f6c423]

Overview
Comment:[core] gc engine: function for testing token value
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | rg
Files: files | file ages | folders
SHA3-256: 8289f6c42329b2aa1fea46785604fa62d2c240aebd3682183267700a1a2acba6
User & Date: olr on 2018-06-30 00:19:58
Other Links: branch diff | manifest | tags
Context
2018-06-30
06:30
[graphspell][bug] tokenizer: syntax error check-in: ec92f6e873 user: olr tags: graphspell, rg
00:19
[core] gc engine: function for testing token value check-in: 8289f6c423 user: olr tags: core, rg
2018-06-29
22:46
[graphspell] tokenizer: add lMorph to <start> and <end> tokens check-in: 2dbf497b04 user: olr tags: graphspell, rg
Changes

Modified compile_rules_graph.py from [846da89f7e] to [8e700a8b33].

14
15
16
17
18
19
20
21

22
23
24
25
26
27
28
14
15
16
17
18
19
20

21
22
23
24
25
26
27
28







-
+







dFUNCTIONS = {}


def prepareFunction (s, bTokenValue=False):
    "convert simple rule syntax to a string of Python code"
    s = s.replace("__also__", "bCondMemo")
    s = s.replace("__else__", "not bCondMemo")
    s = re.sub(r"(morph|analyse|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(morph|analyse|value|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(tag_before|tag_after)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset], dTags', s)
    s = re.sub(r"(switchGender|has(?:Mas|Fem)Form)[(]\\(\d+)", '\\1(lToken[\\2+nTokenOffset]["sValue"]', s)
    s = re.sub(r"(morph|analyse)\(>1", 'g_\\1(lToken[nLastToken+1]', s)                     # next token
    s = re.sub(r"(morph|analyse)\(<1", 'g_\\1(lToken[nTokenOffset]', s)                     # previous token
    s = re.sub(r"[\\](\d+)\.is(upper|lower|title)\(\)", 'lToken[\\1+nTokenOffset]["sValue"].is\\2()', s)
    s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s)

Modified gc_core/py/lang_core/gc_engine.py from [09d4d7dd58] to [4effed76c8].

969
970
971
972
973
974
975






976
977
978
979
980
981
982
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988







+
+
+
+
+
+







            print("  REWRITED:", self.sSentence)
        self.lToken.clear()
        self.lToken = lNewToken



#### Analyse tokens

def g_value (dToken, sValues, nLeft=None, nRight=None):
    """Tell whether the token text, wrapped in "|" delimiters, occurs in <sValues>.

    <sValues> is expected to list the accepted values separated with "|".
    Because the text is searched with a "|" on both sides, a string
    <sValues> must also start and end with "|" for its first and last
    values to be found.

    When <nLeft> is given, only dToken["sValue"][nLeft:nRight] is tested.
    <nRight> has no effect when <nLeft> is None.
    """
    sText = dToken["sValue"]
    if nLeft is not None:
        # restrict the test to the requested part of the token text
        sText = sText[nLeft:nRight]
    sWrapped = "|" + sText + "|"
    return sWrapped in sValues


def g_morph (dToken, sPattern, sNegPattern="", nLeft=None, nRight=None, bMemorizeMorph=True):
    "analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies"
    if "lMorph" in dToken:
        lMorph = dToken["lMorph"]
    else:
        if nLeft is not None: