18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
|
# NOTE(review): this is the tail of a condition-preprocessing function whose
# `def` header lies above this excerpt (it takes the DSL condition string `s`
# and returns it rewritten as executable Python). Each re.sub below translates
# one DSL shorthand into a call against the runtime environment
# (lToken, dDA, m, _oSpellChecker, ... — all presumably bound at the call
# site of the generated code; TODO confirm against the interpreter module).
# Sentence-boundary predicates -> look-behind/look-ahead over sentinel tokens.
s = re.sub(r"isRealStart *\(\)", 'before(["<START>"])', s)
s = re.sub(r"isStart0 *\(\)", 'before0(["<START>", ","])', s)
s = re.sub(r"isRealStart0 *\(\)", 'before0(["<START>"])', s)
s = re.sub(r"isEnd *\(\)", 'after(["<END>", ","])', s)
s = re.sub(r"isRealEnd *\(\)", 'after(["<END>"])', s)
s = re.sub(r"isEnd0 *\(\)", 'after0(["<END>", ","])', s)
s = re.sub(r"isRealEnd0 *\(\)", 'after0(["<END>"])', s)
# Token-group references: `\N` inside select/exclude/define/morph/... becomes
# an index into lToken. (A later revision of this block prefixes these with
# `g_` and adds nTokenOffset — this copy predates that change.)
s = re.sub(r"(select|exclude)[(][\\](\d+)", '\\1(lToken[\\2]', s)
s = re.sub(r"define[(][\\](\d+)", 'define(lToken[\\1]', s)
s = re.sub(r"(morph|morphex|displayInfo)[(]\\(\d+)", '\\1(lToken[\\2]', s)
# Relative token access by signed offset.
s = re.sub(r"token\(\s*(\d)", 'nextToken(\\1', s)                       # token(n)
s = re.sub(r"token\(\s*-(\d)", 'prevToken(\\1', s)                      # token(-n)
# Context scans: before/after the current match `m` within sentence `s`,
# or over the whole text.
s = re.sub(r"before\(\s*", 'look(s[:m.start()], ', s)                   # before(s)
s = re.sub(r"after\(\s*", 'look(s[m.end():], ', s)                      # after(s)
s = re.sub(r"textarea\(\s*", 'look(s, ', s)                             # textarea(s)
# Same scans, but the _chk1 variants also consult the disambiguation dict dDA
# and need the absolute offset of the scanned slice.
s = re.sub(r"before_chk1\(\s*", 'look_chk1(dDA, s[:m.start()], 0, ', s)          # before_chk1(s)
s = re.sub(r"after_chk1\(\s*", 'look_chk1(dDA, s[m.end():], m.end(), ', s)       # after_chk1(s)
s = re.sub(r"textarea_chk1\(\s*", 'look_chk1(dDA, s, 0, ', s)                    # textarea_chk1(s)
# Language-specific helpers: zero-arg DSL form gains the (dDA, tail, offset) args.
s = re.sub(r"isEndOfNG\(\s*\)", 'isEndOfNG(dDA, s[m.end():], m.end())', s)       # isEndOfNG(s)
s = re.sub(r"isNextNotCOD\(\s*\)", 'isNextNotCOD(dDA, s[m.end():], m.end())', s) # isNextNotCOD(s)
s = re.sub(r"isNextVerb\(\s*\)", 'isNextVerb(dDA, s[m.end():], m.end())', s)     # isNextVerb(s)
# Spelling check delegates to the shared spell-checker instance.
s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s)
# Any remaining bare `\N` backreference becomes a token lookup.
s = re.sub(r"[\\](\d+)", 'lToken[\\1]', s)
return s
def genTokenLines (sTokenLine):
"tokenize a string and return a list of lines of tokens"
|
|
<
|
|
|
|
|
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
|
# NOTE(review): tail of a second (apparently later) revision of the same
# condition-preprocessing function — the `def` header is not visible here.
# It rewrites the DSL condition string `s` into Python source evaluated by
# the rule engine. Differences from the earlier copy in this file:
#   * select/exclude/define and morph/morphex/displayInfo calls are renamed
#     with a `g_` prefix and their token index is shifted by nTokenOffset
#     (graph-rule token numbering — TODO confirm against the engine);
#   * the isEndOfNG/isNextNotCOD/isNextVerb rewrites are disabled.
# Sentence-boundary predicates -> look-behind/look-ahead over sentinel tokens.
s = re.sub(r"isRealStart *\(\)", 'before(["<START>"])', s)
s = re.sub(r"isStart0 *\(\)", 'before0(["<START>", ","])', s)
s = re.sub(r"isRealStart0 *\(\)", 'before0(["<START>"])', s)
s = re.sub(r"isEnd *\(\)", 'after(["<END>", ","])', s)
s = re.sub(r"isRealEnd *\(\)", 'after(["<END>"])', s)
s = re.sub(r"isEnd0 *\(\)", 'after0(["<END>", ","])', s)
s = re.sub(r"isRealEnd0 *\(\)", 'after0(["<END>"])', s)
# `\N` token references -> g_-prefixed runtime calls on lToken, offset by
# nTokenOffset so rule-local numbering maps onto the sentence token list.
s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
s = re.sub(r"(morph|morphex|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
# Relative token access by signed offset.
s = re.sub(r"token\(\s*(\d)", 'nextToken(\\1', s)                       # token(n)
s = re.sub(r"token\(\s*-(\d)", 'prevToken(\\1', s)                      # token(-n)
# Context scans relative to the current match `m` within sentence `s`.
s = re.sub(r"before\(\s*", 'look(s[:m.start()], ', s)                   # before(s)
s = re.sub(r"after\(\s*", 'look(s[m.end():], ', s)                      # after(s)
s = re.sub(r"textarea\(\s*", 'look(s, ', s)                             # textarea(s)
# _chk1 variants also consult the disambiguation dict dDA and take the
# absolute offset of the scanned slice.
s = re.sub(r"before_chk1\(\s*", 'look_chk1(dDA, s[:m.start()], 0, ', s)          # before_chk1(s)
s = re.sub(r"after_chk1\(\s*", 'look_chk1(dDA, s[m.end():], m.end(), ', s)       # after_chk1(s)
s = re.sub(r"textarea_chk1\(\s*", 'look_chk1(dDA, s, 0, ', s)                    # textarea_chk1(s)
# Disabled in this revision (handled differently, or not yet ported —
# TODO confirm why these were commented out):
#s = re.sub(r"isEndOfNG\(\s*\)", 'isEndOfNG(dDA, s[m.end():], m.end())', s)       # isEndOfNG(s)
#s = re.sub(r"isNextNotCOD\(\s*\)", 'isNextNotCOD(dDA, s[m.end():], m.end())', s) # isNextNotCOD(s)
#s = re.sub(r"isNextVerb\(\s*\)", 'isNextVerb(dDA, s[m.end():], m.end())', s)     # isNextVerb(s)
# Spelling check delegates to the shared spell-checker instance.
s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s)
# Any remaining bare `\N` backreference becomes a token lookup.
s = re.sub(r"[\\](\d+)", 'lToken[\\1]', s)
return s
def genTokenLines (sTokenLine):
"tokenize a string and return a list of lines of tokens"
|