1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
|
#!python3
# -*- coding: UTF-8 -*-
# Maps every letter listed below (plain or accented) to the string holding
# all the variants of its group; each group is keyed by each of its members.
dSimilarChars = {
    sChar: sGroup
    for sGroup in ("aàâáä", "cç", "eéêèë", "iîïíì", "oôóòö", "uûùüú")
    for sChar in sGroup
}
## No stemming
def noStemming (sFlex, sStem):
    """Return <sStem> exactly as given; <sFlex> is accepted but not used."""
    return sStem
def rebuildWord (sFlex, cmd1, cmd2):
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
1
2
3
4
5
6
7
8
|
#!python3
## No stemming
def noStemming (sFlex, sStem):
    """Return <sStem> exactly as given; <sFlex> is accepted but not used."""
    return sStem
def rebuildWord (sFlex, cmd1, cmd2):
|
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
|
jSfx = 0
for i in range(min(len(sFlex), len(sStem))):
if sFlex[i] != sStem[i]:
break
jSfx += 1
return chr(len(sFlex)-jSfx+48) + sStem[jSfx:]
def getStemFromSuffixCode (sFlex, sSfxCode):
    """Apply the suffix code <sSfxCode> to <sFlex> and return the result.

    The first character of the code encodes a count as chr(count + 48)
    ("0" means zero); that many characters are removed from the end of
    <sFlex>, then the remainder of the code is appended.
    The code "0" alone returns <sFlex> unchanged.
    """
    if sSfxCode == "0":
        return sFlex
    nCut = ord(sSfxCode[0]) - 48
    sAdded = sSfxCode[1:]
    if nCut == 0:
        # a [:-0] slice would yield an empty string, so append directly
        return sFlex + sAdded
    return sFlex[:-nCut] + sAdded
# Prefix and suffix
def defineAffixCode (sFlex, sStem):
""" Returns a string defining how to get stem from flexion. Examples:
"0" if stem = flexion
"stem" if no common substring
|
|
|
|
|
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
|
jSfx = 0
for i in range(min(len(sFlex), len(sStem))):
if sFlex[i] != sStem[i]:
break
jSfx += 1
return chr(len(sFlex)-jSfx+48) + sStem[jSfx:]
def changeWordWithSuffixCode (sWord, sSfxCode):
    """Apply the suffix code <sSfxCode> to <sWord> and return the result.

    The first character of the code encodes a count as chr(count + 48)
    ("0" means zero); that many characters are removed from the end of
    <sWord>, then the remainder of the code is appended.
    The code "0" alone returns <sWord> unchanged.
    """
    if sSfxCode == "0":
        return sWord
    nCut = ord(sSfxCode[0]) - 48
    sAdded = sSfxCode[1:]
    if nCut == 0:
        # a [:-0] slice would yield an empty string, so append directly
        return sWord + sAdded
    return sWord[:-nCut] + sAdded
# Prefix and suffix
def defineAffixCode (sFlex, sStem):
""" Returns a string defining how to get stem from flexion. Examples:
"0" if stem = flexion
"stem" if no common substring
|
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
|
if M[x][y] > longest:
longest = M[x][y]
x_longest = x
else:
M[x][y] = 0
return s1[x_longest-longest : x_longest]
def getStemFromAffixCode (sFlex, sAffCode):
    """Apply the affix code <sAffCode> to <sFlex> and return the result.

    An affix code is either "0" (return <sFlex> unchanged) or
    "<prefix code>/<suffix code>". In each half, the first character
    encodes a count as chr(count + 48) ("0" means zero) and the rest is
    the text to add:
      - prefix: drop <count> characters from the start, then prepend the text;
      - suffix: drop <count> characters from the end, then append the text.

    Returns "# error #" when the code is malformed: no "/", more than
    one "/", or an empty prefix or suffix half.
    """
    if sAffCode == "0":
        return sFlex
    sPfxCode, sSep, sSfxCode = sAffCode.partition('/')
    # Malformed codes get the error marker instead of raising on unpacking
    # (extra "/") or on indexing an empty half.
    if not sSep or not sPfxCode or not sSfxCode or '/' in sSfxCode:
        return "# error #"
    nPfxCut = ord(sPfxCode[0]) - 48
    nSfxCut = ord(sSfxCode[0]) - 48
    sFlex = sPfxCode[1:] + sFlex[nPfxCut:]
    if nSfxCut == 0:
        # a [:-0] slice would yield an empty string, so append directly
        return sFlex + sSfxCode[1:]
    return sFlex[:-nSfxCut] + sSfxCode[1:]
|
|
|
|
|
|
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
if M[x][y] > longest:
longest = M[x][y]
x_longest = x
else:
M[x][y] = 0
return s1[x_longest-longest : x_longest]
def changeWordWithAffixCode (sWord, sAffCode):
    """Apply the affix code <sAffCode> to <sWord> and return the result.

    An affix code is either "0" (return <sWord> unchanged) or
    "<prefix code>/<suffix code>". In each half, the first character
    encodes a count as chr(count + 48) ("0" means zero) and the rest is
    the text to add:
      - prefix: drop <count> characters from the start, then prepend the text;
      - suffix: drop <count> characters from the end, then append the text.

    Returns "# error #" when the code is malformed: no "/", more than
    one "/", or an empty prefix or suffix half.
    """
    if sAffCode == "0":
        return sWord
    sPfxCode, sSep, sSfxCode = sAffCode.partition('/')
    # Malformed codes get the error marker instead of raising on unpacking
    # (extra "/") or on indexing an empty half.
    if not sSep or not sPfxCode or not sSfxCode or '/' in sSfxCode:
        return "# error #"
    nPfxCut = ord(sPfxCode[0]) - 48
    nSfxCut = ord(sSfxCode[0]) - 48
    sWord = sPfxCode[1:] + sWord[nPfxCut:]
    if nSfxCut == 0:
        # a [:-0] slice would yield an empty string, so append directly
        return sWord + sSfxCode[1:]
    return sWord[:-nSfxCut] + sSfxCode[1:]
|