Grammalecte  Diff

Differences From Artifact [e86906e5ce]:

To Artifact [23bc31f0e4]:


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
1
































2
3
4
5
6
7
8

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-







#!python3
# -*- coding: UTF-8 -*-


# Lookup table: each plain or accented letter maps to the string holding
# every member of its accent family (the letter itself included).
# Built from the six families so each family string is spelled only once;
# iteration order of the keys follows the families left to right.
dSimilarChars = {
    cLetter: sFamily
    for sFamily in ("aàâáä", "cç", "eéêèë", "iîïíì", "oôóòö", "uûùüú")
    for cLetter in sFamily
}

## No stemming

def noStemming (sFlex, sStem):
    """Identity encoder: the stem is stored verbatim.

    <sFlex> is accepted but never read; <sStem> is returned unchanged.
    """
    sResult = sStem
    return sResult

def rebuildWord (sFlex, cmd1, cmd2):
69
70
71
72
73
74
75
76

77
78
79


80
81
82
83
84
85
86
37
38
39
40
41
42
43

44
45


46
47
48
49
50
51
52
53
54







-
+

-
-
+
+







    jSfx = 0
    for i in range(min(len(sFlex), len(sStem))):
        if sFlex[i] != sStem[i]:
            break
        jSfx += 1
    return chr(len(sFlex)-jSfx+48) + sStem[jSfx:]  

def getStemFromSuffixCode (sFlex, sSfxCode):
def changeWordWithSuffixCode (sWord, sSfxCode):
    if sSfxCode == "0":
        return sFlex
    return sFlex[:-(ord(sSfxCode[0])-48)] + sSfxCode[1:]  if sSfxCode[0] != '0'  else sFlex + sSfxCode[1:]
        return sWord
    return sWord[:-(ord(sSfxCode[0])-48)] + sSfxCode[1:]  if sSfxCode[0] != '0'  else sWord + sSfxCode[1:]


# Prefix and suffix
def defineAffixCode (sFlex, sStem):
    """ Returns a string defining how to get stem from flexion. Examples:
            "0" if stem = flexion
            "stem" if no common substring
120
121
122
123
124
125
126
127

128
129

130
131
132
133
134


135
88
89
90
91
92
93
94

95
96

97
98
99
100


101
102
103







-
+

-
+



-
-
+
+

                if M[x][y] > longest:
                    longest = M[x][y]
                    x_longest = x
            else:
                M[x][y] = 0
    return s1[x_longest-longest : x_longest]

def getStemFromAffixCode (sFlex, sAffCode):
def changeWordWithAffixCode (sWord, sAffCode):
    if sAffCode == "0":
        return sFlex
        return sWord
    if '/' not in sAffCode:
        return "# error #"
    sPfxCode, sSfxCode = sAffCode.split('/')
    sFlex = sPfxCode[1:] + sFlex[(ord(sPfxCode[0])-48):] 
    return sFlex[:-(ord(sSfxCode[0])-48)] + sSfxCode[1:]  if sSfxCode[0] != '0'  else sFlex + sSfxCode[1:]
    sWord = sPfxCode[1:] + sWord[(ord(sPfxCode[0])-48):] 
    return sWord[:-(ord(sSfxCode[0])-48)] + sSfxCode[1:]  if sSfxCode[0] != '0'  else sWord + sSfxCode[1:]