Grammalecte  Diff

Differences From Artifact [9efc586374]:

To Artifact [fdeaf568a0]:


1
2
3
4
5
6
7
8

9






10
11
12



13
14
15
16
17

18





19
20
21
22
23

24





25










26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
"""
List of similar chars
useful for suggestion mechanism
"""


# Substitution cost between look-alike / sound-alike characters.
#
# Outer key: the source character; inner dict: candidate replacement -> cost.
# Costs are stored as integers:
#   - with Jaro-Winkler, values are between 1 and 10;
#   - with Damerau-Levenshtein, values are divided by 10
#     (giving 0.1, 0.2 ... 0.9, i.e. between 0 and 1).
#
# The table is not symmetric: e.g. "i" -> "y" costs 2 while "y" -> "i" costs 3,
# and "t" -> "d" is listed although "d" has no entry for "t".
dDistanceBetweenChars = {
    # vowels ("a", "o" and "u" currently have no neighbours listed)
    "e": { "é": 5 },
    "é": { "e": 5 },
    "i": { "y": 2 },
    "y": { "i": 3 },

    # consonants
    "b": { "d": 8, "h": 9 },
    "c": { "ç": 1, "k": 5, "q": 5, "s": 5, "x": 5, "z": 8 },
    "d": { "b": 8 },
    "f": { "v": 8 },
    "g": { "j": 5 },
    "h": { "b": 9 },
    "j": { "g": 5, "i": 9 },
    "k": { "c": 5, "q": 1, "x": 5 },
    "l": { "i": 9 },
    "m": { "n": 8 },
    "n": { "m": 8, "r": 9 },
    "p": { "q": 9 },
    "q": { "c": 5, "k": 1, "p": 9 },
    "r": { "n": 9, "j": 9 },
    "s": { "c": 5, "ç": 1, "x": 5, "z": 5 },
    "t": { "d": 9 },
    "v": { "f": 8, "w": 1 },
    "w": { "v": 1 },
    "x": { "c": 5, "k": 5, "q": 5, "s": 5 },
    "z": { "s": 5 },
}


def distanceBetweenChars (c1, c2):
    "returns a float between 0 and 1"
    if c1 == c2:
        return 0





<

<
>

>
>
>
>
>
>
|
|
|
>
>
>
|
|
|
|
|
>
|
>
>
>
>
>
|
|
|
|
|
>
|
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
|
|
|
|
|
|
|
|
|
|
|
|







1
2
3
4
5

6

7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
"""
List of similar chars
useful for suggestion mechanism
"""


# Substitution cost between look-alike / sound-alike characters.
#
# Outer key: the source character; inner dict: candidate replacement -> cost.
# Every row lists its own key with cost 0; the other costs are floats
# between 0.1 and 0.9.
#
# Notes kept from the previous layout of this table:
#   - with Damerau-Levenshtein, values / 10 (between 0 and 1: 0.1, 0.2 ... 0.9)
#   - with Jaro-Winkler, values between 1 and 10
# NOTE(review): the values below are already on the 0-1 scale; confirm that
# any consumer expecting the 1-10 scale rescales them.
#
# The table is not symmetric: e.g. "t" -> "d" is listed although "d" has no
# entry for "t", and "n" -> "ñ" is listed although "ñ" has no row of its own.
dDistanceBetweenChars = {
    # vowels
    "a": { "a": 0,  "á": .1, "à": .1, "â": .1, "ã": .1 },
    "á": { "a": .1, "á": 0,  "à": .1, "â": .1, "ã": .1 },
    "à": { "a": .1, "á": .1, "à": 0,  "â": .1, "ã": .1 },
    "â": { "a": .1, "á": .1, "à": .1, "â": 0,  "ã": .1 },
    "ã": { "a": .1, "á": .1, "à": .1, "â": .1, "ã": 0  },

    "e": { "e": 0,  "é": .1, "è": .1, "ê": .1, "ẽ": .1 },
    "é": { "e": .1, "é": 0,  "è": .1, "ê": .1, "ẽ": .1 },
    "è": { "e": .1, "é": .1, "è": 0,  "ê": .1, "ẽ": .1 },
    "ê": { "e": .1, "é": .1, "è": .1, "ê": 0,  "ẽ": .1 },
    "ẽ": { "e": .1, "é": .1, "è": .1, "ê": .1, "ẽ": 0  },

    "i": { "i": 0,  "í": .1, "ì": .1, "î": .1, "ĩ": .1 },
    "í": { "i": .1, "í": 0,  "ì": .1, "î": .1, "ĩ": .1 },
    "ì": { "i": .1, "í": .1, "ì": 0,  "î": .1, "ĩ": .1 },
    "î": { "i": .1, "í": .1, "ì": .1, "î": 0,  "ĩ": .1 },
    "ĩ": { "i": .1, "í": .1, "ì": .1, "î": .1, "ĩ": 0  },

    "o": { "o": 0,  "ó": .1, "ò": .1, "ô": .1, "õ": .1 },
    "ó": { "o": .1, "ó": 0,  "ò": .1, "ô": .1, "õ": .1 },
    "ò": { "o": .1, "ó": .1, "ò": 0,  "ô": .1, "õ": .1 },
    "ô": { "o": .1, "ó": .1, "ò": .1, "ô": 0,  "õ": .1 },
    "õ": { "o": .1, "ó": .1, "ò": .1, "ô": .1, "õ": 0  },

    "u": { "u": 0,  "ú": .1, "ù": .1, "û": .1, "ũ": .1 },
    "ú": { "u": .1, "ú": 0,  "ù": .1, "û": .1, "ũ": .1 },
    "ù": { "u": .1, "ú": .1, "ù": 0,  "û": .1, "ũ": .1 },
    "û": { "u": .1, "ú": .1, "ù": .1, "û": 0,  "ũ": .1 },
    "ũ": { "u": .1, "ú": .1, "ù": .1, "û": .1, "ũ": 0  },

    "y": { "y": 0,  "ý": .1, "ỳ": .1, "ŷ": .1, "ỹ": .1 },
    "ý": { "y": .1, "ý": 0,  "ỳ": .1, "ŷ": .1, "ỹ": .1 },
    "ỳ": { "y": .1, "ý": .1, "ỳ": 0,  "ŷ": .1, "ỹ": .1 },
    "ŷ": { "y": .1, "ý": .1, "ỳ": .1, "ŷ": 0,  "ỹ": .1 },
    "ỹ": { "y": .1, "ý": .1, "ỳ": .1, "ŷ": .1, "ỹ": 0  },

    # consonants
    "b": { "b": 0, "d": .8, "h": .9 },
    "c": { "c": 0, "ç": .1, "k": .5, "q": .5, "s": .5, "x": .5, "z": .8 },
    "ç": { "c": .1, "ç": 0, "k": .5, "q": .5, "s": .5, "x": .5, "z": .8 },
    "d": { "d": 0, "b": .8 },
    "f": { "f": 0, "v": .8 },
    "g": { "g": 0, "j": .5, "q": .8 },
    "h": { "h": 0, "b": .9 },
    "j": { "j": 0, "g": .5, "i": .8 },
    "k": { "k": 0, "c": .5, "q": .1, "x": .5 },
    "l": { "l": 0, "i": .8 },
    "m": { "m": 0, "n": .6 },
    "n": { "n": 0, "ñ": .1, "m": .6, "r": .8 },
    "p": { "p": 0, "q": .8 },
    "q": { "q": 0, "c": .5, "k": .1, "p": .8, "g": .8 },
    "r": { "r": 0, "n": .8, "j": .9 },
    "s": { "s": 0, "c": .5, "ç": .1, "x": .5, "z": .5 },
    "t": { "t": 0, "d": .9 },
    "v": { "v": 0, "f": .8, "w": .2 },
    "w": { "w": 0, "v": .2 },
    "x": { "x": 0, "c": .5, "k": .5, "q": .5, "s": .5 },
    "z": { "z": 0, "s": .5 }
}


def distanceBetweenChars (c1, c2):
    "returns a float between 0 and 1"
    if c1 == c2:
        return 0