1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
|
-
-
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
"""
Table of distances between similar characters,
used by the suggestion mechanism.
"""
# Substitution distances between look-alike / sound-alike characters.
#
# Layout: dDistanceBetweenChars[c1][c2] -> distance from c1 to c2.
#   - Values are on a 0-1 scale (0, 0.1 ... 0.9): 0 on the diagonal, and
#     smaller values for characters that are closer to each other.
#   - The original notes associate this 0-1 scale ("values / 10") with the
#     Damerau-Levenshtein computation, and a 1-10 scale with Jaro-Winkler.
#     Only the 0-1 scale is kept here.
#   - Lookups are directional: an entry c1 -> c2 does not imply that
#     c2 -> c1 exists (e.g. "t" lists "d", but "d" does not list "t").
#
# Stale rows on the 1-10 scale used to sit in the middle of this literal.
# Each of their keys was redefined further down, and the last duplicate key
# wins in a dict display, so removing them leaves the table contents unchanged.
dDistanceBetweenChars = {
    # vowels: every accented variant is at distance .1 from its base letter
    # and from the other variants of the same letter
    "a": { "a":  0, "á": .1, "à": .1, "â": .1, "ã": .1 },
    "á": { "a": .1, "á":  0, "à": .1, "â": .1, "ã": .1 },
    "à": { "a": .1, "á": .1, "à":  0, "â": .1, "ã": .1 },
    "â": { "a": .1, "á": .1, "à": .1, "â":  0, "ã": .1 },
    "ã": { "a": .1, "á": .1, "à": .1, "â": .1, "ã":  0 },

    "e": { "e":  0, "é": .1, "è": .1, "ê": .1, "ẽ": .1 },
    "é": { "e": .1, "é":  0, "è": .1, "ê": .1, "ẽ": .1 },
    "è": { "e": .1, "é": .1, "è":  0, "ê": .1, "ẽ": .1 },
    "ê": { "e": .1, "é": .1, "è": .1, "ê":  0, "ẽ": .1 },
    "ẽ": { "e": .1, "é": .1, "è": .1, "ê": .1, "ẽ":  0 },

    "i": { "i":  0, "í": .1, "ì": .1, "î": .1, "ĩ": .1 },
    "í": { "i": .1, "í":  0, "ì": .1, "î": .1, "ĩ": .1 },
    "ì": { "i": .1, "í": .1, "ì":  0, "î": .1, "ĩ": .1 },
    "î": { "i": .1, "í": .1, "ì": .1, "î":  0, "ĩ": .1 },
    "ĩ": { "i": .1, "í": .1, "ì": .1, "î": .1, "ĩ":  0 },

    "o": { "o":  0, "ó": .1, "ò": .1, "ô": .1, "õ": .1 },
    "ó": { "o": .1, "ó":  0, "ò": .1, "ô": .1, "õ": .1 },
    "ò": { "o": .1, "ó": .1, "ò":  0, "ô": .1, "õ": .1 },
    "ô": { "o": .1, "ó": .1, "ò": .1, "ô":  0, "õ": .1 },
    "õ": { "o": .1, "ó": .1, "ò": .1, "ô": .1, "õ":  0 },

    "u": { "u":  0, "ú": .1, "ù": .1, "û": .1, "ũ": .1 },
    "ú": { "u": .1, "ú":  0, "ù": .1, "û": .1, "ũ": .1 },
    "ù": { "u": .1, "ú": .1, "ù":  0, "û": .1, "ũ": .1 },
    "û": { "u": .1, "ú": .1, "ù": .1, "û":  0, "ũ": .1 },
    "ũ": { "u": .1, "ú": .1, "ù": .1, "û": .1, "ũ":  0 },

    "y": { "y":  0, "ý": .1, "ỳ": .1, "ŷ": .1, "ỹ": .1 },
    "ý": { "y": .1, "ý":  0, "ỳ": .1, "ŷ": .1, "ỹ": .1 },
    "ỳ": { "y": .1, "ý": .1, "ỳ":  0, "ŷ": .1, "ỹ": .1 },
    "ŷ": { "y": .1, "ý": .1, "ỳ": .1, "ŷ":  0, "ỹ": .1 },
    "ỹ": { "y": .1, "ý": .1, "ỳ": .1, "ŷ": .1, "ỹ":  0 },

    # consonants
    "b": { "b": 0, "d": .8, "h": .9 },
    "c": { "c": 0, "ç": .1, "k": .5, "q": .5, "s": .5, "x": .5, "z": .8 },
    "ç": { "c": .1, "ç": 0, "k": .5, "q": .5, "s": .5, "x": .5, "z": .8 },
    "d": { "d": 0, "b": .8 },
    "f": { "f": 0, "v": .8 },
    "g": { "g": 0, "j": .5, "q": .8 },
    "h": { "h": 0, "b": .9 },
    "j": { "j": 0, "g": .5, "i": .8 },
    "k": { "k": 0, "c": .5, "q": .1, "x": .5 },
    "l": { "l": 0, "i": .8 },
    "m": { "m": 0, "n": .6 },
    "n": { "n": 0, "ñ": .1, "m": .6, "r": .8 },
    "p": { "p": 0, "q": .8 },
    "q": { "q": 0, "c": .5, "k": .1, "p": .8, "g": .8 },
    "r": { "r": 0, "n": .8, "j": .9 },
    "s": { "s": 0, "c": .5, "ç": .1, "x": .5, "z": .5 },
    "t": { "t": 0, "d": .9 },
    "v": { "v": 0, "f": .8, "w": .2 },
    "w": { "w": 0, "v": .2 },
    "x": { "x": 0, "c": .5, "k": .5, "q": .5, "s": .5 },
    "z": { "z": 0, "s": .5 },
}
def distanceBetweenChars (c1, c2):
"returns a float between 0 and 1"
if c1 == c2:
return 0
|