22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
|
let sRes = "";
for (let c of sWord) {
sRes += this._dTransChars.gl_get(c, c);
}
return sRes.replace("eau", "o").replace("au", "o");
},
distanceDamerauLevenshtein: function (s1, s2) {
// distance of Damerau-Levenshtein between <s1> and <s2>
// https://fr.wikipedia.org/wiki/Distance_de_Damerau-Levenshtein
try {
let nLen1 = s1.length;
let nLen2 = s2.length;
let matrix = [];
for (let i = 0; i <= nLen1; i++) {
matrix[i] = new Array(nLen2 + 1);
}
for (let i = 0; i <= nLen1; i++) {
matrix[i][0] = i;
}
for (let j = 0; j <= nLen2; j++) {
matrix[0][j] = j;
}
for (let i = 1; i <= nLen1; i++) {
for (let j = 1; j <= nLen2; j++) {
let nCost = (s1[i] === s2[j]) ? 0 : 1;
matrix[i][j] = Math.min(
matrix[i-1][j] + 1, // Deletion
matrix[i][j-1] + 1, // Insertion
matrix[i-1][j-1] + nCost // Substitution
);
if (i > 1 && j > 1 && s1[i] == s2[j-1] && s1[i-1] == s2[j]) {
matrix[i][j] = Math.min(matrix[i][j], matrix[i-2][j-2] + nCost); // Transposition
}
}
}
//console.log(s2 + ": " + matrix[nLen1][nLen2]);
return matrix[nLen1][nLen2];
}
catch (e) {
helpers.logerror(e);
}
},
showDistance (s1, s2) {
let s1b = this.cleanWord(s1);
let s2b = this.cleanWord(s2);
console.log(`Distance: ${s1} / ${s2} = ${this.distanceDamerauLevenshtein(s1, s2)})`);
console.log(`Distance: ${s1b} / ${s2b} = ${this.distanceDamerauLevenshtein(s1b, s2b)})`);
},
// Method: Remove Useless Chars
// Characters that shrinkWord() drops from a word: the lowercase vowels
// a/e/i/o/u/y with their accented variants, plus 'h' and the ligatures
// 'œ' and 'æ'. Only lowercase forms are listed, so uppercase letters are
// never matched by this set.
aVovels: new Set([
'a', 'e', 'i', 'o', 'u', 'y',
'à', 'é', 'î', 'ô', 'û', 'ÿ',
'â', 'è', 'ï', 'ö', 'ù', 'ŷ',
'ä', 'ê', 'í', 'ó', 'ü', 'ý',
'á', 'ë', 'ì', 'ò', 'ú', 'ỳ',
'ā', 'ē', 'ī', 'ō', 'ū', 'ȳ',
'h', 'œ', 'æ'
]),
shrinkWord: function (sWord) {
// remove vovels and h
let sRes = "";
for (let cChar of sWord.slice(1)) {
if (!this.aVovels.has(cChar)) {
sRes += cChar;
}
}
return sWord.slice(0, 1).replace("h", "") + sRes;
},
// Similar chars
d1to1: new Map([
["1", "liîLIÎ"],
["2", "zZ"],
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
|
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
|
let sRes = "";
for (let c of sWord) {
sRes += this._dTransChars.gl_get(c, c);
}
return sRes.replace("eau", "o").replace("au", "o");
},
// Character classes. Each Set is built from a string, so it holds one entry
// per code point of that string; lowercase forms come first, then uppercase.
// Vowels, with accented variants and the ligatures œ/æ.
aVowel: new Set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ"),
// Consonants, including ç and ñ; read by get1toXReplacement().
aConsonant: new Set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ"),
aDouble: new Set("bcçdfjklmnprstzBCÇDFJKLMNPRSTZ"), // letters that may be used twice successively
// Similar chars
d1to1: new Map([
["1", "liîLIÎ"],
["2", "zZ"],
|
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
|
d1toX: new Map([
["æ", ["ae",]],
["Æ", ["AE",]],
["b", ["bb",]],
["B", ["BB",]],
["c", ["cc", "ss", "qu", "ch"]],
["C", ["CC", "SS", "QU", "CH"]],
["ç", ["ss", "cc", "qh", "ch"]],
["Ç", ["SS", "CC", "QH", "CH"]],
["d", ["dd",]],
["D", ["DD",]],
["f", ["ff", "ph"]],
["F", ["FF", "PH"]],
["g", ["gu", "ge", "gg", "gh"]],
["G", ["GU", "GE", "GG", "GH"]],
["i", ["ii",]],
["I", ["II",]],
["j", ["jj", "dj"]],
["J", ["JJ", "DJ"]],
["k", ["qu", "ck", "ch", "cu", "kk", "kh"]],
["K", ["QU", "CK", "CH", "CU", "KK", "KH"]],
["l", ["ll",]],
["L", ["LL",]],
["m", ["mm", "mn"]],
|
<
<
>
>
<
<
|
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
|
d1toX: new Map([
["æ", ["ae",]],
["Æ", ["AE",]],
["b", ["bb",]],
["B", ["BB",]],
["c", ["cc", "ss", "qu", "ch"]],
["C", ["CC", "SS", "QU", "CH"]],
["d", ["dd",]],
["D", ["DD",]],
["é", ["ai", "ei"]],
["É", ["AI", "EI"]],
["f", ["ff", "ph"]],
["F", ["FF", "PH"]],
["g", ["gu", "ge", "gg", "gh"]],
["G", ["GU", "GE", "GG", "GH"]],
["j", ["jj", "dj"]],
["J", ["JJ", "DJ"]],
["k", ["qu", "ck", "ch", "cu", "kk", "kh"]],
["K", ["QU", "CK", "CH", "CU", "KK", "KH"]],
["l", ["ll",]],
["L", ["LL",]],
["m", ["mm", "mn"]],
|
262
263
264
265
266
267
268
269
270
271
272
273
274
275
|
["t", ["tt", "th"]],
["T", ["TT", "TH"]],
["x", ["cc", "ct", "xx"]],
["X", ["CC", "CT", "XX"]],
["z", ["ss", "zh"]],
["Z", ["SS", "ZH"]],
]),
d2toX: new Map([
["an", ["en",]],
["AN", ["EN",]],
["au", ["eau", "o", "ô"]],
["AU", ["EAU", "O", "Ô"]],
["en", ["an",]],
|
>
>
>
>
>
>
>
|
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
|
["t", ["tt", "th"]],
["T", ["TT", "TH"]],
["x", ["cc", "ct", "xx"]],
["X", ["CC", "CT", "XX"]],
["z", ["ss", "zh"]],
["Z", ["SS", "ZH"]],
]),
get1toXReplacement: function (cPrev, cCur, cNext) {
if (this.aConsonant.has(cCur) && (this.aConsonant.has(cPrev) || this.aConsonant.has(cNext))) {
return [];
}
return this.d1toX.gl_get(cCur, []);
},
d2toX: new Map([
["an", ["en",]],
["AN", ["EN",]],
["au", ["eau", "o", "ô"]],
["AU", ["EAU", "O", "Ô"]],
["en", ["an",]],
|