Grammalecte  Diff

Differences From Artifact [f5b2267e7d]:

To Artifact [ac345212e4]:


22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65

66
67
68
69
70
71
72

73
74

75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
22
23
24
25
26
27
28





































29







30


31




















32
33
34
35
36
37
38







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
+
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-







        let sRes = "";
        for (let c of sWord) {
            sRes += this._dTransChars.gl_get(c, c);
        }
        return sRes.replace("eau", "o").replace("au", "o");
    },

    distanceDamerauLevenshtein: function (s1, s2) {
        // distance of Damerau-Levenshtein between <s1> and <s2>
        // https://fr.wikipedia.org/wiki/Distance_de_Damerau-Levenshtein
        try {
            let nLen1 = s1.length;
            let nLen2 = s2.length;
            let matrix = [];
            for (let i = 0;  i <= nLen1;  i++) {
                matrix[i] = new Array(nLen2 + 1);
            }
            for (let i = 0;  i <= nLen1;  i++) {
                matrix[i][0] = i;
            }
            for (let j = 0;  j <= nLen2;  j++) {
                matrix[0][j] = j;
            }
            for (let i = 1;  i <= nLen1;  i++) {
                for (let j = 1;  j <= nLen2;  j++) {
                    let nCost = (s1[i] === s2[j]) ? 0 : 1;
                    matrix[i][j] = Math.min(
                        matrix[i-1][j] + 1,         // Deletion
                        matrix[i][j-1] + 1,         // Insertion
                        matrix[i-1][j-1] + nCost    // Substitution
                    );
                    if (i > 1 && j > 1 && s1[i] == s2[j-1] && s1[i-1] == s2[j]) {
                        matrix[i][j] = Math.min(matrix[i][j], matrix[i-2][j-2] + nCost);  // Transposition
                    }
                }
            }
            //console.log(s2 + ": " + matrix[nLen1][nLen2]);
            return matrix[nLen1][nLen2];
        }
        catch (e) {
            helpers.logerror(e);
        }
    },

    aVowel: new Set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ"),
    showDistance (s1, s2) {
        let s1b = this.cleanWord(s1);
        let s2b = this.cleanWord(s2);
        console.log(`Distance: ${s1} / ${s2} = ${this.distanceDamerauLevenshtein(s1, s2)})`);
        console.log(`Distance: ${s1b} / ${s2b} = ${this.distanceDamerauLevenshtein(s1b, s2b)})`);
    },

    aConsonant: new Set("bcdefghjklmnñpqrstvwxzBCDEFGHJKLMNÑPQRSTVWXZ"),
    // Method: Remove Useless Chars

    aDouble: new Set("bcdfjklmnprstzBCDFJKLMNPRSTZ"),  // letter that may be used twice successively
    aVovels: new Set([
        'a', 'e', 'i', 'o', 'u', 'y',
        'à', 'é', 'î', 'ô', 'û', 'ÿ',
        'â', 'è', 'ï', 'ö', 'ù', 'ŷ',
        'ä', 'ê', 'í', 'ó', 'ü', 'ý',
        'á', 'ë', 'ì', 'ò', 'ú', 'ỳ',
        'ā', 'ē', 'ī', 'ō', 'ū', 'ȳ',
        'h', 'œ', 'æ'
    ]),

    shrinkWord: function (sWord) {
        // remove vovels and h
        let sRes = "";
        for (let cChar of sWord.slice(1)) {
            if (!this.aVovels.has(cChar)) {
                sRes += cChar;
            }
        }
        return sWord.slice(0, 1).replace("h", "") + sRes;
    },


    // Similar chars

    d1to1: new Map([
        ["1", "liîLIÎ"],
        ["2", "zZ"],
227
228
229
230
231
232
233








234
235
236
237
238
239
240
241
242
243
244
245
246
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182


183
184
185
186
187
188
189







+
+
+
+
+
+
+
+




-
-







        ["B", ["BB",]],
        ["c", ["cc", "ss", "qu", "ch"]],
        ["C", ["CC", "SS", "QU", "CH"]],
        ["ç", ["ss", "cc", "qh", "ch"]],
        ["Ç", ["SS", "CC", "QH", "CH"]],
        ["d", ["dd",]],
        ["D", ["DD",]],
        ["é", ["ai", "ei"]],
        ["É", ["AI", "EI"]],
        ["è", ["ai", "ei"]],
        ["È", ["AI", "EI"]],
        ["ê", ["ai", "ei"]],
        ["Ê", ["AI", "EI"]],
        ["ë", ["ai", "ei"]],
        ["Ë", ["AI", "EI"]],
        ["f", ["ff", "ph"]],
        ["F", ["FF", "PH"]],
        ["g", ["gu", "ge", "gg", "gh"]],
        ["G", ["GU", "GE", "GG", "GH"]],
        ["i", ["ii",]],
        ["I", ["II",]],
        ["j", ["jj", "dj"]],
        ["J", ["JJ", "DJ"]],
        ["k", ["qu", "ck", "ch", "cu", "kk", "kh"]],
        ["K", ["QU", "CK", "CH", "CU", "KK", "KH"]],
        ["l", ["ll",]],
        ["L", ["LL",]],
        ["m", ["mm", "mn"]],