Grammalecte  Check-in [504e22f37f]

Overview
Comment:[graphspell][js] suggestion mechanism improvement: Damerau-Levenshtein extension
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | major_change | graphspell
Files: files | file ages | folders
SHA3-256: 504e22f37f139a766a34c0d63b734e21cd26d8c147cfb1e56ed6353e4622491b
User & Date: olr on 2025-09-18 13:12:51
Other Links: manifest | tags
Context
2025-09-18
16:04
[fr] faux positifs + màj dictionnaire check-in: bb6b0511d1 user: olr tags: trunk, fr
13:12
[graphspell][js] suggestion mechanism improvement: Damerau-Levenshtein extension check-in: 504e22f37f user: olr tags: trunk, major_change, graphspell
12:39
[graphspell] suggestion mechanism improvement: Damerau-Levenshtein extension check-in: 6c7fd16428 user: olr tags: trunk, major_change, graphspell
Changes

Modified graphspell-js/char_player.js from [1d820bba9e] to [20fe230042].

10
11
12
13
14
15
16
17




18

19
20
21

22
23




24





25
26
27
28
29

30





31










32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
var char_player = {
    /*
        oDistanceBetweenChars:
            - with Jaro-Winkler, values between 1 and 10
            - with Damerau-Levenshtein, values / 10 (between 0 and 1: 0.1, 0.2 ... 0.9)
    */
    // Table of distances between pairs of characters: oDistanceBetweenChars[c1][c2] is a number.
    // In this version the values are integers (1 to 9 here); since this is a distance,
    // a smaller value presumably marks a pair considered closer (e.g. "k"/"q" = 1, "b"/"h" = 9).
    // distanceBetweenChars(c1, c2) checks this table with hasOwnProperty() on both levels.
    // NOTE(review): the table is not symmetric (e.g. "i" -> "y" is 2 but "y" -> "i" is 3) — confirm this is intended.
    oDistanceBetweenChars: {
        //"a": {},




        "e": {"é": 5},

        //"é": {"e": 5},
        "i": {"y": 2},
        //"o": {},

        //"u": {},
        "y": {"i": 3},




        "b": {"d": 8, "h": 9},





        "c": {"ç": 1, "k": 5, "q": 5, "s": 5, "x": 5, "z": 8},
        "d": {"b": 8},
        "f": {"v": 8},
        "g": {"j": 5},
        "h": {"b": 9},

        "j": {"g": 5, "i": 9},





        "k": {"c": 5, "q": 1, "x": 5},










        "l": {"i": 9},
        "m": {"n": 8},
        "n": {"m": 8, "r": 9},
        "p": {"q": 9},
        "q": {"c": 5, "k": 1, "p": 9},
        "r": {"n": 9, "j": 9},
        "s": {"c": 5, "ç": 1, "x": 5, "z": 5},
        "t": {"d": 9},
        "v": {"f": 8, "w": 1},
        "w": {"v": 1},
        "x": {"c": 5, "k": 5, "q": 5, "s": 5},
        "z": {"s": 5}
    },

    distanceBetweenChars: function (c1, c2) {
        if (c1 == c2) {
            return 0;
        }
        if (this.oDistanceBetweenChars.hasOwnProperty(c1) && this.oDistanceBetweenChars[c1].hasOwnProperty(c2)) {







|
>
>
>
>
|
>
|
|
|
>
|
|
>
>
>
>
|
>
>
>
>
>
|
|
|
|
|
>
|
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
|
|
|
|
|
|
|
|
|
|
|
|







10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
var char_player = {
    /*
        oDistanceBetweenChars:
            - with Jaro-Winkler, values between 1 and 10
            - with Damerau-Levenshtein, values / 10 (between 0 and 1: 0.1, 0.2 ... 0.9)
    */
    oDistanceBetweenChars: {
        "a": { "a": 0,  "á": .1, "à": .1, "â": .1, "ã": .1 },
        "á": { "a": .1, "á": 0,  "à": .1, "â": .1, "ã": .1 },
        "à": { "a": .1, "á": .1, "à": 0,  "â": .1, "ã": .1 },
        "â": { "a": .1, "á": .1, "à": .1, "â": 0,  "ã": .1 },
        "ã": { "a": .1, "á": .1, "à": .1, "â": .1, "ã": 0  },

        "e": { "e": 0,  "é": .1, "è": .1, "ê": .1, "ẽ": .1 },
        "é": { "e": .1, "é": 0,  "è": .1, "ê": .1, "ẽ": .1 },
        "è": { "e": .1, "é": .1, "è": 0,  "ê": .1, "ẽ": .1 },
        ": { "e": .1, "é": .1, "è": .1, "ê": 0,  "ẽ": .1 },
        "ẽ": { "e": .1, "é": .1, "è": .1, "ê": .1, "ẽ": 0  },

        "i": { "i": 0,  "í": .1, "ì": .1, "î": .1, "ĩ": .1 },
        "í": { "i": .1, "í": 0,  "ì": .1, "î": .1, "ĩ": .1 },
        "ì": { "i": .1, "í": .1, "ì": 0,  "î": .1, "ĩ": .1 },
        "î": { "i": .1, "í": .1, "ì": .1, "î": 0,  "ĩ": .1 },
        "ĩ": { "i": .1, "í": .1, "ì": .1, "î": .1, "ĩ": 0  },

        "o": { "o": 0,  "ó": .1, "ò": .1, "ô": .1, "õ": .1 },
        "ó": { "o": .1, "ó": 0,  "ò": .1, "ô": .1, "õ": .1 },
        "ò": { "o": .1, "ó": .1, "ò": 0,  "ô": .1, "õ": .1 },
        "ô": { "o": .1, "ó": .1, "ò": .1, "ô": 0,  "õ": .1 },
        "õ": { "o": .1, "ó": .1, "ò": .1, "ô": .1, "õ": 0  },

        "u": { "u": 0,  "ú": .1, "ù": .1, "û": .1, "ũ": .1 },
        "ú": { "u": .1, "ú": 0,  "ù": .1, "û": .1, "ũ": .1 },
        "ù": { "u": .1, "ú": .1, "ù": 0,  "û": .1, "ũ": .1 },
        "û": { "u": .1, "ú": .1, "ù": .1, "û": 0,  "ũ": .1 },
        "ũ": { "u": .1, "ú": .1, "ù": .1, "û": .1, "ũ": 0  },

        "y": { "y": 0,  "ý": .1, "ỳ": .1, "ŷ": .1, "ỹ": .1 },
        "ý": { "y": .1, "ý": 0,  "ỳ": .1, "ŷ": .1, "ỹ": .1 },
        "ỳ": { "y": .1, "ý": .1, "ỳ": 0,  "ŷ": .1, "ỹ": .1 },
        "ŷ": { "y": .1, "ý": .1, "ỳ": .1, "ŷ": 0,  "ỹ": .1 },
        "ỹ": { "y": .1, "ý": .1, "ỳ": .1, "ŷ": .1, "ỹ": 0  },

        // consonnes
        "b": { "b": 0, "d": .8, "h": .9 },
        "c": { "c": 0, "ç": .1, "k": .5, "q": .5, "s": .5, "x": .5, "z": .8 },
        "ç": { "c": .1, "ç": 0, "k": .5, "q": .5, "s": .5, "x": .5, "z": .8 },
        "d": { "d": 0, "b": .8 },
        "f": { "f": 0, "v": .8 },
        "g": { "g": 0, "j": .5, "q": .8 },
        "h": { "h": 0, "b": .9 },
        "j": { "j": 0, "g": .5, "i": .8 },
        "k": { "k": 0, "c": .5, "q": .1, "x": .5 },
        "l": { "l": 0, "i": .8 },
        "m": { "m": 0, "n": .6 },
        "n": { "n": 0, "ñ": .1, "m": .6, "r": .8 },
        "p": { "p": 0, "q": .8 },
        "q": { "q": 0, "c": .5, "k": .1, "p": .8, "g": .8 },
        "r": { "r": 0, "n": .8, "j": .9 },
        "s": { "s": 0, "c": .5, "ç": .1, "x": .5, "z": .5 },
        "t": { "t": 0, "d": .9 },
        "v": { "v": 0, "f": .8, "w": .2 },
        "w": { "w": 0, "v": .2 },
        "x": { "x": 0, "c": .5, "k": .5, "q": .5, "s": .5 },
        "z": { "z": 0, "s": .5 }
    },

    distanceBetweenChars: function (c1, c2) {
        if (c1 == c2) {
            return 0;
        }
        if (this.oDistanceBetweenChars.hasOwnProperty(c1) && this.oDistanceBetweenChars[c1].hasOwnProperty(c2)) {

Modified graphspell-js/ibdawg.js from [abe9c98a1b] to [d23b81aee3].

43
44
45
46
47
48
49
50

51
52
53
54
55
56
57
58
        this.aAllSugg.add(sSugg);
        //console.log("Grammalecte: " + sSugg);
        let nSimDist = str_transform.distanceSift4(this.sSimplifiedWord, str_transform.simplifyWord(sSugg));
        if (nSimDist < this.nMinDist) {
            this.nMinDist = nSimDist;
        }
        if (nSimDist <= this.nMinDist+1) {
            let nDist = Math.min(str_transform.distanceDamerauLevenshtein(this.sWord, sSugg), str_transform.distanceDamerauLevenshtein(this.sSimplifiedWord, str_transform.simplifyWord(sSugg)));

            this.dAccSugg.set(sSugg, Math.min(nDist, nSimDist+1));
            if (this.dAccSugg.size  > this.nTempSuggLimit) {
                this.nDistLimit = -1; // suggest() ends searching when this variable = -1
            }
        }
        this.nDistLimit = Math.min(this.nDistLimit, this.nMinDist+1);
        //console.log(this.dAccSugg);
    }







|
>
|







43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
        this.aAllSugg.add(sSugg);
        //console.log("Grammalecte: " + sSugg);
        let nSimDist = str_transform.distanceSift4(this.sSimplifiedWord, str_transform.simplifyWord(sSugg));
        if (nSimDist < this.nMinDist) {
            this.nMinDist = nSimDist;
        }
        if (nSimDist <= this.nMinDist+1) {
            let nDist = Math.min(str_transform.distanceDamerauLevenshteinX(this.sWord, sSugg), str_transform.distanceDamerauLevenshteinX(this.sSimplifiedWord, str_transform.simplifyWord(sSugg)));
            if (sSugg.includes(" ")) { nDist += 1; }
            this.dAccSugg.set(sSugg, nDist);
            if (this.dAccSugg.size  > this.nTempSuggLimit) {
                this.nDistLimit = -1; // suggest() ends searching when this variable = -1
            }
        }
        this.nDistLimit = Math.min(this.nDistLimit, this.nMinDist+1);
        //console.log(this.dAccSugg);
    }

Modified graphspell-js/str_transform.js from [8e968e40a5] to [fd4fb4b17d].

119
120
121
122
123
124
125





































126
127
128
129
130
131
132
                } else {
                    table[i+1][j+1] = 0;
                }
            }
        }
        return longestCommonSubstring;
    },






































    distanceDamerauLevenshtein: function (s1, s2) {
        // distance of Damerau-Levenshtein between <s1> and <s2>
        // https://fr.wikipedia.org/wiki/Distance_de_Damerau-Levenshtein
        try {
            let nLen1 = s1.length;
            let nLen2 = s2.length;







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
                } else {
                    table[i+1][j+1] = 0;
                }
            }
        }
        return longestCommonSubstring;
    },

    distanceDamerauLevenshteinX: function (s1, s2) {
        // distance of Damerau-Levenshtein between <s1> and <s2>
        // https://fr.wikipedia.org/wiki/Distance_de_Damerau-Levenshtein
        try {
            let nLen1 = s1.length;
            let nLen2 = s2.length;
            let matrix = [];
            for (let i = 0;  i <= nLen1+1;  i++) {
                matrix[i] = new Array(nLen2 + 2);
            }
            for (let i = 0;  i <= nLen1+1;  i++) {
                matrix[i][0] = i;
            }
            for (let j = 0;  j <= nLen2+1;  j++) {
                matrix[0][j] = j;
            }
            for (let i = 1;  i <= nLen1;  i++) {
                for (let j = 1;  j <= nLen2;  j++) {
                    //let nCost = (s1[i-1] === s2[j-1]) ? 0 : 1;
                    let nCost = char_player.distanceBetweenChars(s1[i-1], s2[j-1]);
                    matrix[i][j] = Math.min(
                        matrix[i-1][j] + 1,         // Deletion
                        matrix[i][j-1] + 1,         // Insertion
                        matrix[i-1][j-1] + nCost    // Substitution
                    );
                    if (i > 1 && j > 1 && s1[i] == s2[j-1] && s1[i-1] == s2[j]) {
                        matrix[i][j] = Math.min(matrix[i][j], matrix[i-2][j-2] + nCost);  // Transposition
                    }
                }
            }
            return matrix[nLen1][nLen2];
        }
        catch (e) {
            console.error(e);
        }
    },

    distanceDamerauLevenshtein: function (s1, s2) {
        // distance of Damerau-Levenshtein between <s1> and <s2>
        // https://fr.wikipedia.org/wiki/Distance_de_Damerau-Levenshtein
        try {
            let nLen1 = s1.length;
            let nLen2 = s2.length;
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
        catch (e) {
            console.error(e);
        }
    },

    distanceJaroWinkler: function(a, b, boost = .666) {
        // https://github.com/thsig/jaro-winkler-JS
        //if (a == b) { return 1.0; }
        let a_len = a.length;
        let b_len = b.length;
        let a_flag = [];
        let b_flag = [];
        let search_range = Math.floor(Math.max(a_len, b_len) / 2) - 1;
        let minv = Math.min(a_len, b_len);








|







196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
        catch (e) {
            console.error(e);
        }
    },

    distanceJaroWinkler: function(a, b, boost = .666) {
        // https://github.com/thsig/jaro-winkler-JS
        if (a == b) { return 1.0; }
        let a_len = a.length;
        let b_len = b.length;
        let a_flag = [];
        let b_flag = [];
        let search_range = Math.floor(Math.max(a_len, b_len) / 2) - 1;
        let minv = Math.min(a_len, b_len);

302
303
304
305
306
307
308

309
310
311
312
313
314
315
316
317
318
        lcss += local_cs;
        return Math.round(Math.max(l1, l2) - lcss);
    },

    showDistance: function (s1, s2) {
        console.log(`${s1} ≠ ${s2}`);
        let nDL = this.distanceDamerauLevenshtein(s1, s2);

        let nS4 = this.distanceSift4(s1, s2);
        let fJW = this.distanceJaroWinkler(s1, s2);
        console.log(`DL: ${nDL} — S4: ${nS4} — JW: ${fJW}`);
    },

    // Suffix only
    defineSuffixCode: function (sFlex, sStem) {
        /*
            Returns a string defining how to get stem from flexion
                "n(sfx)"







>


|







339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
        lcss += local_cs;
        return Math.round(Math.max(l1, l2) - lcss);
    },

    showDistance: function (s1, s2) {
        console.log(`${s1} ≠ ${s2}`);
        let nDL = this.distanceDamerauLevenshtein(s1, s2);
        let fDLX = this.distanceDamerauLevenshteinX(s1, s2);
        let nS4 = this.distanceSift4(s1, s2);
        let fJW = this.distanceJaroWinkler(s1, s2);
        console.log(`DL: ${nDL} DLX: ${fDLX} — S4: ${nS4} — JW: ${fJW}`);
    },

    // Suffix only
    defineSuffixCode: function (sFlex, sStem) {
        /*
            Returns a string defining how to get stem from flexion
                "n(sfx)"
391
392
393
394
395
396
397

398
399
400
401
402
403
404
405


// When an `exports` object exists, copy the public functions of str_transform onto it.
if (typeof(exports) !== 'undefined') {
    exports.simplifyWord = str_transform.simplifyWord;
    exports.numbersToExponent = str_transform.numbersToExponent;
    exports.spellingNormalization = str_transform.spellingNormalization;
    exports.longestCommonSubstring = str_transform.longestCommonSubstring;
    exports.distanceDamerauLevenshtein = str_transform.distanceDamerauLevenshtein;
    exports.distanceJaroWinkler = str_transform.distanceJaroWinkler;
    // showDistance() is exported unbound and calls this.distanceSift4(): when it is invoked
    // through `exports`, `this` is the exports object, so distanceSift4 must be exported too.
    exports.distanceSift4 = str_transform.distanceSift4;
    exports.showDistance = str_transform.showDistance;
    exports.changeWordWithSuffixCode = str_transform.changeWordWithSuffixCode;
    exports.changeWordWithAffixCode = str_transform.changeWordWithAffixCode;
    exports.defineAffixCode = str_transform.defineAffixCode;
    exports.defineSuffixCode = str_transform.defineSuffixCode;
}







>








429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444


// When an `exports` object exists, copy the public functions of str_transform onto it.
if (typeof(exports) !== 'undefined') {
    exports.simplifyWord = str_transform.simplifyWord;
    exports.numbersToExponent = str_transform.numbersToExponent;
    exports.spellingNormalization = str_transform.spellingNormalization;
    exports.longestCommonSubstring = str_transform.longestCommonSubstring;
    exports.distanceDamerauLevenshteinX = str_transform.distanceDamerauLevenshteinX;
    exports.distanceDamerauLevenshtein = str_transform.distanceDamerauLevenshtein;
    exports.distanceJaroWinkler = str_transform.distanceJaroWinkler;
    // showDistance() is exported unbound and calls this.distanceSift4(): when it is invoked
    // through `exports`, `this` is the exports object, so distanceSift4 must be exported too.
    exports.distanceSift4 = str_transform.distanceSift4;
    exports.showDistance = str_transform.showDistance;
    exports.changeWordWithSuffixCode = str_transform.changeWordWithSuffixCode;
    exports.changeWordWithAffixCode = str_transform.changeWordWithAffixCode;
    exports.defineAffixCode = str_transform.defineAffixCode;
    exports.defineSuffixCode = str_transform.defineSuffixCode;
}