Overview
Comment: | [graphspell][js] suggestion mechanism improvement: Damerau-Levenshtein extension |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | major_change | graphspell |
Files: | files | file ages | folders |
SHA3-256: |
504e22f37f139a766a34c0d63b734e21 |
User & Date: | olr on 2025-09-18 13:12:51 |
Other Links: | manifest | tags |
Context
2025-09-18
| ||
16:04 | [fr] faux positifs + màj dictionnaire check-in: bb6b0511d1 user: olr tags: trunk, fr | |
13:12 | [graphspell][js] suggestion mechanism improvement: Damerau-Levenshtein extension check-in: 504e22f37f user: olr tags: trunk, major_change, graphspell | |
12:39 | [graphspell] suggestion mechanism improvement: Damerau-Levenshtein extension check-in: 6c7fd16428 user: olr tags: trunk, major_change, graphspell | |
Changes
Modified graphspell-js/char_player.js from [1d820bba9e] to [20fe230042].
︙ | ︙ | |||
10 11 12 13 14 15 16 | var char_player = { /* oDistanceBetweenChars: - with Jaro-Winkler, values between 1 and 10 - with Damerau-Levenshtein, values / 10 (between 0 and 1: 0.1, 0.2 ... 0.9) */ oDistanceBetweenChars: { | | > > > > | > | | | > | | > > > > | > > > > > | | | | | > | > > > > > | > > > > > > > > > > | | | | | | | | | | | | | 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 | var char_player = { /* oDistanceBetweenChars: - with Jaro-Winkler, values between 1 and 10 - with Damerau-Levenshtein, values / 10 (between 0 and 1: 0.1, 0.2 ... 0.9) */ oDistanceBetweenChars: { "a": { "a": 0, "á": .1, "à": .1, "â": .1, "ã": .1 }, "á": { "a": .1, "á": 0, "à": .1, "â": .1, "ã": .1 }, "à": { "a": .1, "á": .1, "à": 0, "â": .1, "ã": .1 }, "â": { "a": .1, "á": .1, "à": .1, "â": 0, "ã": .1 }, "ã": { "a": .1, "á": .1, "à": .1, "â": .1, "ã": 0 }, "e": { "e": 0, "é": .1, "è": .1, "ê": .1, "ẽ": .1 }, "é": { "e": .1, "é": 0, "è": .1, "ê": .1, "ẽ": .1 }, "è": { "e": .1, "é": .1, "è": 0, "ê": .1, "ẽ": .1 }, "ê": { "e": .1, "é": .1, "è": .1, "ê": 0, "ẽ": .1 }, "ẽ": { "e": .1, "é": .1, "è": .1, "ê": .1, "ẽ": 0 }, "i": { "i": 0, "í": .1, "ì": .1, "î": .1, "ĩ": .1 }, "í": { "i": .1, "í": 0, "ì": .1, "î": .1, "ĩ": .1 }, "ì": { "i": .1, "í": .1, "ì": 0, "î": .1, "ĩ": .1 }, "î": { "i": .1, "í": .1, "ì": .1, "î": 0, "ĩ": .1 }, "ĩ": { "i": .1, "í": .1, "ì": .1, "î": .1, "ĩ": 0 }, "o": { "o": 0, "ó": .1, "ò": .1, "ô": .1, "õ": .1 }, "ó": { "o": .1, "ó": 0, "ò": .1, "ô": .1, "õ": .1 }, "ò": { "o": .1, "ó": .1, "ò": 0, "ô": .1, "õ": .1 }, "ô": { "o": .1, "ó": .1, "ò": .1, "ô": 0, "õ": .1 }, "õ": { "o": .1, "ó": .1, "ò": .1, "ô": .1, "õ": 0 }, "u": { "u": 0, "ú": .1, "ù": .1, "û": .1, "ũ": .1 }, "ú": { "u": .1, "ú": 0, "ù": .1, "û": .1, "ũ": .1 }, "ù": { "u": .1, "ú": .1, "ù": 0, "û": .1, "ũ": .1 }, "û": { "u": .1, "ú": .1, "ù": 
.1, "û": 0, "ũ": .1 }, "ũ": { "u": .1, "ú": .1, "ù": .1, "û": .1, "ũ": 0 }, "y": { "y": 0, "ý": .1, "ỳ": .1, "ŷ": .1, "ỹ": .1 }, "ý": { "y": .1, "ý": 0, "ỳ": .1, "ŷ": .1, "ỹ": .1 }, "ỳ": { "y": .1, "ý": .1, "ỳ": 0, "ŷ": .1, "ỹ": .1 }, "ŷ": { "y": .1, "ý": .1, "ỳ": .1, "ŷ": 0, "ỹ": .1 }, "ỹ": { "y": .1, "ý": .1, "ỳ": .1, "ŷ": .1, "ỹ": 0 }, // consonnes "b": { "b": 0, "d": .8, "h": .9 }, "c": { "c": 0, "ç": .1, "k": .5, "q": .5, "s": .5, "x": .5, "z": .8 }, "ç": { "c": .1, "ç": 0, "k": .5, "q": .5, "s": .5, "x": .5, "z": .8 }, "d": { "d": 0, "b": .8 }, "f": { "f": 0, "v": .8 }, "g": { "g": 0, "j": .5, "q": .8 }, "h": { "h": 0, "b": .9 }, "j": { "j": 0, "g": .5, "i": .8 }, "k": { "k": 0, "c": .5, "q": .1, "x": .5 }, "l": { "l": 0, "i": .8 }, "m": { "m": 0, "n": .6 }, "n": { "n": 0, "ñ": .1, "m": .6, "r": .8 }, "p": { "p": 0, "q": .8 }, "q": { "q": 0, "c": .5, "k": .1, "p": .8, "g": .8 }, "r": { "r": 0, "n": .8, "j": .9 }, "s": { "s": 0, "c": .5, "ç": .1, "x": .5, "z": .5 }, "t": { "t": 0, "d": .9 }, "v": { "v": 0, "f": .8, "w": .2 }, "w": { "w": 0, "v": .2 }, "x": { "x": 0, "c": .5, "k": .5, "q": .5, "s": .5 }, "z": { "z": 0, "s": .5 } }, distanceBetweenChars: function (c1, c2) { if (c1 == c2) { return 0; } if (this.oDistanceBetweenChars.hasOwnProperty(c1) && this.oDistanceBetweenChars[c1].hasOwnProperty(c2)) { |
︙ | ︙ |
Modified graphspell-js/ibdawg.js from [abe9c98a1b] to [d23b81aee3].
︙ | ︙ | |||
43 44 45 46 47 48 49 | this.aAllSugg.add(sSugg); //console.log("Grammalecte: " + sSugg); let nSimDist = str_transform.distanceSift4(this.sSimplifiedWord, str_transform.simplifyWord(sSugg)); if (nSimDist < this.nMinDist) { this.nMinDist = nSimDist; } if (nSimDist <= this.nMinDist+1) { | | > | | 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 | this.aAllSugg.add(sSugg); //console.log("Grammalecte: " + sSugg); let nSimDist = str_transform.distanceSift4(this.sSimplifiedWord, str_transform.simplifyWord(sSugg)); if (nSimDist < this.nMinDist) { this.nMinDist = nSimDist; } if (nSimDist <= this.nMinDist+1) { let nDist = Math.min(str_transform.distanceDamerauLevenshteinX(this.sWord, sSugg), str_transform.distanceDamerauLevenshteinX(this.sSimplifiedWord, str_transform.simplifyWord(sSugg))); if (sSugg.includes(" ")) { nDist += 1; } this.dAccSugg.set(sSugg, nDist); if (this.dAccSugg.size > this.nTempSuggLimit) { this.nDistLimit = -1; // suggest() ends searching when this variable = -1 } } this.nDistLimit = Math.min(this.nDistLimit, this.nMinDist+1); //console.log(this.dAccSugg); } |
︙ | ︙ |
Modified graphspell-js/str_transform.js from [8e968e40a5] to [fd4fb4b17d].
︙ | ︙ | |||
119 120 121 122 123 124 125 126 127 128 129 130 131 132 | } else { table[i+1][j+1] = 0; } } } return longestCommonSubstring; }, distanceDamerauLevenshtein: function (s1, s2) { // distance of Damerau-Levenshtein between <s1> and <s2> // https://fr.wikipedia.org/wiki/Distance_de_Damerau-Levenshtein try { let nLen1 = s1.length; let nLen2 = s2.length; | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 | } else { table[i+1][j+1] = 0; } } } return longestCommonSubstring; }, distanceDamerauLevenshteinX: function (s1, s2) { // distance of Damerau-Levenshtein between <s1> and <s2> // https://fr.wikipedia.org/wiki/Distance_de_Damerau-Levenshtein try { let nLen1 = s1.length; let nLen2 = s2.length; let matrix = []; for (let i = 0; i <= nLen1+1; i++) { matrix[i] = new Array(nLen2 + 2); } for (let i = 0; i <= nLen1+1; i++) { matrix[i][0] = i; } for (let j = 0; j <= nLen2+1; j++) { matrix[0][j] = j; } for (let i = 1; i <= nLen1; i++) { for (let j = 1; j <= nLen2; j++) { //let nCost = (s1[i-1] === s2[j-1]) ? 0 : 1; let nCost = char_player.distanceBetweenChars(s1[i-1], s2[j-1]); matrix[i][j] = Math.min( matrix[i-1][j] + 1, // Deletion matrix[i][j-1] + 1, // Insertion matrix[i-1][j-1] + nCost // Substitution ); if (i > 1 && j > 1 && s1[i] == s2[j-1] && s1[i-1] == s2[j]) { matrix[i][j] = Math.min(matrix[i][j], matrix[i-2][j-2] + nCost); // Transposition } } } return matrix[nLen1][nLen2]; } catch (e) { console.error(e); } }, distanceDamerauLevenshtein: function (s1, s2) { // distance of Damerau-Levenshtein between <s1> and <s2> // https://fr.wikipedia.org/wiki/Distance_de_Damerau-Levenshtein try { let nLen1 = s1.length; let nLen2 = s2.length; |
︙ | ︙ | |||
159 160 161 162 163 164 165 | catch (e) { console.error(e); } }, distanceJaroWinkler: function(a, b, boost = .666) { // https://github.com/thsig/jaro-winkler-JS | | | 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 | catch (e) { console.error(e); } }, distanceJaroWinkler: function(a, b, boost = .666) { // https://github.com/thsig/jaro-winkler-JS if (a == b) { return 1.0; } let a_len = a.length; let b_len = b.length; let a_flag = []; let b_flag = []; let search_range = Math.floor(Math.max(a_len, b_len) / 2) - 1; let minv = Math.min(a_len, b_len); |
︙ | ︙ | |||
302 303 304 305 306 307 308 309 310 | lcss += local_cs; return Math.round(Math.max(l1, l2) - lcss); }, showDistance: function (s1, s2) { console.log(`${s1} ≠ ${s2}`); let nDL = this.distanceDamerauLevenshtein(s1, s2); let nS4 = this.distanceSift4(s1, s2); let fJW = this.distanceJaroWinkler(s1, s2); | > | | 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 | lcss += local_cs; return Math.round(Math.max(l1, l2) - lcss); }, showDistance: function (s1, s2) { console.log(`${s1} ≠ ${s2}`); let nDL = this.distanceDamerauLevenshtein(s1, s2); let fDLX = this.distanceDamerauLevenshteinX(s1, s2); let nS4 = this.distanceSift4(s1, s2); let fJW = this.distanceJaroWinkler(s1, s2); console.log(`DL: ${nDL} DLX: ${fDLX} — S4: ${nS4} — JW: ${fJW}`); }, // Suffix only defineSuffixCode: function (sFlex, sStem) { /* Returns a string defining how to get stem from flexion "n(sfx)" |
︙ | ︙ | |||
if (typeof(exports) !== 'undefined') {
    // CommonJS environment: expose the functions of <str_transform> as module exports
    exports.simplifyWord = str_transform.simplifyWord;
    exports.numbersToExponent = str_transform.numbersToExponent;
    exports.spellingNormalization = str_transform.spellingNormalization;
    exports.longestCommonSubstring = str_transform.longestCommonSubstring;
    exports.distanceDamerauLevenshteinX = str_transform.distanceDamerauLevenshteinX;
    exports.distanceDamerauLevenshtein = str_transform.distanceDamerauLevenshtein;
    exports.distanceJaroWinkler = str_transform.distanceJaroWinkler;
    // distanceSift4 is called by showDistance(): it must be exported along with it
    exports.distanceSift4 = str_transform.distanceSift4;
    exports.showDistance = str_transform.showDistance;
    exports.changeWordWithSuffixCode = str_transform.changeWordWithSuffixCode;
    exports.changeWordWithAffixCode = str_transform.changeWordWithAffixCode;
    exports.defineAffixCode = str_transform.defineAffixCode;
    exports.defineSuffixCode = str_transform.defineSuffixCode;
}