Index: graphspell-js/char_player.js
==================================================================
--- graphspell-js/char_player.js
+++ graphspell-js/char_player.js
@@ -12,37 +12,68 @@
         oDistanceBetweenChars:
             - with Jaro-Winkler, values between 1 and 10
             - with Damerau-Levenshtein, values / 10 (between 0 and 1: 0.1, 0.2 ... 0.9)
     */
     oDistanceBetweenChars: {
-        //"a": {},
-        "e": {"é": 5},
-        //"é": {"e": 5},
-        "i": {"y": 2},
-        //"o": {},
-        //"u": {},
-        "y": {"i": 3},
-        "b": {"d": 8, "h": 9},
-        "c": {"ç": 1, "k": 5, "q": 5, "s": 5, "x": 5, "z": 8},
-        "d": {"b": 8},
-        "f": {"v": 8},
-        "g": {"j": 5},
-        "h": {"b": 9},
-        "j": {"g": 5, "i": 9},
-        "k": {"c": 5, "q": 1, "x": 5},
-        "l": {"i": 9},
-        "m": {"n": 8},
-        "n": {"m": 8, "r": 9},
-        "p": {"q": 9},
-        "q": {"c": 5, "k": 1, "p": 9},
-        "r": {"n": 9, "j": 9},
-        "s": {"c": 5, "ç": 1, "x": 5, "z": 5},
-        "t": {"d": 9},
-        "v": {"f": 8, "w": 1},
-        "w": {"v": 1},
-        "x": {"c": 5, "k": 5, "q": 5, "s": 5},
-        "z": {"s": 5}
+        "a": { "a": 0, "á": .1, "à": .1, "â": .1, "ã": .1 },
+        "á": { "a": .1, "á": 0, "à": .1, "â": .1, "ã": .1 },
+        "à": { "a": .1, "á": .1, "à": 0, "â": .1, "ã": .1 },
+        "â": { "a": .1, "á": .1, "à": .1, "â": 0, "ã": .1 },
+        "ã": { "a": .1, "á": .1, "à": .1, "â": .1, "ã": 0 },
+
+        "e": { "e": 0, "é": .1, "è": .1, "ê": .1, "ẽ": .1 },
+        "é": { "e": .1, "é": 0, "è": .1, "ê": .1, "ẽ": .1 },
+        "è": { "e": .1, "é": .1, "è": 0, "ê": .1, "ẽ": .1 },
+        "ê": { "e": .1, "é": .1, "è": .1, "ê": 0, "ẽ": .1 },
+        "ẽ": { "e": .1, "é": .1, "è": .1, "ê": .1, "ẽ": 0 },
+
+        "i": { "i": 0, "í": .1, "ì": .1, "î": .1, "ĩ": .1 },
+        "í": { "i": .1, "í": 0, "ì": .1, "î": .1, "ĩ": .1 },
+        "ì": { "i": .1, "í": .1, "ì": 0, "î": .1, "ĩ": .1 },
+        "î": { "i": .1, "í": .1, "ì": .1, "î": 0, "ĩ": .1 },
+        "ĩ": { "i": .1, "í": .1, "ì": .1, "î": .1, "ĩ": 0 },
+
+        "o": { "o": 0, "ó": .1, "ò": .1, "ô": .1, "õ": .1 },
+        "ó": { "o": .1, "ó": 0, "ò": .1, "ô": .1, "õ": .1 },
+        "ò": { "o": .1, "ó": .1, "ò": 0, "ô": .1, "õ": .1 },
+        "ô": { "o": .1, "ó": .1, "ò": .1, "ô": 0, "õ": .1 },
+        "õ": { "o": .1, "ó": .1, "ò": .1, "ô": .1, "õ": 0 },
+
+        "u": { "u": 0, "ú": .1, "ù": .1, "û": .1, "ũ": .1 },
+        "ú": { "u": .1, "ú": 0, "ù": .1, "û": .1, "ũ": .1 },
+        "ù": { "u": .1, "ú": .1, "ù": 0, "û": .1, "ũ": .1 },
+        "û": { "u": .1, "ú": .1, "ù": .1, "û": 0, "ũ": .1 },
+        "ũ": { "u": .1, "ú": .1, "ù": .1, "û": .1, "ũ": 0 },
+
+        "y": { "y": 0, "ý": .1, "ỳ": .1, "ŷ": .1, "ỹ": .1 },
+        "ý": { "y": .1, "ý": 0, "ỳ": .1, "ŷ": .1, "ỹ": .1 },
+        "ỳ": { "y": .1, "ý": .1, "ỳ": 0, "ŷ": .1, "ỹ": .1 },
+        "ŷ": { "y": .1, "ý": .1, "ỳ": .1, "ŷ": 0, "ỹ": .1 },
+        "ỹ": { "y": .1, "ý": .1, "ỳ": .1, "ŷ": .1, "ỹ": 0 },
+
+        // consonants
+        "b": { "b": 0, "d": .8, "h": .9 },
+        "c": { "c": 0, "ç": .1, "k": .5, "q": .5, "s": .5, "x": .5, "z": .8 },
+        "ç": { "c": .1, "ç": 0, "k": .5, "q": .5, "s": .5, "x": .5, "z": .8 },
+        "d": { "d": 0, "b": .8 },
+        "f": { "f": 0, "v": .8 },
+        "g": { "g": 0, "j": .5, "q": .8 },
+        "h": { "h": 0, "b": .9 },
+        "j": { "j": 0, "g": .5, "i": .8 },
+        "k": { "k": 0, "c": .5, "q": .1, "x": .5 },
+        "l": { "l": 0, "i": .8 },
+        "m": { "m": 0, "n": .6 },
+        "n": { "n": 0, "ñ": .1, "m": .6, "r": .8 },
+        "p": { "p": 0, "q": .8 },
+        "q": { "q": 0, "c": .5, "k": .1, "p": .8, "g": .8 },
+        "r": { "r": 0, "n": .8, "j": .9 },
+        "s": { "s": 0, "c": .5, "ç": .1, "x": .5, "z": .5 },
+        "t": { "t": 0, "d": .9 },
+        "v": { "v": 0, "f": .8, "w": .2 },
+        "w": { "w": 0, "v": .2 },
+        "x": { "x": 0, "c": .5, "k": .5, "q": .5, "s": .5 },
+        "z": { "z": 0, "s": .5 }
     },
 
     distanceBetweenChars: function (c1, c2) {
         if (c1 == c2) {
             return 0;
Index: graphspell-js/ibdawg.js
==================================================================
--- graphspell-js/ibdawg.js
+++ graphspell-js/ibdawg.js
@@ -45,12 +45,13 @@
         let nSimDist = str_transform.distanceSift4(this.sSimplifiedWord, str_transform.simplifyWord(sSugg));
         if (nSimDist < this.nMinDist) {
             this.nMinDist = nSimDist;
         }
         if (nSimDist <= this.nMinDist+1) {
-            let nDist = Math.min(str_transform.distanceDamerauLevenshtein(this.sWord, sSugg), str_transform.distanceDamerauLevenshtein(this.sSimplifiedWord, str_transform.simplifyWord(sSugg)));
-            this.dAccSugg.set(sSugg, Math.min(nDist, nSimDist+1));
+            let nDist = Math.min(str_transform.distanceDamerauLevenshteinX(this.sWord, sSugg), str_transform.distanceDamerauLevenshteinX(this.sSimplifiedWord, str_transform.simplifyWord(sSugg)));
+            if (sSugg.includes(" ")) { nDist += 1; }
+            this.dAccSugg.set(sSugg, nDist);
             if (this.dAccSugg.size > this.nTempSuggLimit) {
                 this.nDistLimit = -1; // suggest() ends searching when this variable = -1
             }
         }
         this.nDistLimit = Math.min(this.nDistLimit, this.nMinDist+1);
Index: graphspell-js/str_transform.js
==================================================================
--- graphspell-js/str_transform.js
+++ graphspell-js/str_transform.js
@@ -121,10 +121,47 @@
                 }
             }
         }
         return longestCommonSubstring;
     },
+
+    distanceDamerauLevenshteinX: function (s1, s2) {
+        // Damerau-Levenshtein distance between s1 and s2, where a substitution costs char_player.distanceBetweenChars()
+        // https://fr.wikipedia.org/wiki/Distance_de_Damerau-Levenshtein
+        try {
+            let nLen1 = s1.length;
+            let nLen2 = s2.length;
+            let matrix = []; // rows follow the prefixes of s1, columns the prefixes of s2
+            for (let i = 0; i <= nLen1+1; i++) {
+                matrix[i] = new Array(nLen2 + 2);
+            }
+            for (let i = 0; i <= nLen1+1; i++) {
+                matrix[i][0] = i; // first column: i deletions
+            }
+            for (let j = 0; j <= nLen2+1; j++) {
+                matrix[0][j] = j; // first row: j insertions
+            }
+            for (let i = 1; i <= nLen1; i++) {
+                for (let j = 1; j <= nLen2; j++) {
+                    // matrix indexes are 1-based: the characters compared at [i][j] are s1[i-1] and s2[j-1]
+                    let nCost = char_player.distanceBetweenChars(s1[i-1], s2[j-1]);
+                    matrix[i][j] = Math.min(
+                        matrix[i-1][j] + 1, // Deletion
+                        matrix[i][j-1] + 1, // Insertion
+                        matrix[i-1][j-1] + nCost // Substitution
+                    );
+                    if (i > 1 && j > 1 && s1[i-1] === s2[j-2] && s1[i-2] === s2[j-1]) {
+                        matrix[i][j] = Math.min(matrix[i][j], matrix[i-2][j-2] + nCost); // Transposition
+                    }
+                }
+            }
+            return matrix[nLen1][nLen2];
+        }
+        catch (e) {
+            console.error(e); // logged, not rethrown: the function then returns undefined
+        }
+    },
 
     distanceDamerauLevenshtein: function (s1, s2) {
         // distance of Damerau-Levenshtein between and
         // https://fr.wikipedia.org/wiki/Distance_de_Damerau-Levenshtein
         try {
@@ -161,11 +198,11 @@
         }
     },
 
     distanceJaroWinkler: function(a, b, boost = .666) {
         // https://github.com/thsig/jaro-winkler-JS
-        //if (a == b) { return 1.0; }
+        if (a == b) { return 1.0; }
         let a_len = a.length;
         let b_len = b.length;
         let a_flag = [];
         let b_flag = [];
         let search_range = Math.floor(Math.max(a_len, b_len) / 2) - 1;
@@ -304,13 +341,14 @@
     },
 
     showDistance: function (s1, s2) {
         console.log(`${s1} ≠ ${s2}`);
         let nDL = this.distanceDamerauLevenshtein(s1, s2);
+        let fDLX = this.distanceDamerauLevenshteinX(s1, s2);
         let nS4 = this.distanceSift4(s1, s2);
         let fJW = this.distanceJaroWinkler(s1, s2);
-        console.log(`DL: ${nDL} — S4: ${nS4} — JW: ${fJW}`);
+        console.log(`DL: ${nDL} DLX: ${fDLX} — S4: ${nS4} — JW: ${fJW}`);
     },
 
     // Suffix only
     defineSuffixCode: function (sFlex, sStem) {
         /*
@@ -393,13 +431,14 @@
 if (typeof(exports) !== 'undefined') {
     exports.simplifyWord = str_transform.simplifyWord;
     exports.numbersToExponent = str_transform.numbersToExponent;
     exports.spellingNormalization = str_transform.spellingNormalization;
     exports.longestCommonSubstring = str_transform.longestCommonSubstring;
+    exports.distanceDamerauLevenshteinX = str_transform.distanceDamerauLevenshteinX;
     exports.distanceDamerauLevenshtein = str_transform.distanceDamerauLevenshtein;
     exports.distanceJaroWinkler = str_transform.distanceJaroWinkler;
     exports.showDistance = str_transform.showDistance;
     exports.changeWordWithSuffixCode = str_transform.changeWordWithSuffixCode;
     exports.changeWordWithAffixCode = str_transform.changeWordWithAffixCode;
     exports.defineAffixCode = str_transform.defineAffixCode;
     exports.defineSuffixCode = str_transform.defineSuffixCode;
 }