Index: graphspell-js/char_player.js ================================================================== --- graphspell-js/char_player.js +++ graphspell-js/char_player.js @@ -6,39 +6,43 @@ ${map} var char_player = { - + /* + oDistanceBetweenChars: + - with Jaro-Winkler, values between 1 and 10 + - with Damerau-Levenshtein, values / 10 (between 0 and 1: 0.1, 0.2 ... 0.9) + */ oDistanceBetweenChars: { - "a": {}, - "e": {"é": 0.5}, - "é": {"e": 0.5}, - "i": {"y": 0.2}, - "o": {}, - "u": {}, - "y": {"i": 0.3}, - "b": {"d": 0.8, "h": 0.9}, - "c": {"ç": 0.1, "k": 0.5, "q": 0.5, "s": 0.5, "x": 0.5, "z": 0.8}, - "d": {"b": 0.8}, - "f": {"v": 0.8}, - "g": {"j": 0.5}, - "h": {"b": 0.9}, - "j": {"g": 0.5, "i": 0.9}, - "k": {"c": 0.5, "q": 0.1, "x": 0.5}, - "l": {"i": 0.9}, - "m": {"n": 0.8}, - "n": {"m": 0.8, "r": 0.9}, - "p": {"q": 0.9}, - "q": {"c": 0.5, "k": 0.1, "p": 0.9}, - "r": {"n": 0.9, "j": 0.9}, - "s": {"c": 0.5, "ç": 0.1, "x": 0.5, "z": 0.5}, - "t": {"d": 0.9}, - "v": {"f": 0.8, "w": 0.1}, - "w": {"v": 0.1}, - "x": {"c": 0.5, "k": 0.5, "q": 0.5, "s": 0.5}, - "z": {"s": 0.5} + //"a": {}, + "e": {"é": 5}, + //"é": {"e": 5}, + "i": {"y": 2}, + //"o": {}, + //"u": {}, + "y": {"i": 3}, + "b": {"d": 8, "h": 9}, + "c": {"ç": 1, "k": 5, "q": 5, "s": 5, "x": 5, "z": 8}, + "d": {"b": 8}, + "f": {"v": 8}, + "g": {"j": 5}, + "h": {"b": 9}, + "j": {"g": 5, "i": 9}, + "k": {"c": 5, "q": 1, "x": 5}, + "l": {"i": 9}, + "m": {"n": 8}, + "n": {"m": 8, "r": 9}, + "p": {"q": 9}, + "q": {"c": 5, "k": 1, "p": 9}, + "r": {"n": 9, "j": 9}, + "s": {"c": 5, "ç": 1, "x": 5, "z": 5}, + "t": {"d": 9}, + "v": {"f": 8, "w": 1}, + "w": {"v": 1}, + "x": {"c": 5, "k": 5, "q": 5, "s": 5}, + "z": {"s": 5} }, distanceBetweenChars: function (c1, c2) { if (c1 == c2) { return 0; Index: graphspell-js/ibdawg.js ================================================================== --- graphspell-js/ibdawg.js +++ graphspell-js/ibdawg.js @@ -20,78 +20,103 @@ class SuggResult { // Structure for storing, classifying and filtering suggestions - constructor (sWord, nDistLimit=-1) { + constructor (sWord, nSuggLimit=10, nDistLimit=-1) { this.sWord = sWord; this.sSimplifiedWord = str_transform.simplifyWord(sWord); this.nDistLimit = (nDistLimit >= 0) ? nDistLimit : Math.floor(sWord.length / 3) + 1; this.nMinDist = 1000; - this.aSugg = new Set(); - this.dSugg = new Map([ [0, []], [1, []], [2, []] ]); - this.aAllSugg = new Set(); // all found words even those refused + // Temporary sets + this.aAllSugg = new Set(); // All suggestions, even the one rejected + this.dGoodSugg = new Map(); // Acceptable suggestions + this.dBestSugg = new Map(); // Best suggestions + // Parameters + this.nSuggLimit = nSuggLimit; + this.nSuggLimitExt = nSuggLimit + 2; // we add few entries in case suggestions merge after casing modifications + this.nBestSuggLimit = Math.floor(nSuggLimit * 1.5); // n times the requested limit + this.nGoodSuggLimit = nSuggLimit * 15; // n times the requested limit } - addSugg (sSugg, nDeep=0) { + addSugg (sSugg) { // add a suggestion if (this.aAllSugg.has(sSugg)) { return; } this.aAllSugg.add(sSugg); - if (!this.aSugg.has(sSugg)) { - //let nDist = Math.floor(str_transform.distanceDamerauLevenshtein(this.sSimplifiedWord, str_transform.simplifyWord(sSugg))); - let nDist = Math.floor((1 - str_transform.distanceJaroWinkler(this.sSimplifiedWord, str_transform.simplifyWord(sSugg)))*10); - if (nDist <= this.nDistLimit) { - if (sSugg.includes(" ")) { // add 1 to distance for split suggestions - nDist += 1; - } - if (!this.dSugg.has(nDist)) { - this.dSugg.set(nDist, []); - } - this.dSugg.get(nDist).push(sSugg); - this.aSugg.add(sSugg); - if (nDist < this.nMinDist) { - this.nMinDist = nDist; - } - this.nDistLimit = Math.min(this.nDistLimit, this.nMinDist+1); - } + // jaro 0->1 1 les chaines sont égale + let nDistJaro = 1 - str_transform.distanceJaroWinkler(this.sSimplifiedWord, str_transform.simplifyWord(sSugg)); + let nDist = Math.floor(nDistJaro * 10); + if (nDistJaro < .11) { // Best suggestions + this.dBestSugg.set(sSugg, Math.round(nDistJaro*1000)); + if (this.dBestSugg.size > this.nBestSuggLimit) { + this.nDistLimit = -1; // make suggest() to end search + } + } else if (nDistJaro < .33) { // Good suggestions + this.dGoodSugg.set(sSugg, Math.round(nDistJaro*1000)); + if (this.dGoodSugg.size > this.nGoodSuggLimit) { + this.nDistLimit = -1; // make suggest() to end search + } + } else { + if (nDist < this.nMinDist) { + this.nMinDist = nDist; + } + this.nDistLimit = Math.min(this.nDistLimit, this.nMinDist); + } + if (nDist <= this.nDistLimit) { + if (nDist < this.nMinDist) { + this.nMinDist = nDist; + } + this.nDistLimit = Math.min(this.nDistLimit, this.nMinDist+1); } } - getSuggestions (nSuggLimit=10) { + getSuggestions () { // return a list of suggestions let lRes = []; - let bFirstListSorted = false; - for (let [nDist, lSugg] of this.dSugg.entries()) { - if (nDist > this.nDistLimit) { - break; - } - if (!bFirstListSorted && lSugg.length > 1) { - //lRes.sort((a, b) => { return str_transform.distanceDamerauLevenshtein(this.sWord, a) - str_transform.distanceDamerauLevenshtein(this.sWord, b); }); - lRes.sort((a, b) => { return str_transform.distanceJaroWinkler(this.sWord, b) - str_transform.distanceJaroWinkler(this.sWord, a); }); - bFirstListSorted = true; - } - lRes.push(...lSugg); - if (lRes.length > nSuggLimit) { - break; - } - } + if (this.dBestSugg.size > 0) { + // sort only with simplified words + let lResTmp = [...this.dBestSugg.entries()].sort((a, b) => { return a[1] - b[1]; }); + let nSize = Math.min(this.nSuggLimitExt, lResTmp.length); + for (let i=0; i < nSize; i++){ + lRes.push(lResTmp[i][0]); + } + } + if (lRes.length < this.nSuggLimitExt) { + // sort with simplified words and original word + let lResTmp = [...this.dGoodSugg.entries()].sort((a, b) => { + // Low precision to rely more on simplified words + let nJaroA = Math.round(str_transform.distanceJaroWinkler(this.sWord, a[0]) * 10); + let nJaroB = Math.round(str_transform.distanceJaroWinkler(this.sWord, b[0]) * 10); + if (nJaroA == nJaroB) { + return a[1] - b[1]; // warning: both lists are NOT sorted the same way (key: a-b) + } else { + return nJaroB - nJaroA; // warning: both lists are NOT sorted the same way (key: b-a) + } + }).slice(0, this.nSuggLimitExt); + let nSize = Math.min(this.nSuggLimitExt, lResTmp.length); + for (let i=0; i < nSize; i++){ + lRes.push(lResTmp[i][0]); + } + } + // casing if (this.sWord.gl_isUpperCase()) { lRes = lRes.map((sSugg) => { return sSugg.toUpperCase(); }); lRes = [...new Set(lRes)]; } else if (this.sWord.slice(0,1).gl_isUpperCase()) { lRes = lRes.map((sSugg) => { return sSugg.slice(0,1).toUpperCase() + sSugg.slice(1); }); lRes = [...new Set(lRes)]; } - return lRes.slice(0, nSuggLimit); + return lRes.slice(0, this.nSuggLimit); } reset () { - this.aSugg.clear(); this.dSugg.clear(); + this.dGoodSugg.clear(); + this.dBestSugg.clear(); } } class IBDAWG { @@ -329,17 +354,18 @@ } let nMaxSwitch = Math.max(Math.floor(sWord.length / 3), 1); let nMaxDel = Math.floor(sWord.length / 5); let nMaxHardRepl = Math.max(Math.floor((sWord.length - 5) / 4), 1); let nMaxJump = Math.max(Math.floor(sWord.length / 4), 1); - let oSuggResult = new SuggResult(sWord); + let oSuggResult = new SuggResult(sWord, nSuggLimit); + let sWord = str_transform.cleanWord(sWord); if (bSplitTrailingNumbers) { this._splitTrailingNumbers(oSuggResult, sWord); } this._splitSuggest(oSuggResult, sWord); this._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump); - let aSugg = oSuggResult.getSuggestions(nSuggLimit); + let aSugg = oSuggResult.getSuggestions(); if (this.lexicographer) { aSugg = this.lexicographer.filterSugg(aSugg); } if (sSfx || sPfx) { // we add what we removed Index: graphspell-js/str_transform.js ================================================================== --- graphspell-js/str_transform.js +++ graphspell-js/str_transform.js @@ -64,10 +64,23 @@ } i++; } return sNewWord.replace(/eau/g, "o").replace(/au/g, "o").replace(/ai/g, "éi").replace(/ei/g, "é").replace(/ph/g, "f"); }, + + cleanWord: function (sWord) { + // word clean for the user who make commun and preditive error help suggest + // remove letters repeated more than 2 times + if (sWord.match(/(.)(\1){2,}/igm)){ + sWord = sWord.replace(/(.*)(.)(.\2)/igm,'$1$2').replace(/(.)(\1)+/igm,'$1$1'); + } + // words ending with -ik -> replace with -ique + if (sWord.match(/ik$/ig)){ + sWord = sWord.replace(/(.*)ik$/ig,'$1ique'); + } + return sWord; + }, _xTransNumbersToExponent: new Map([ ["0", "⁰"], ["1", "¹"], ["2", "²"], ["3", "³"], ["4", "⁴"], ["5", "⁵"], ["6", "⁶"], ["7", "⁷"], ["8", "⁸"], ["9", "⁹"] ]), @@ -207,11 +220,11 @@ for (let i = 0; i < a_len; i++) { if (!a_flag[i]) { for (let j = 0; j < b_len; j++) { if (!b_flag[j]) { if (adjwt[a[i]] && adjwt[a[i]][b[j]]) { - N_simi += adjwt[a[i]][b[j]] * 10; // le fois 10 est un ajustement pour que ça fonctionne bien dans cette fonction + N_simi += adjwt[a[i]][b[j]]; b_flag[j] = 2; break; } } }