Index: gc_core/js/char_player.js
==================================================================
--- gc_core/js/char_player.js
+++ gc_core/js/char_player.js
@@ -3,10 +3,30 @@
 
 ${map}
 
 
 var char_player = {
+
+    _dTransChars: new Map([
+        ['à', 'a'], ['é', 'e'], ['î', 'i'], ['ô', 'o'], ['û', 'u'], ['ÿ', 'y'],
+        ['â', 'a'], ['è', 'e'], ['ï', 'i'], ['ö', 'o'], ['ù', 'u'], ['ŷ', 'y'],
+        ['ä', 'a'], ['ê', 'e'], ['í', 'i'], ['ó', 'o'], ['ü', 'u'], ['ý', 'y'],
+        ['á', 'a'], ['ë', 'e'], ['ì', 'i'], ['ò', 'o'], ['ú', 'u'], ['ỳ', 'y'],
+        ['ā', 'a'], ['ē', 'e'], ['ī', 'i'], ['ō', 'o'], ['ū', 'u'], ['ȳ', 'y'],
+        ['ñ', 'n'],
+        ['œ', 'oe'], ['æ', 'ae'],
+    ]),
+
+    cleanWord: function (sWord) {
+        // word simplification before calculating distance between words (lowercase, accents stripped, ligatures expanded)
+        sWord = sWord.toLowerCase();
+        let sRes = "";
+        for (let c of sWord) {
+            sRes += this._dTransChars.gl_get(c, c);
+        }
+        return sRes;
+    },
 
     distanceDamerauLevenshtein: function (s1, s2) {
         // distance of Damerau-Levenshtein between <s1> and <s2>
         // https://fr.wikipedia.org/wiki/Distance_de_Damerau-Levenshtein
         try {
@@ -54,11 +74,11 @@
         'á', 'ë', 'ì', 'ò', 'ú', 'ỳ',
         'ā', 'ē', 'ī', 'ō', 'ū', 'ȳ',
         'h', 'œ', 'æ'
     ]),
 
-    clearWord: function (sWord) {
+    shrinkWord: function (sWord) {
         // remove vovels and h
         let sRes = "";
         for (let cChar of sWord.slice(1)) {
             if (!this.aVovels.has(cChar)) {
                 sRes += cChar;

Index: gc_core/js/ibdawg.js
==================================================================
--- gc_core/js/ibdawg.js
+++ gc_core/js/ibdawg.js
@@ -201,20 +201,21 @@
         }
         else if (sWord.gl_isLowerCase()) {
             aSugg.gl_update(this._suggest(sWord.gl_toCapitalize(), nMaxDel, nMaxHardRepl));
         }
         if (aSugg.size == 0) {
-            aSugg.gl_update(this._suggestWithCrushedUselessChars(char_player.clearWord(sWord)));
+            aSugg.gl_update(this._suggestWithCrushedUselessChars(char_player.shrinkWord(sWord)));
         }
         // Set to Array
         aSugg = Array.from(aSugg);
         aSugg = aSugg.filter((sSugg) => { return !sSugg.endsWith("è") && !sSugg.endsWith("È"); }); // fr language
         if (sWord.gl_isTitle()) {
             aSugg = aSugg.map((sSugg) => { return sSugg.gl_toCapitalize(); });
         }
         let dDistTemp = new Map();
-        aSugg.forEach((sSugg) => { dDistTemp.set(sSugg, char_player.distanceDamerauLevenshtein(sWord, sSugg)); });
+        let sCleanWord = char_player.cleanWord(sWord);
+        aSugg.forEach((sSugg) => { dDistTemp.set(sSugg, char_player.distanceDamerauLevenshtein(sCleanWord, char_player.cleanWord(sSugg))); });
         aSugg = aSugg.sort((sA, sB) => { return dDistTemp.get(sA) - dDistTemp.get(sB); }).slice(0, nMaxSugg);
         dDistTemp.clear();
         if (sSfx || sPfx) {
             // we add what we removed
             return aSugg.map( (sSugg) => { return sPfx + sSugg + sSfx } );

Index: gc_core/py/char_player.py
==================================================================
--- gc_core/py/char_player.py
+++ gc_core/py/char_player.py
@@ -1,10 +1,25 @@
 # list of similar chars
 # useful for suggestion mechanism
 
 import re
 
+
+_xTransChars = str.maketrans({
+    'à': 'a',  'é': 'e',  'î': 'i',  'ô': 'o',  'û': 'u',  'ÿ': 'y',
+    'â': 'a',  'è': 'e',  'ï': 'i',  'ö': 'o',  'ù': 'u',  'ŷ': 'y',
+    'ä': 'a',  'ê': 'e',  'í': 'i',  'ó': 'o',  'ü': 'u',  'ý': 'y',
+    'á': 'a',  'ë': 'e',  'ì': 'i',  'ò': 'o',  'ú': 'u',  'ỳ': 'y',
+    'ā': 'a',  'ē': 'e',  'ī': 'i',  'ō': 'o',  'ū': 'u',  'ȳ': 'y',
+    'ñ': 'n',
+    'œ': 'oe',  'æ': 'ae',
+})
+
+def cleanWord (sWord):
+    "word simplification before calculating distance between words (lowercase, accents stripped, ligatures expanded)"
+    return sWord.lower().translate(_xTransChars)
+
 
 def distanceDamerauLevenshtein (s1, s2):
     "distance of Damerau-Levenshtein between <s1> and <s2>"
     # https://fr.wikipedia.org/wiki/Distance_de_Damerau-Levenshtein
     d = {}
@@ -43,11 +58,11 @@
 
 
 aVovels = frozenset(_dVovels.keys())
 
 
-def clearWord (sWord):
+def shrinkWord (sWord):
     "remove vovels and h"
     return sWord[0:1].replace("h", "") + sWord[1:].translate(_xTransVovels)
 
 
 # Similar chars

Index: gc_core/py/ibdawg.py
==================================================================
--- gc_core/py/ibdawg.py
+++ gc_core/py/ibdawg.py
@@ -198,13 +198,14 @@
             aSugg = set(map(lambda sSugg: sSugg.title(), aSugg))
         elif sWord.islower():
             aSugg.update(self._suggest(sWord.title(), nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl))
         if not aSugg:
             #print("crush useless chars")
-            aSugg.update(self._suggestWithCrushedUselessChars(cp.clearWord(sWord)))
+            aSugg.update(self._suggestWithCrushedUselessChars(cp.shrinkWord(sWord)))
         aSugg = cp.filterSugg(aSugg)
-        aSugg = sorted(aSugg, key=lambda sSugg: cp.distanceDamerauLevenshtein(sWord, sSugg))[:nMaxSugg]
+        sCleanWord = cp.cleanWord(sWord)
+        aSugg = sorted(aSugg, key=lambda sSugg: cp.distanceDamerauLevenshtein(sCleanWord, cp.cleanWord(sSugg)))[:nMaxSugg]
         if sSfx or sPfx:
             # we add what we removed
             return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
         return aSugg
 