Index: gc_core/js/char_player.js ================================================================== --- gc_core/js/char_player.js +++ gc_core/js/char_player.js @@ -17,20 +17,26 @@ ]), cleanWord: function (sWord) { // word simplication before calculating distance between words sWord = sWord.toLowerCase(); - let sRes = ""; + let sNewWord = ""; + let i = 1; for (let c of sWord) { - sRes += this._dTransChars.gl_get(c, c); + let cNew = this._dTransChars.gl_get(c, c); + let cNext = sWord.slice(i, i+1) + if (cNew != this._dTransChars.gl_get(cNext, cNext)) { + sNewWord += cNew; + } + i++; } - return sRes.replace("eau", "o").replace("au", "o"); + return sNewWord.replace("eau", "o").replace("au", "o").replace("ai", "e").replace("ei", "e"); }, aVowel: new Set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ"), aConsonant: new Set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ"), - aDouble: new Set("bcçdfjklmnprstzBCÇDFJKLMNPRSTZ"), // letters that may be used twice successively + aDouble: new Set("bcdfjklmnprstzBCDFJKLMNPRSTZ"), // letters that may be used twice successively // Similar chars d1to1: new Map([ Index: gc_core/py/char_player.py ================================================================== --- gc_core/py/char_player.py +++ gc_core/py/char_player.py @@ -14,16 +14,21 @@ 'œ': 'oe', 'æ': 'ae', }) def cleanWord (sWord): "word simplication before calculating distance between words" - return sWord.lower().translate(_xTransChars).replace("eau", "o").replace("au", "o") + sWord = sWord.lower().translate(_xTransChars) + sNewWord = "" + for i, c in enumerate(sWord, 1): + if c != sWord[i:i+1]: + sNewWord += c + return sNewWord.replace("eau", "o").replace("au", "o").replace("ai", "e").replace("ei", "e") aVowel = set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ") aConsonant = set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ") -aDouble = set("bcçdfjklmnprstzBCÇDFJKLMNPRSTZ") # letters that may be used twice successively +aDouble = set("bcdfjklmnprstzBCDFJKLMNPRSTZ") # letters that may be used twice successively # Similar chars d1to1 = {