Index: gc_core/js/char_player.js ================================================================== --- gc_core/js/char_player.js +++ gc_core/js/char_player.js @@ -316,25 +316,31 @@ ["oi", ["ois", "oit", "oix"]], ["OI", ["OIS", "OIT", "OIX"]], ]), - // Préfixes + // Prefixes and suffixes aPfx1: new Set([ "anti", "archi", "contre", "hyper", "mé", "méta", "im", "in", "ir", "par", "proto", "pseudo", "pré", "re", "ré", "sans", "sous", "supra", "sur", "ultra" ]), aPfx2: new Set([ "belgo", "franco", "génito", "gynéco", "médico", "russo" ]), - aExcludedSfx: new Set([ - "je", "tu", "il", "elle", "on", "t-il", "t-elle", "t-on", "nous", "vous", "ils", "elles" - ]) + + cut: function (sWord) { + // returns an array of strings (prefix, trimmed_word, suffix) + let m = /^([a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st]+)(-(?:t-|)(?:ils?|elles?|on|je|tu|nous|vous)$)/.exec(sWord); + if (m) { + return ["", m[1], m[2]]; + } + return ["", sWord, ""]; + }, + + // Other functions + filterSugg: function (aSugg) { + return aSugg.filter((sSugg) => { return !sSugg.endsWith("è") && !sSugg.endsWith("È"); }); + } } - - - - - Index: gc_core/js/ibdawg.js ================================================================== --- gc_core/js/ibdawg.js +++ gc_core/js/ibdawg.js @@ -188,18 +188,13 @@ return l; } suggest (sWord, nMaxSugg=10) { // returns a array of suggestions for - let sAdd = ""; - if (sWord.includes("-")) { - let nLastHyphenPos = sWord.lastIndexOf("-"); - if (char_player.aExcludedSfx.has(sWord.slice(nLastHyphenPos+1))) { - sAdd = sWord.slice(nLastHyphenPos); - sWord = sWord.slice(0, nLastHyphenPos); - } - } + let sPfx = ""; + let sSfx = ""; + [sPfx, sWord, sSfx] = char_player.cut(sWord); let nMaxDel = Math.floor(sWord.length / 5); let nMaxHardRepl = Math.max(Math.floor((sWord.length - 5) / 4), 1); let aSugg = this._suggest(sWord, nMaxDel, nMaxHardRepl); if (sWord.gl_isTitle()) { aSugg.gl_update(this._suggest(sWord.toLowerCase(), nMaxDel, nMaxHardRepl)); @@ -218,13 +213,13 @@ } let dDistTemp = new Map(); 
aSugg.forEach((sSugg) => { dDistTemp.set(sSugg, char_player.distanceDamerauLevenshtein(sWord, sSugg)); }); aSugg = aSugg.sort((sA, sB) => { return dDistTemp.get(sA) - dDistTemp.get(sB); }).slice(0, nMaxSugg); dDistTemp.clear(); - if (sAdd) { + if (sSfx || sPfx) { // we add what we removed - return aSugg.map( (sSugg) => { return sSugg + sAdd } ); + return aSugg.map( (sSugg) => { return sPfx + sSugg + sSfx } ); } return aSugg; } _suggest (sRemain, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=false) { Index: gc_core/py/char_player.py ================================================================== --- gc_core/py/char_player.py +++ gc_core/py/char_player.py @@ -1,8 +1,10 @@ # list of similar chars # useful for suggestion mechanism +import re + def distanceDamerauLevenshtein (s1, s2): "distance of Damerau-Levenshtein between and " # https://fr.wikipedia.org/wiki/Distance_de_Damerau-Levenshtein d = {} @@ -298,18 +300,31 @@ "oi": ("ois", "oit", "oix"), "OI": ("OIS", "OIT", "OIX"), } -# Préfixes +# Prefixes and suffixes aPfx1 = frozenset([ "anti", "archi", "contre", "hyper", "mé", "méta", "im", "in", "ir", "par", "proto", "pseudo", "pré", "re", "ré", "sans", "sous", "supra", "sur", "ultra" ]) aPfx2 = frozenset([ "belgo", "franco", "génito", "gynéco", "médico", "russo" ]) -aExcludedSfx = frozenset([ - "je", "tu", "il", "elle", "on", "t-il", "t-elle", "t-on", "nous", "vous", "ils", "elles" -]) + +_zMotAvecPronom = re.compile("(?i)^(\\w+)(-(?:t-|)(?:ils?|elles?|on|je|tu|nous|vous))$") + +def cut (sWord): + "returns a tuple of strings (prefix, trimmed_word, suffix)" + m = _zMotAvecPronom.search(sWord) + if m: + return ("", m.group(1), m.group(2)) + return ("", sWord, "") + + +# Other functions + +def filterSugg (aSugg): + "exclude suggestions" + return filter(lambda sSugg: not sSugg.endswith(("è", "È")), aSugg) Index: gc_core/py/ibdawg.py ================================================================== --- gc_core/py/ibdawg.py +++ gc_core/py/ibdawg.py 
@@ -187,16 +187,11 @@ l.extend(self.morph(sWord.capitalize())) return l def suggest (self, sWord, nMaxSugg=10): "returns a set of suggestions for " - sAdd = "" - if "-" in sWord: - nLastHyphenPos = sWord.rfind("-") - if sWord[nLastHyphenPos+1:] in cp.aExcludedSfx: - sAdd = sWord[nLastHyphenPos:] - sWord = sWord[:nLastHyphenPos] + sPfx, sWord, sSfx = cp.cut(sWord) nMaxDel = len(sWord) // 5 nMaxHardRepl = max((len(sWord) - 5) // 4, 1) aSugg = self._suggest(sWord, nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl) if sWord.istitle(): aSugg.update(self._suggest(sWord.lower(), nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl)) @@ -204,15 +199,15 @@ elif sWord.islower(): aSugg.update(self._suggest(sWord.title(), nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl)) if not aSugg: #print("crush useless chars") aSugg.update(self._suggestWithCrushedUselessChars(cp.clearWord(sWord))) - aSugg = filter(lambda sSugg: not sSugg.endswith(("è", "È")), aSugg) # fr language + aSugg = cp.filterSugg(aSugg) aSugg = sorted(aSugg, key=lambda sSugg: cp.distanceDamerauLevenshtein(sWord, sSugg))[:nMaxSugg] - if sAdd: + if sSfx or sPfx: # we add what we removed - return list(map(lambda sSug: sSug+sAdd, aSugg)) + return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg)) return aSugg def _suggest (self, sRemain, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False): "returns a set of suggestions" # recursive function