Index: gc_core/py/char_player.py ================================================================== --- gc_core/py/char_player.py +++ gc_core/py/char_player.py @@ -1,28 +1,54 @@ # list of similar chars # useful for suggestion mechanism + +# distance between words +def distanceBetweenWords (s1, s2): + "returns the Damerau-Levenshtein distance between s1 and s2 (deletions, insertions, substitutions, adjacent transpositions)" + # https://fr.wikipedia.org/wiki/Distance_de_Damerau-Levenshtein + d = {} + nLen1 = len(s1) + nLen2 = len(s2) + for i in range(-1, nLen1+1): + d[i, -1] = i + 1 + for j in range(-1, nLen2+1): + d[-1, j] = j + 1 + for i in range(nLen1): + for j in range(nLen2): + nCost = 0 if s1[i] == s2[j] else 1 + d[i, j] = min( + d[i-1, j] + 1, # Deletion + d[i, j-1] + 1, # Insertion + d[i-1, j-1] + nCost, # Substitution + ) + if i and j and s1[i] == s2[j-1] and s1[i-1] == s2[j]: + d[i, j] = min(d[i, j], d[i-2, j-2] + nCost) # Transposition + return d[nLen1-1, nLen2-1] + # Method: Remove Useless Chars -_dUselessChar = { +_dVovels = { 'a': '', 'e': '', 'i': '', 'o': '', 'u': '', 'y': '', 'à': '', 'é': '', 'î': '', 'ô': '', 'û': '', 'ÿ': '', 'â': '', 'è': '', 'ï': '', 'ö': '', 'ù': '', 'ŷ': '', 'ä': '', 'ê': '', 'í': '', 'ó': '', 'ü': '', 'ý': '', 'á': '', 'ë': '', 'ì': '', 'ò': '', 'ú': '', 'ỳ': '', 'ā': '', 'ē': '', 'ī': '', 'ō': '', 'ū': '', 'ȳ': '', 'h': '', 'œ': '', 'æ': '' } -_CHARMAP = str.maketrans(_dUselessChar) +_xTransVovels = str.maketrans(_dVovels) + -aUselessChar = frozenset(_dUselessChar.keys()) +aVovels = frozenset(_dVovels.keys()) + def clearWord (sWord): "remove vovels and h" - return sWord.translate(_CHARMAP) + return sWord[0:1].replace("h", "") + sWord[1:].translate(_xTransVovels) # Similar chars d1to1 = { Index: gc_core/py/ibdawg.py ================================================================== --- gc_core/py/ibdawg.py +++ gc_core/py/ibdawg.py @@ -182,57 +182,57 @@ l.extend(self.morph(sWord.lower())) if sWord.isupper() and len(sWord) > 1: l.extend(self.morph(sWord.capitalize())) return l - def suggest (self, 
sWord): - "returns a set of similar words" + def suggest (self, sWord, nMaxSugg=10): + "returns a list of at most nMaxSugg suggestions for sWord, sorted by increasing Damerau-Levenshtein distance to sWord" # first, we check for similar words #return self._suggestWithCrushedUselessChars(cp.clearWord(sWord)) aSugg = self._suggest(sWord) if not aSugg: aSugg.update(self._suggest(sWord[1:])) if not aSugg: aSugg.update(self._suggestWithCrushedUselessChars(cp.clearWord(sWord))) - return aSugg + return sorted(aSugg, key=lambda sSugg: cp.distanceBetweenWords(sWord, sSugg))[:nMaxSugg] - def _suggest (self, sWord, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False): - "returns a set of suggestions for " + def _suggest (self, sRemain, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False): + "returns a set of suggestions" # recursive function aSugg = set() - if not sWord: + if not sRemain: if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask: #show(nDeep, "___" + sNewWord + "___") aSugg.add(sNewWord) for sTail in self._getTails(iAddr): aSugg.add(sNewWord+sTail) return aSugg - #show(nDeep, "<" + sWord + "> ===> " + sNewWord) - cCurrent = sWord[0:1] + #show(nDeep, "<" + sRemain + "> ===> " + sNewWord) + cCurrent = sRemain[0:1] for cChar, jAddr in self._getSimilarArcs(cCurrent, iAddr): #show(nDeep, cChar) - aSugg.update(self._suggest(sWord[1:], nDeep+1, jAddr, sNewWord+cChar)) + aSugg.update(self._suggest(sRemain[1:], nDeep+1, jAddr, sNewWord+cChar)) if not bAvoidLoop: # avoid infinite loop #show(nDeep, ":no loop:") - if cCurrent == sWord[1:2]: + if cCurrent == sRemain[1:2]: # same char, we remove 1 char without adding 1 to - aSugg.update(self._suggest(sWord[1:], nDeep+1, iAddr, sNewWord)) + aSugg.update(self._suggest(sRemain[1:], nDeep+1, iAddr, sNewWord)) for sRepl in cp.d1toX.get(cCurrent, ()): #show(nDeep, sRepl) - aSugg.update(self._suggest(sRepl + sWord[1:], nDeep+1, iAddr, sNewWord, True)) - for sRepl in cp.d2toX.get(sWord[0:2], ()): + aSugg.update(self._suggest(sRepl + sRemain[1:], nDeep+1, iAddr, sNewWord, True)) + for sRepl in 
cp.d2toX.get(sRemain[0:2], ()): #show(nDeep, sRepl) - aSugg.update(self._suggest(sRepl + sWord[2:], nDeep+1, iAddr, sNewWord, True)) - if len(sWord) == 2: - for sRepl in cp.dFinal2.get(sWord, ()): + aSugg.update(self._suggest(sRepl + sRemain[2:], nDeep+1, iAddr, sNewWord, True)) + if len(sRemain) == 2: + for sRepl in cp.dFinal2.get(sRemain, ()): #show(nDeep, sRepl) aSugg.update(self._suggest(sRepl, nDeep+1, iAddr, sNewWord, True)) - elif len(sWord) == 1: + elif len(sRemain) == 1: #show(nDeep, ":end of word:") # end of word - aSugg.update(self._suggest("", nDeep+1, iAddr, sNewWord, True)) - for sRepl in cp.dFinal1.get(sWord, ()): + aSugg.update(self._suggest("", nDeep+1, iAddr, sNewWord, True)) # drop the last char: keeps sNewWord if the current node is final, plus the tails found from this node + for sRepl in cp.dFinal1.get(sRemain, ()): #show(nDeep, sRepl) aSugg.update(self._suggest(sRepl, nDeep+1, iAddr, sNewWord, True)) return aSugg def _getSimilarArcs (self, cChar, iAddr): @@ -268,11 +268,11 @@ return aSugg def _getSimilarArcsAndCrushedChars (self, cChar, iAddr): "generator: yield similar char of and address of the following node" for nVal, jAddr in self._getArcs(iAddr): - if self.dCharVal.get(nVal, None) in cp.aUselessChar: + if self.dCharVal.get(nVal, None) in cp.aVovels: yield (self.dCharVal[nVal], jAddr) yield from self._getSimilarArcs(cChar, iAddr) def drawPath (self, sWord, iAddr=0): cChar = sWord[0:1] if sWord else " "