Overview
Comment: | [core] sort suggestions with distance of Damerau-Levenshtein + variables renaming |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | core |
Files: | files | file ages | folders |
SHA3-256: |
44cb3f4d5221da339dcdd3f98898d539 |
User & Date: | olr on 2017-07-05 11:22:06 |
Other Links: | manifest | tags |
Context
2017-07-05
| ||
15:01 | remove comment check-in: cffc315674 user: olr tags: trunk | |
11:22 | [core] sort suggestions with distance of Damerau-Levenshtein + variables renaming check-in: 44cb3f4d52 user: olr tags: trunk, core | |
2017-07-04
| ||
15:50 | [fr] oops, option conjugaison réactivée pour Writer +détails mineurs check-in: 7e75e7a4d4 user: olr tags: trunk, fr, v0.5.17.2 | |
Changes
Modified gc_core/py/char_player.py from [9837f292f2] to [55f90e3733].
1 2 3 4 5 6 | # list of similar chars # useful for suggestion mechanism # Method: Remove Useless Chars | > > > > > > > > > > > > > > > > > > > > > > > > | | > | > | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 | # list of similar chars # useful for suggestion mechanism # distance between words def distanceBetweenWords (s1, s2): "distance of Damerau-Levenshtein between <s1> and <s2>" # https://fr.wikipedia.org/wiki/Distance_de_Damerau-Levenshtein d = {} nLen1 = len(s1) nLen2 = len(s2) for i in range(-1, nLen1+1): d[i, -1] = i + 1 for j in range(-1, nLen2+1): d[-1, j] = j + 1 for i in range(nLen1): for j in range(nLen2): nCost = 0 if s1[i] == s2[j] else 1 d[i, j] = min( d[i-1, j] + 1, # Deletion d[i, j-1] + 1, # Insertion d[i-1, j-1] + nCost, # Substitution ) if i and j and s1[i] == s2[j-1] and s1[i-1] == s2[j]: d[i, j] = min(d[i, j], d[i-2, j-2] + nCost) # Transposition return d[nLen1-1, nLen2-1] # Method: Remove Useless Chars _dVovels = { 'a': '', 'e': '', 'i': '', 'o': '', 'u': '', 'y': '', 'à': '', 'é': '', 'î': '', 'ô': '', 'û': '', 'ÿ': '', 'â': '', 'è': '', 'ï': '', 'ö': '', 'ù': '', 'ŷ': '', 'ä': '', 'ê': '', 'í': '', 'ó': '', 'ü': '', 'ý': '', 'á': '', 'ë': '', 'ì': '', 'ò': '', 'ú': '', 'ỳ': '', 'ā': '', 'ē': '', 'ī': '', 'ō': '', 'ū': '', 'ȳ': '', 'h': '', 'œ': '', 'æ': '' } _xTransVovels = str.maketrans(_dVovels) aVovels = frozenset(_dVovels.keys()) def clearWord (sWord): "remove vovels and h" return sWord[0:1].replace("h", "") + sWord[1:].translate(_xTransVovels) # Similar chars d1to1 = { "1": "li", "2": "z", |
︙ | ︙ |
Modified gc_core/py/ibdawg.py from [af23fd019b] to [750537b4f6].
︙ | ︙ | |||
180 181 182 183 184 185 186 | l = self.morph(sWord) if sWord[0:1].isupper(): l.extend(self.morph(sWord.lower())) if sWord.isupper() and len(sWord) > 1: l.extend(self.morph(sWord.capitalize())) return l | | | | | | | | | | | | | | | | | | | | | 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 | l = self.morph(sWord) if sWord[0:1].isupper(): l.extend(self.morph(sWord.lower())) if sWord.isupper() and len(sWord) > 1: l.extend(self.morph(sWord.capitalize())) return l def suggest (self, sWord, nMaxSugg=10): "returns a set of suggestions for <sWord>" # first, we check for similar words #return self._suggestWithCrushedUselessChars(cp.clearWord(sWord)) aSugg = self._suggest(sWord) if not aSugg: aSugg.update(self._suggest(sWord[1:])) if not aSugg: aSugg.update(self._suggestWithCrushedUselessChars(cp.clearWord(sWord))) return sorted(aSugg, key=lambda sSugg: cp.distanceBetweenWords(sWord, sSugg)) def _suggest (self, sRemain, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False): "returns a set of suggestions" # recursive function aSugg = set() if not sRemain: if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask: #show(nDeep, "___" + sNewWord + "___") aSugg.add(sNewWord) for sTail in self._getTails(iAddr): aSugg.add(sNewWord+sTail) return aSugg #show(nDeep, "<" + sRemain + "> ===> " + sNewWord) cCurrent = sRemain[0:1] for cChar, jAddr in self._getSimilarArcs(cCurrent, iAddr): #show(nDeep, cChar) aSugg.update(self._suggest(sRemain[1:], nDeep+1, jAddr, sNewWord+cChar)) if not bAvoidLoop: # avoid infinite loop #show(nDeep, ":no loop:") if cCurrent == sRemain[1:2]: # same char, we remove 1 char without adding 1 to <sNewWord> aSugg.update(self._suggest(sRemain[1:], nDeep+1, iAddr, sNewWord)) for sRepl in cp.d1toX.get(cCurrent, ()): #show(nDeep, sRepl) aSugg.update(self._suggest(sRepl + sRemain[1:], nDeep+1, iAddr, sNewWord, True)) for sRepl in cp.d2toX.get(sRemain[0:2], ()): #show(nDeep, sRepl) aSugg.update(self._suggest(sRepl + sRemain[2:], nDeep+1, iAddr, sNewWord, True)) if len(sRemain) == 2: for sRepl in cp.dFinal2.get(sRemain, ()): #show(nDeep, sRepl) aSugg.update(self._suggest(sRepl, nDeep+1, iAddr, sNewWord, True)) elif len(sRemain) == 1: #show(nDeep, ":end of word:") # end of word aSugg.update(self._suggest("", nDeep+1, iAddr, sNewWord, True)) # remove last char and go on for sRepl in cp.dFinal1.get(sRemain, ()): #show(nDeep, sRepl) aSugg.update(self._suggest(sRepl, nDeep+1, iAddr, sNewWord, True)) return aSugg def _getSimilarArcs (self, cChar, iAddr): "generator: yield similar char of <cChar> and address of the following node" for c in cp.d1to1.get(cChar, [cChar]): |
︙ | ︙ | |||
266 267 268 269 270 271 272 | show(nDeep, cChar) aSugg.update(self._suggestWithCrushedUselessChars(sWord[1:], nDeep+1, jAddr, sNewWord+cChar)) return aSugg def _getSimilarArcsAndCrushedChars (self, cChar, iAddr): "generator: yield similar char of <cChar> and address of the following node" for nVal, jAddr in self._getArcs(iAddr): | | | 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 | show(nDeep, cChar) aSugg.update(self._suggestWithCrushedUselessChars(sWord[1:], nDeep+1, jAddr, sNewWord+cChar)) return aSugg def _getSimilarArcsAndCrushedChars (self, cChar, iAddr): "generator: yield similar char of <cChar> and address of the following node" for nVal, jAddr in self._getArcs(iAddr): if self.dCharVal.get(nVal, None) in cp.aVovels: yield (self.dCharVal[nVal], jAddr) yield from self._getSimilarArcs(cChar, iAddr) def drawPath (self, sWord, iAddr=0): cChar = sWord[0:1] if sWord else " " iPos = -1 n = 0 |
︙ | ︙ |