Overview
Comment: | [core] ibdawg: suggest() use sets instead of lists |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | core |
Files: | files | file ages | folders |
SHA3-256: |
f11e2ad39c0469b0b44e29b754fd0899 |
User & Date: | olr on 2017-07-02 19:53:02 |
Other Links: | manifest | tags |
Context
2017-07-03
| ||
02:35 | [core] ibdawg: code cleaning check-in: f4c85fbe0b user: olr tags: trunk, core | |
2017-07-02
| ||
19:53 | [core] ibdawg: suggest() use sets instead of lists check-in: f11e2ad39c user: olr tags: trunk, core | |
18:12 | [fr] pt: d’évidence (faux positif) + bug de suggestion check-in: e8450a49af user: olr tags: trunk, fr | |
Changes
Modified gc_core/py/ibdawg.py from [5262800a60] to [48f4ad6627].
︙ | ︙ | |||
# Reconstructed listing of the new side of this check-in's diff for
# gc_core/py/ibdawg.py (diff-view lines 185-263).  The page collapsed the hunk
# onto two physical lines and interleaved it with line-number and change-marker
# columns; the statements below are unchanged, only line breaks and indentation
# have been restored.
# These are methods (their first parameter is self); the class header lies
# outside the hunk, so class-level indentation is omitted here.
# NOTE(review): block nesting was inferred from Python syntax and from the
# "avoid infinite loop" comment -- confirm against the repository file.
#
# Hunk context -- tail of the preceding method, whose header is not shown:
#         l.extend(self.morph(sWord.capitalize()))
#         return l


def suggest (self, sWord):
    """Return a set of words similar to <sWord>.

    Strategy, in order, stopping as soon as something is found:
      1. look up <sWord> itself with _suggest();
      2. retry without the first char, without the last char, and without both;
      3. fall back on _suggestWithCrushedUselessChars() applied to
         cp.clearWord(sWord).
    """
    # first, we check for similar words
    #return set(self._suggestWithCrushedUselessChars(cp.clearWord(sWord)))
    aSugg = self._suggest(sWord)
    if not aSugg:
        aSugg.update(self._suggest(sWord[1:]))
        aSugg.update(self._suggest(sWord[:-1]))
        aSugg.update(self._suggest(sWord[1:-1]))
        if not aSugg:
            aSugg.update(self._suggestWithCrushedUselessChars(cp.clearWord(sWord)))
    return aSugg


def _suggest (self, sWord, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False):
    """Recursively collect the words reachable from <iAddr> that resemble <sWord>.

    sWord       -- remaining characters still to be matched
    nDeep       -- recursion depth (only used by the disabled debug traces)
    iAddr       -- address in self.byDic of the node currently examined
    sNewWord    -- characters accepted so far along the path
    bAvoidLoop  -- True when this call was made after a replacement; no further
                   replacement is then attempted at this level

    Returns a new set of suggested words (possibly empty).
    """
    # RECURSIVE FUNCTION
    aSugg = set()
    if not sWord:
        # whole word consumed: keep <sNewWord> only if the final-node bit is set at <iAddr>
        if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
            #show(nDeep, "___" + sNewWord + "___")
            aSugg.add(sNewWord)
        return aSugg
    #show(nDeep, "<" + sWord + "> ===> " + sNewWord)
    cCurrent = sWord[0:1]
    # follow every arc labelled with the current char or one of its similar chars
    for cChar, jAddr in self._getSimilarArcs(cCurrent, iAddr):
        #show(nDeep, cChar)
        aSugg.update(self._suggest(sWord[1:], nDeep+1, jAddr, sNewWord+cChar))
    if not bAvoidLoop: # avoid infinite loop
        #show(nDeep, ":no loop:")
        if cCurrent == sWord[1:2]:
            # same char, we remove 1 char without adding 1 to <sNewWord>
            aSugg.update(self._suggest(sWord[1:], nDeep+1, iAddr, sNewWord))
        # replace the first char by each alternative listed in cp.d1toX
        for sRepl in cp.d1toX.get(cCurrent, ()):
            #show(nDeep, sRepl)
            aSugg.update(self._suggest(sRepl + sWord[1:], nDeep+1, iAddr, sNewWord, True))
        # replace the first two chars by each alternative listed in cp.d2toX
        for sRepl in cp.d2toX.get(sWord[0:2], ()):
            #show(nDeep, sRepl)
            aSugg.update(self._suggest(sRepl + sWord[2:], nDeep+1, iAddr, sNewWord, True))
        if len(sWord) == 2:
            # two chars left: try the replacements listed in cp.dFinal2
            for sRepl in cp.dFinal2.get(sWord, ()):
                #show(nDeep, sRepl)
                aSugg.update(self._suggest(sRepl, nDeep+1, iAddr, sNewWord, True))
        elif len(sWord) == 1:
            #show(nDeep, ":end of word:")
            # end of word
            for sRepl in cp.dFinal1.get(sWord, ()):
                #show(nDeep, sRepl)
                aSugg.update(self._suggest(sRepl, nDeep+1, iAddr, sNewWord, True))
    return aSugg


def _getSimilarArcs (self, cChar, iAddr):
    """Generator: yield (char, address) for each char similar to <cChar> that has an arc from <iAddr>.

    Candidate chars come from cp.d1to1 (defaulting to <cChar> alone); a candidate
    is skipped when it is not in self.dChar or when _lookupArcNode() returns a
    falsy address.
    """
    for c in cp.d1to1.get(cChar, [cChar]):
        if c in self.dChar:
            jAddr = self._lookupArcNode(self.dChar[c], iAddr)
            if jAddr:
                yield (c, jAddr)


def _suggestWithCrushedUselessChars (self, sWord, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False):
    """Recursive variant of _suggest() that follows the arcs given by _getSimilarArcsAndCrushedChars().

    Parameters have the same meaning as in _suggest(); <bAvoidLoop> is accepted
    but not read here.  Returns a new set of suggested words (possibly empty).
    """
    # NOTE(review): the show() traces are enabled here whereas the equivalent
    # calls in _suggest() are commented out -- confirm whether this is intended.
    aSugg = set()
    if not sWord:
        # whole word consumed: keep <sNewWord> only if the final-node bit is set at <iAddr>
        if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
            show(nDeep, "!!! " + sNewWord + " !!!")
            aSugg.add(sNewWord)
        return aSugg
    cCurrent = sWord[0:1]
    for cChar, jAddr in self._getSimilarArcsAndCrushedChars(cCurrent, iAddr):
        show(nDeep, cChar)
        aSugg.update(self._suggestWithCrushedUselessChars(sWord[1:], nDeep+1, jAddr, sNewWord+cChar))
    return aSugg


# Hunk context -- the excerpt ends inside the next method (it continues beyond
# what the page shows):
#     def _getSimilarArcsAndCrushedChars (self, cChar, iAddr):
#         "generator: yield similar char of <cChar> and address of the following node"
#         for nVal, jAddr in self._getArcs(iAddr):
#             if self.dCharVal.get(nVal, None) in cp.aUselessChar:
#                 yield (self.dCharVal[nVal], jAddr)
#         for c in cp.d1to1.get(cChar, [cChar]):
︙ | ︙ |