Overview
Comment: | [graphspell] suggestion mechanism optimization: parse graph arcs according to similar chars |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | trunk | graphspell |
Files: | files | file ages | folders |
SHA3-256: |
58fed3978799489194f877e74844d386 |
User & Date: | olr on 2025-09-23 11:48:05 |
Other Links: | manifest | tags |
Context
2025-09-23
| ||
11:48 | [graphspell] suggestion mechanism optimization: parse graph arcs according to similar chars Leaf check-in: 58fed39787 user: olr tags: trunk, graphspell | |
11:14 | [cli] clarity for spelling suggestion check-in: 4858dc598e user: olr tags: trunk, cli | |
Changes
Modified gc_lang/fr/modules/tests_modules.py from [10ff0f96cf] to [69c65c5b78].
︙ | ︙ | |||
63 64 65 66 67 68 69 | ("Emilie", "Émilie"), ("exibission", "exhibition"), ("ditirembique", "dithyrambique"), ("jai", "j’ai"), ("email", "courriel"), ("fatiqué", "fatigué"), ("coeur", "cœur"), | | > | 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 | ("Emilie", "Émilie"), ("exibission", "exhibition"), ("ditirembique", "dithyrambique"), ("jai", "j’ai"), ("email", "courriel"), ("fatiqué", "fatigué"), ("coeur", "cœur"), ("triiiiicheuuuur", "tricheur"), ("vraaaaiiiimeeeeennnt", "vraiment"), ("oeil", "œil"), ("Oeil", "Œil"), ("OEIL", "ŒIL"), ("apele", "appel"), ("Co2", "CO₂"), ("emmppâiiiller", "empailler"), ("testt", "test"), ("apelaion", "appellation"), ("exsepttion", "exception"), ("ebriete", "ébriété"), ("ennormmement", "énormément"), ("maîtnesse", "maîtresse"), ("sintaxik", "syntaxique") ]: #with timeblock(sWord): for lSugg in self.oSpellChecker.suggest(sWrong): #print(sWord, "->", " ".join(lSugg)) self.assertIn(sSugg, lSugg) |
︙ | ︙ |
Modified graphspell-js/ibdawg.js from [d23b81aee3] to [f51bd17e6f].
︙ | ︙ | |||
363 364 365 366 367 368 369 | } } } if (nDist > oSuggResult.nDistLimit) { return; } let cCurrent = sRemain.slice(0, 1); | | | 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 | } } } if (nDist > oSuggResult.nDistLimit) { return; } let cCurrent = sRemain.slice(0, 1); for (let [cChar, jAddr] of this._getCharArcs(iAddr, cCurrent)) { if (char_player.d1to1.gl_get(cCurrent, cCurrent).indexOf(cChar) != -1) { this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDist, nDeep+1, jAddr, sNewWord+cChar); } else if (!bAvoidLoop) { if (nMaxHardRepl && this.isNgramsOK(cChar+sRemain.slice(1,2))) { this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl-1, nMaxJump, nDist+1, nDeep+1, jAddr, sNewWord+cChar, true); } |
︙ | ︙ | |||
425 426 427 428 429 430 431 | } if (!this.a2grams) { return true; } return this.a2grams.has(sChars); } | | > > | | > > > > > > > | 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 | } if (!this.a2grams) { return true; } return this.a2grams.has(sChars); } * _getCharArcs (iAddr, cChar="") { // generator: yield all chars and addresses from node at address <iAddr> let lStack = []; for (let [nVal, jAddr] of this._getArcs(iAddr)) { if (nVal <= this.nChar) { if (char_player.d1to1.gl_get(cChar, cChar).indexOf(this.dCharVal.get(nVal)) != -1) { yield [this.dCharVal.get(nVal), jAddr]; } else { lStack.push([this.dCharVal.get(nVal), jAddr]); } } } while (lStack.length > 0) { yield lStack.shift(); } } * _getSimilarCharArcs (cChar, iAddr) { // generator: yield similar char of <cChar> and address of the following node for (let c of char_player.d1to1.gl_get(cChar, [cChar])) { if (this.dChar.has(c)) { |
︙ | ︙ |
Modified graphspell/ibdawg.py from [b0bfbd049d] to [df9af9aea6].
︙ | ︙ | |||
249 250 251 252 253 254 255 | if cSplitter in sWord: sWord1, sWord2 = sWord.split(cSplitter, 1) if self.isValid(sWord1) and self.isValid(sWord2): oSuggResult.addSugg(sWord1+" "+sWord2) def _suggest (self, oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nMaxJump=0, nDist=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False): # recursive function | | | | 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 | if cSplitter in sWord: sWord1, sWord2 = sWord.split(cSplitter, 1) if self.isValid(sWord1) and self.isValid(sWord2): oSuggResult.addSugg(sWord1+" "+sWord2) def _suggest (self, oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nMaxJump=0, nDist=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False): # recursive function #logging.info((nDeep * " ") + f"{sNewWord}:{sRemain} nMaxSwitch:{nMaxSwitch} nMaxDel:{nMaxDel} nMaxHardRepl:{nMaxHardRepl} nMaxJump:{nMaxJump} | nDist:{nDist} / {oSuggResult.nDistLimit}") if self.lByDic[iAddr] & self._finalNodeMask: if not sRemain: oSuggResult.addSugg(sNewWord, nDeep) for sTail in self._getTails(iAddr): oSuggResult.addSugg(sNewWord+sTail, nDeep) return if (len(sNewWord) + len(sRemain) == len(oSuggResult.sWord)) and oSuggResult.sWord.lower().startswith(sNewWord.lower()) and self.isValid(sRemain): if self.sLangCode == "fr" and sNewWord.lower() in ("l", "d", "n", "m", "t", "s", "c", "j", "qu", "lorsqu", "puisqu", "quoiqu", "jusqu", "quelqu") and sRemain[0:1] in cp.aVowel: oSuggResult.addSugg(sNewWord+"’"+sRemain, nDeep) if (len(sNewWord) > 1 and len(sRemain) > 1) or sNewWord in "aày" or sRemain in "aày": oSuggResult.addSugg(sNewWord+" "+sRemain, nDeep) if nDist > oSuggResult.nDistLimit: return cCurrent = sRemain[0:1] for cChar, jAddr in self._getCharArcs(iAddr, cCurrent): if cChar in cp.d1to1.get(cCurrent, cCurrent): self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDist, nDeep+1, jAddr, sNewWord+cChar) elif not bAvoidLoop: if nMaxHardRepl and self.isNgramsOK(cChar+sRemain[1:2]): self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl-1, nMaxJump, nDist+1, nDeep+1, jAddr, sNewWord+cChar, True) if nMaxJump: self._suggest(oSuggResult, sRemain, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump-1, nDist+1, nDeep+1, jAddr, sNewWord+cChar, True) # True for avoiding loop? |
︙ | ︙ | |||
306 307 308 309 310 311 312 | "returns True if sChars in known 2grams" if len(sChars) != 2: return True if not self.a2grams: return True return sChars in self.a2grams | | > > | > > > > | 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 | "returns True if sChars in known 2grams" if len(sChars) != 2: return True if not self.a2grams: return True return sChars in self.a2grams def _getCharArcs (self, iAddr, cChar=""): "generator: yield all chars and addresses from node at address <iAddr>" lStack = [] for nVal, jAddr in self._getArcs(iAddr): if nVal <= self.nChar: if self.dCharVal[nVal] in cp.d1to1.get(cChar, cChar): yield (self.dCharVal[nVal], jAddr) else: lStack.append((self.dCharVal[nVal], jAddr)) while lStack: yield lStack.pop(0) def _getTails (self, iAddr, sTail="", n=2): "return a list of suffixes ending at a distance of <n> from <iAddr>" aTails = set() for nVal, jAddr in self._getArcs(iAddr): if nVal <= self.nChar: if self.lByDic[jAddr] & self._finalNodeMask: |
︙ | ︙ |