Overview
Comment: | [graphspell] ibdawg > suggest(): seek first simple combinations |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | graphspell |
Files: | files | file ages | folders |
SHA3-256: | 982c1b5eb083ff0cc78f2fb608eb7c72 |
User & Date: | olr on 2021-02-18 08:49:19 |
Other Links: | manifest | tags |
Context
2021-02-18
| ||
10:26 | [fr] remove old useless tests check-in: 15a51e51ca user: olr tags: trunk, fr | |
08:49 | [graphspell] ibdawg > suggest(): seek first simple combinations check-in: 982c1b5eb0 user: olr tags: trunk, graphspell | |
08:48 | [fr] faux positifs check-in: 950c661775 user: olr tags: trunk, fr | |
Changes
Modified gc_lang/fr/modules/tests_modules.py from [5c8bb6ae99] to [2556c753ce].
︙ | ︙ | |||
56 57 58 59 60 61 62 | def test_suggest (self): for sWord in [ "déelirranttesss", "vallidasion", "Emilie", "exibission", "ditirembique", "jai", "email", "fatiqué", "coeur", "trèèèèèèèèès", "vraaaaiiiimeeeeennnt", "apele", "Co2", "emmppâiiiller", "testt", "apelaion", "exsepttion", "sintaxik", "ebriete", "ennormmement" ]: | < < | | > > | 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 | def test_suggest (self): for sWord in [ "déelirranttesss", "vallidasion", "Emilie", "exibission", "ditirembique", "jai", "email", "fatiqué", "coeur", "trèèèèèèèèès", "vraaaaiiiimeeeeennnt", "apele", "Co2", "emmppâiiiller", "testt", "apelaion", "exsepttion", "sintaxik", "ebriete", "ennormmement" ]: #with timeblock(sWord): for lSugg in self.oSpellChecker.suggest(sWord): #print(sWord, "->", " ".join(lSugg)) self.assertTrue(len(lSugg) > 0) def test_lemmas (self): for sWord, sInfi in [ ("suis", "suivre"), ("suis", "être"), ("a", "avoir"), ("a", "a"), |
︙ | ︙ |
Modified graphspell-js/ibdawg.js from [44a920520f] to [20fbadf805].
︙ | ︙ | |||
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 | if (this.aAllSugg.has(sSugg)) { return; } this.aAllSugg.add(sSugg); // jaro 0->1 1 les chaines sont égale let nDistJaro = 1 - str_transform.distanceJaroWinkler(this.sSimplifiedWord, str_transform.simplifyWord(sSugg)); let nDist = Math.floor(nDistJaro * 10); if (nDistJaro < .11) { // Best suggestions this.dBestSugg.set(sSugg, Math.round(nDistJaro*1000)); if (this.dBestSugg.size > this.nBestSuggLimit) { this.nDistLimit = -1; // make suggest() to end search } } else if (nDistJaro < .33) { // Good suggestions this.dGoodSugg.set(sSugg, Math.round(nDistJaro*1000)); if (this.dGoodSugg.size > this.nGoodSuggLimit) { this.nDistLimit = -1; // make suggest() to end search } | > > > < < < | < < < < < < | < | 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 | if (this.aAllSugg.has(sSugg)) { return; } this.aAllSugg.add(sSugg); // jaro 0->1 1 les chaines sont égale let nDistJaro = 1 - str_transform.distanceJaroWinkler(this.sSimplifiedWord, str_transform.simplifyWord(sSugg)); let nDist = Math.floor(nDistJaro * 10); if (nDist < this.nMinDist) { this.nMinDist = nDist; } if (nDistJaro < .11) { // Best suggestions this.dBestSugg.set(sSugg, Math.round(nDistJaro*1000)); if (this.dBestSugg.size > this.nBestSuggLimit) { this.nDistLimit = -1; // make suggest() to end search } } else if (nDistJaro < .33) { // Good suggestions this.dGoodSugg.set(sSugg, Math.round(nDistJaro*1000)); if (this.dGoodSugg.size > this.nGoodSuggLimit) { this.nDistLimit = -1; // make suggest() to end search } } this.nDistLimit = Math.min(this.nDistLimit, this.nMinDist+1); } getSuggestions () { // return a list of suggestions let lRes = []; if (this.dBestSugg.size > 0) { // sort only with simplified words |
︙ | ︙ | |||
340 341 342 343 344 345 346 347 348 349 350 351 352 353 | let nMaxJump = Math.max(Math.floor(sWord.length / 4), 1); let oSuggResult = new SuggResult(sWord, nSuggLimit); sWord = str_transform.cleanWord(sWord); if (bSplitTrailingNumbers) { this._splitTrailingNumbers(oSuggResult, sWord); } this._splitSuggest(oSuggResult, sWord); this._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump); let aSugg = oSuggResult.getSuggestions(); if (this.lexicographer) { aSugg = this.lexicographer.filterSugg(aSugg); } if (sSfx || sPfx) { // we add what we removed | > | 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 | let nMaxJump = Math.max(Math.floor(sWord.length / 4), 1); let oSuggResult = new SuggResult(sWord, nSuggLimit); sWord = str_transform.cleanWord(sWord); if (bSplitTrailingNumbers) { this._splitTrailingNumbers(oSuggResult, sWord); } this._splitSuggest(oSuggResult, sWord); this._suggest(oSuggResult, sWord); this._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump); let aSugg = oSuggResult.getSuggestions(); if (this.lexicographer) { aSugg = this.lexicographer.filterSugg(aSugg); } if (sSfx || sPfx) { // we add what we removed |
︙ | ︙ |
Modified graphspell/ibdawg.py from [13d2327263] to [e27ae4ab79].
︙ | ︙ | |||
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 | def addSugg (self, sSugg, nDeep=0): "add a suggestion" if sSugg in self.aAllSugg: return self.aAllSugg.add(sSugg) nDistJaro = 1 - st.distanceJaroWinkler(self.sSimplifiedWord, st.simplifyWord(sSugg)) nDist = floor(nDistJaro * 10) #logging.info((nDeep * " ") + "__" + sSugg + "__ " + str(round(nDistJaro*1000))) if nDistJaro < .11: # Best suggestions self.dBestSugg[sSugg] = round(nDistJaro*1000) if len(self.dBestSugg) > self.nBestSuggLimit: self.nDistLimit = -1 # make suggest() to end search elif nDistJaro < .33: # Good suggestions self.dGoodSugg[sSugg] = round(nDistJaro*1000) if len(self.dGoodSugg) > self.nGoodSuggLimit: self.nDistLimit = -1 # make suggest() to end search | > > < < < < < < < | | 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 | def addSugg (self, sSugg, nDeep=0): "add a suggestion" if sSugg in self.aAllSugg: return self.aAllSugg.add(sSugg) nDistJaro = 1 - st.distanceJaroWinkler(self.sSimplifiedWord, st.simplifyWord(sSugg)) nDist = floor(nDistJaro * 10) if nDist < self.nMinDist: self.nMinDist = nDist #logging.info((nDeep * " ") + "__" + sSugg + "__ " + str(round(nDistJaro*1000))) if nDistJaro < .11: # Best suggestions self.dBestSugg[sSugg] = round(nDistJaro*1000) if len(self.dBestSugg) > self.nBestSuggLimit: self.nDistLimit = -1 # make suggest() to end search elif nDistJaro < .33: # Good suggestions self.dGoodSugg[sSugg] = round(nDistJaro*1000) if len(self.dGoodSugg) > self.nGoodSuggLimit: self.nDistLimit = -1 # make suggest() to end search self.nDistLimit = min(self.nDistLimit, self.nMinDist+1) def getSuggestions (self): "return a list of suggestions" # we sort the better results with the original word lRes = [] if len(self.dBestSugg) > 0: # sort only with simplified words |
︙ | ︙ | |||
242 243 244 245 246 247 248 249 250 251 252 253 254 255 | nMaxHardRepl = max((len(sWord) - 5) // 4, 1) nMaxJump = max(len(sWord) // 4, 1) oSuggResult = SuggResult(sWord, nSuggLimit) sWord = st.cleanWord(sWord) if bSplitTrailingNumbers: self._splitTrailingNumbers(oSuggResult, sWord) self._splitSuggest(oSuggResult, sWord) self._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump) aSugg = oSuggResult.getSuggestions() if self.lexicographer: aSugg = self.lexicographer.filterSugg(aSugg) if sSfx or sPfx: # we add what we removed return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg)) | > | 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 | nMaxHardRepl = max((len(sWord) - 5) // 4, 1) nMaxJump = max(len(sWord) // 4, 1) oSuggResult = SuggResult(sWord, nSuggLimit) sWord = st.cleanWord(sWord) if bSplitTrailingNumbers: self._splitTrailingNumbers(oSuggResult, sWord) self._splitSuggest(oSuggResult, sWord) self._suggest(oSuggResult, sWord) self._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump) aSugg = oSuggResult.getSuggestions() if self.lexicographer: aSugg = self.lexicographer.filterSugg(aSugg) if sSfx or sPfx: # we add what we removed return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg)) |
︙ | ︙ |