Index: graphspell-js/ibdawg.js ================================================================== --- graphspell-js/ibdawg.js +++ graphspell-js/ibdawg.js @@ -307,27 +307,28 @@ let sSfx = ""; [sPfx, sWord, sSfx] = char_player.cut(sWord); let nMaxSwitch = Math.max(Math.floor(sWord.length / 3), 1); let nMaxDel = Math.floor(sWord.length / 5); let nMaxHardRepl = Math.max(Math.floor((sWord.length - 5) / 4), 1); + let nMaxJump = Math.max(Math.floor(sWord.length / 4), 1); let oSuggResult = new SuggResult(sWord); - this._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl); + this._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump); if (sWord.gl_isTitle()) { - this._suggest(oSuggResult, sWord.toLowerCase(), nMaxSwitch, nMaxDel, nMaxHardRepl); + this._suggest(oSuggResult, sWord.toLowerCase(), nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump); } else if (sWord.gl_isLowerCase()) { - this._suggest(oSuggResult, sWord.gl_toCapitalize(), nMaxSwitch, nMaxDel, nMaxHardRepl); + this._suggest(oSuggResult, sWord.gl_toCapitalize(), nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump); } let aSugg = oSuggResult.getSuggestions(nSuggLimit); if (sSfx || sPfx) { // we add what we removed return aSugg.map( (sSugg) => { return sPfx + sSugg + sSfx } ); } return aSugg; } - _suggest (oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=false) { + _suggest (oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nMaxJump=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=false) { // returns a set of suggestions // recursive function if (sRemain == "") { if (this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask) { oSuggResult.addSugg(sNewWord); @@ -339,50 +340,55 @@ } let cCurrent = sRemain.slice(0, 1); for (let [cChar, jAddr] of this._getCharArcs(iAddr)) { if (char_player.d1to1.gl_get(cCurrent, cCurrent).indexOf(cChar) != -1) { - this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, jAddr, sNewWord+cChar); + this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, jAddr, sNewWord+cChar); } - else if (!bAvoidLoop && nMaxHardRepl) { - this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl-1, nDeep+1, jAddr, sNewWord+cChar, true); + else if (!bAvoidLoop) { + if (nMaxHardRepl) { + this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl-1, nMaxJump, nDeep+1, jAddr, sNewWord+cChar, true); + } + if (nMaxJump) { + this._suggest(oSuggResult, sRemain, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump-1, nDeep+1, jAddr, sNewWord+cChar, true); + } } } if (!bAvoidLoop) { // avoid infinite loop if (sRemain.length > 1) { if (cCurrent == sRemain.slice(1, 2)) { // same char, we remove 1 char without adding 1 to - this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord); + this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord); } else { // switching chars if (nMaxSwitch > 0) { - this._suggest(oSuggResult, sRemain.slice(1, 2)+sRemain.slice(0, 1)+sRemain.slice(2), nMaxSwitch-1, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true); + this._suggest(oSuggResult, sRemain.slice(1, 2)+sRemain.slice(0, 1)+sRemain.slice(2), nMaxSwitch-1, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, true); } // delete char if (nMaxDel > 0) { - this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel-1, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true); + this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel-1, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, true); } } // Phonetic replacements for (let sRepl of char_player.get1toXReplacement(sNewWord.slice(-1), cCurrent, sRemain.slice(1,2))) { - this._suggest(oSuggResult, sRepl + sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true); + this._suggest(oSuggResult, sRepl + sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, true); } for (let sRepl of char_player.d2toX.gl_get(sRemain.slice(0, 2), [])) { - this._suggest(oSuggResult, sRepl + sRemain.slice(2), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true); + this._suggest(oSuggResult, sRepl + sRemain.slice(2), nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, true); } } // end of word if (sRemain.length == 2) { for (let sRepl of char_player.dFinal2.gl_get(sRemain, [])) { - this._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true); + this._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, true); } } else if (sRemain.length == 1) { - this._suggest(oSuggResult, "", nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true); // remove last char and go on + this._suggest(oSuggResult, "", nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, true); // remove last char and go on for (let sRepl of char_player.dFinal1.gl_get(sRemain, [])) { - this._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true); + this._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, true); } } } } Index: graphspell/ibdawg.py ================================================================== --- graphspell/ibdawg.py +++ graphspell/ibdawg.py @@ -284,23 +284,24 @@ sWord = cp.spellingNormalization(sWord) sPfx, sWord, sSfx = cp.cut(sWord) nMaxSwitch = max(len(sWord) // 3, 1) nMaxDel = len(sWord) // 5 nMaxHardRepl = max((len(sWord) - 5) // 4, 1) + nMaxJump = max(len(sWord) // 4, 1) oSuggResult = SuggResult(sWord) - self._suggest(oSuggResult, sWord, nMaxSwitch=nMaxSwitch, nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl) + self._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump) if sWord.istitle(): - self._suggest(oSuggResult, sWord.lower(), nMaxSwitch=nMaxSwitch, nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl) + self._suggest(oSuggResult, sWord.lower(), nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump) elif sWord.islower(): - self._suggest(oSuggResult, sWord.title(), nMaxSwitch=nMaxSwitch, nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl) + self._suggest(oSuggResult, sWord.title(), nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump) aSugg = oSuggResult.getSuggestions(nSuggLimit) if sSfx or sPfx: # we add what we removed return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg)) return aSugg - def _suggest (self, oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False): + def _suggest (self, oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nMaxJump=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False): # recursive function #logging.info((nDeep * " ") + sNewWord + ":" + sRemain) if not sRemain: if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask: oSuggResult.addSugg(sNewWord, nDeep) @@ -308,38 +309,41 @@ oSuggResult.addSugg(sNewWord+sTail, nDeep) return cCurrent = sRemain[0:1] for cChar, jAddr in self._getCharArcs(iAddr): if cChar in cp.d1to1.get(cCurrent, cCurrent): - self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, jAddr, sNewWord+cChar) - elif not bAvoidLoop and nMaxHardRepl: - self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl-1, nDeep+1, jAddr, sNewWord+cChar, True) + self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, jAddr, sNewWord+cChar) + elif not bAvoidLoop: + if nMaxHardRepl: + self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl-1, nMaxJump, nDeep+1, jAddr, sNewWord+cChar, True) + if nMaxJump: + self._suggest(oSuggResult, sRemain, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump-1, nDeep+1, jAddr, sNewWord+cChar, True) if not bAvoidLoop: # avoid infinite loop if len(sRemain) > 1: if cCurrent == sRemain[1:2]: # same char, we remove 1 char without adding 1 to - self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord) + self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord) else: # switching chars if nMaxSwitch: - self._suggest(oSuggResult, sRemain[1:2]+sRemain[0:1]+sRemain[2:], nMaxSwitch-1, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, True) + self._suggest(oSuggResult, sRemain[1:2]+sRemain[0:1]+sRemain[2:], nMaxSwitch-1, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, True) # delete char if nMaxDel: - self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel-1, nMaxHardRepl, nDeep+1, iAddr, sNewWord, True) + self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel-1, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, True) # Phonetic replacements for sRepl in cp.get1toXReplacement(sNewWord[-1:], cCurrent, sRemain[1:2]): - self._suggest(oSuggResult, sRepl + sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, True) + self._suggest(oSuggResult, sRepl + sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, True) for sRepl in cp.d2toX.get(sRemain[0:2], ()): - self._suggest(oSuggResult, sRepl + sRemain[2:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, True) + self._suggest(oSuggResult, sRepl + sRemain[2:], nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, True) # end of word if len(sRemain) == 2: for sRepl in cp.dFinal2.get(sRemain, ()): - self._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, True) + self._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, True) elif len(sRemain) == 1: - self._suggest(oSuggResult, "", nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, True) # remove last char and go on + self._suggest(oSuggResult, "", nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, True) # remove last char and go on for sRepl in cp.dFinal1.get(sRemain, ()): - self._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, True) + self._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, True) #@timethis def suggest2 (self, sWord, nMaxSugg=10): "returns a set of suggestions for " sWord = cp.spellingNormalization(sWord)