@@ -46,11 +46,11 @@ this.dSugg.get(nDist).push(sSugg); this.aSugg.add(sSugg); if (nDist < this.nMinDist) { this.nMinDist = nDist; } - this.nDistLimit = Math.min(this.nDistLimit, this.nMinDist+2); + this.nDistLimit = Math.min(this.nDistLimit, this.nMinDist+1); } } } getSuggestions (nSuggLimit=10, nDistLimit=-1) { @@ -137,10 +137,11 @@ throw RangeError("# Error. Unknown dictionary compression method: " + this.nCompressionMethod); } // to get the value of an arc, to get the char of an arc with its value this.dChar = helpers.objectToMap(this.dChar); this.dCharVal = this.dChar.gl_reverse(); + this.a2grams = new Set(this.l2grams); if (this.cStemming == "S") { this.funcStemming = str_transform.changeWordWithSuffixCode; } else if (this.cStemming == "A") { this.funcStemming = str_transform.changeWordWithAffixCode; @@ -212,11 +213,12 @@ "nArcVal": this.nArcVal, "nCompressionMethod": this.nCompressionMethod, "nBytesArc": this.nBytesArc, "nBytesNodeAddress": this.nBytesNodeAddress, "nBytesOffset": this.nBytesOffset, - "sByDic": this.sByDic // binary word graph + "sByDic": this.sByDic, // binary word graph + "l2grams": this.l2grams }; return oJSON; } isValidToken (sToken) { @@ -349,11 +351,11 @@ for (let [cChar, jAddr] of this._getCharArcs(iAddr)) { if (char_player.d1to1.gl_get(cCurrent, cCurrent).indexOf(cChar) != -1) { this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDist, nDeep+1, jAddr, sNewWord+cChar); } else if (!bAvoidLoop) { - if (nMaxHardRepl) { + if (nMaxHardRepl && this.isNgramsOK(cChar+sRemain.slice(1,2))) { this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl-1, nMaxJump, nDist+1, nDeep+1, jAddr, sNewWord+cChar, true); } if (nMaxJump) { this._suggest(oSuggResult, sRemain, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump-1, nDist+1, nDeep+1, jAddr, sNewWord+cChar, true); } @@ -365,15 +367,15 @@ // same char, we remove 1 char without adding 1 to this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDist, nDeep+1, iAddr, sNewWord); } else { // switching chars - if (nMaxSwitch > 0) { + if (nMaxSwitch > 0 && this.isNgramsOK(sNewWord.slice(-1)+sRemain.slice(1,2)) && this.isNgramsOK(sRemain.slice(1,2)+sRemain.slice(0,1))) { this._suggest(oSuggResult, sRemain.slice(1, 2)+sRemain.slice(0, 1)+sRemain.slice(2), nMaxSwitch-1, nMaxDel, nMaxHardRepl, nMaxJump, nDist+1, nDeep+1, iAddr, sNewWord, true); } // delete char - if (nMaxDel > 0) { + if (nMaxDel > 0 && this.isNgramsOK(sNewWord.slice(-1)+sRemain.slice(1,2))) { this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel-1, nMaxHardRepl, nMaxJump, nDist+1, nDeep+1, iAddr, sNewWord, true); } } // Phonetic replacements for (let sRepl of char_player.get1toXReplacement(sNewWord.slice(-1), cCurrent, sRemain.slice(1,2))) { @@ -395,10 +397,17 @@ this._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDist, nDeep+1, iAddr, sNewWord, true); } } } } + + isNgramsOK (sChars) { + if (sChars.length != 2) { + return true; + } + return this.a2grams.has(sChars); + } * _getCharArcs (iAddr) { // generator: yield all chars and addresses from node at address for (let [nVal, jAddr] of this._getArcs(iAddr)) { if (nVal <= this.nChar) {