Overview
| Comment: | [graphspell] suggestion mechanism: jumps |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk | graphspell |
| Files: | files | file ages | folders |
| SHA3-256: |
74cd9793a933b96bf8a5eaa03d7989e3 |
| User & Date: | olr on 2018-04-16 13:36:01 |
| Other Links: | manifest | tags |
Context
|
2018-04-16
| ||
| 18:06 | [fx] Write corrections in editable node also check-in: 6565051bbd user: olr tags: trunk, fx | |
| 13:36 | [graphspell] suggestion mechanism: jumps check-in: 74cd9793a9 user: olr tags: trunk, graphspell | |
| 12:45 | [graphspell] char_player update: ç -> c check-in: ccbcddb798 user: olr tags: trunk, graphspell | |
Changes
Modified graphspell-js/ibdawg.js from [d566558346] to [ee32a83c3f].
| ︙ | ︙ | |||
305 306 307 308 309 310 311 312 |
sWord = char_player.spellingNormalization(sWord)
let sPfx = "";
let sSfx = "";
[sPfx, sWord, sSfx] = char_player.cut(sWord);
let nMaxSwitch = Math.max(Math.floor(sWord.length / 3), 1);
let nMaxDel = Math.floor(sWord.length / 5);
let nMaxHardRepl = Math.max(Math.floor((sWord.length - 5) / 4), 1);
let oSuggResult = new SuggResult(sWord);
| > | | | | | | > | > > > > | | | | | | | | | 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 |
sWord = char_player.spellingNormalization(sWord)
let sPfx = "";
let sSfx = "";
[sPfx, sWord, sSfx] = char_player.cut(sWord);
let nMaxSwitch = Math.max(Math.floor(sWord.length / 3), 1);
let nMaxDel = Math.floor(sWord.length / 5);
let nMaxHardRepl = Math.max(Math.floor((sWord.length - 5) / 4), 1);
let nMaxJump = Math.max(Math.floor(sWord.length / 4), 1);
let oSuggResult = new SuggResult(sWord);
this._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump);
if (sWord.gl_isTitle()) {
this._suggest(oSuggResult, sWord.toLowerCase(), nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump);
}
else if (sWord.gl_isLowerCase()) {
this._suggest(oSuggResult, sWord.gl_toCapitalize(), nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump);
}
let aSugg = oSuggResult.getSuggestions(nSuggLimit);
if (sSfx || sPfx) {
// we add what we removed
return aSugg.map( (sSugg) => { return sPfx + sSugg + sSfx } );
}
return aSugg;
}
_suggest (oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nMaxJump=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=false) {
// returns a set of suggestions
// recursive function
if (sRemain == "") {
if (this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask) {
oSuggResult.addSugg(sNewWord);
}
for (let sTail of this._getTails(iAddr)) {
oSuggResult.addSugg(sNewWord+sTail);
}
return;
}
let cCurrent = sRemain.slice(0, 1);
for (let [cChar, jAddr] of this._getCharArcs(iAddr)) {
if (char_player.d1to1.gl_get(cCurrent, cCurrent).indexOf(cChar) != -1) {
this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, jAddr, sNewWord+cChar);
}
else if (!bAvoidLoop) {
if (nMaxHardRepl) {
this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl-1, nMaxJump, nDeep+1, jAddr, sNewWord+cChar, true);
}
if (nMaxJump) {
this._suggest(oSuggResult, sRemain, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump-1, nDeep+1, jAddr, sNewWord+cChar, true);
}
}
}
if (!bAvoidLoop) { // avoid infinite loop
if (sRemain.length > 1) {
if (cCurrent == sRemain.slice(1, 2)) {
// same char, we remove 1 char without adding 1 to <sNewWord>
this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord);
}
else {
// switching chars
if (nMaxSwitch > 0) {
this._suggest(oSuggResult, sRemain.slice(1, 2)+sRemain.slice(0, 1)+sRemain.slice(2), nMaxSwitch-1, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, true);
}
// delete char
if (nMaxDel > 0) {
this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel-1, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, true);
}
}
// Phonetic replacements
for (let sRepl of char_player.get1toXReplacement(sNewWord.slice(-1), cCurrent, sRemain.slice(1,2))) {
this._suggest(oSuggResult, sRepl + sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, true);
}
for (let sRepl of char_player.d2toX.gl_get(sRemain.slice(0, 2), [])) {
this._suggest(oSuggResult, sRepl + sRemain.slice(2), nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, true);
}
}
// end of word
if (sRemain.length == 2) {
for (let sRepl of char_player.dFinal2.gl_get(sRemain, [])) {
this._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, true);
}
}
else if (sRemain.length == 1) {
this._suggest(oSuggResult, "", nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, true); // remove last char and go on
for (let sRepl of char_player.dFinal1.gl_get(sRemain, [])) {
this._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, true);
}
}
}
}
* _getCharArcs (iAddr) {
// generator: yield all chars and addresses from node at address <iAddr>
|
| ︙ | ︙ |
Modified graphspell/ibdawg.py from [e811d4895f] to [cffe6cd67c].
| ︙ | ︙ | |||
282 283 284 285 286 287 288 289 |
def suggest (self, sWord, nSuggLimit=10):
"returns a set of suggestions for <sWord>"
sWord = cp.spellingNormalization(sWord)
sPfx, sWord, sSfx = cp.cut(sWord)
nMaxSwitch = max(len(sWord) // 3, 1)
nMaxDel = len(sWord) // 5
nMaxHardRepl = max((len(sWord) - 5) // 4, 1)
oSuggResult = SuggResult(sWord)
| > | | | | | | > | > > | | | | | | | | | 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 |
def suggest (self, sWord, nSuggLimit=10):
"returns a set of suggestions for <sWord>"
sWord = cp.spellingNormalization(sWord)
sPfx, sWord, sSfx = cp.cut(sWord)
nMaxSwitch = max(len(sWord) // 3, 1)
nMaxDel = len(sWord) // 5
nMaxHardRepl = max((len(sWord) - 5) // 4, 1)
nMaxJump = max(len(sWord) // 4, 1)
oSuggResult = SuggResult(sWord)
self._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump)
if sWord.istitle():
self._suggest(oSuggResult, sWord.lower(), nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump)
elif sWord.islower():
self._suggest(oSuggResult, sWord.title(), nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump)
aSugg = oSuggResult.getSuggestions(nSuggLimit)
if sSfx or sPfx:
# we add what we removed
return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
return aSugg
def _suggest (self, oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nMaxJump=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False):
# recursive function
#logging.info((nDeep * " ") + sNewWord + ":" + sRemain)
if not sRemain:
if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
oSuggResult.addSugg(sNewWord, nDeep)
for sTail in self._getTails(iAddr):
oSuggResult.addSugg(sNewWord+sTail, nDeep)
return
cCurrent = sRemain[0:1]
for cChar, jAddr in self._getCharArcs(iAddr):
if cChar in cp.d1to1.get(cCurrent, cCurrent):
self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, jAddr, sNewWord+cChar)
elif not bAvoidLoop:
if nMaxHardRepl:
self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl-1, nMaxJump, nDeep+1, jAddr, sNewWord+cChar, True)
if nMaxJump:
self._suggest(oSuggResult, sRemain, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump-1, nDeep+1, jAddr, sNewWord+cChar, True)
if not bAvoidLoop: # avoid infinite loop
if len(sRemain) > 1:
if cCurrent == sRemain[1:2]:
# same char, we remove 1 char without adding 1 to <sNewWord>
self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord)
else:
# switching chars
if nMaxSwitch:
self._suggest(oSuggResult, sRemain[1:2]+sRemain[0:1]+sRemain[2:], nMaxSwitch-1, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, True)
# delete char
if nMaxDel:
self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel-1, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, True)
# Phonetic replacements
for sRepl in cp.get1toXReplacement(sNewWord[-1:], cCurrent, sRemain[1:2]):
self._suggest(oSuggResult, sRepl + sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, True)
for sRepl in cp.d2toX.get(sRemain[0:2], ()):
self._suggest(oSuggResult, sRepl + sRemain[2:], nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, True)
# end of word
if len(sRemain) == 2:
for sRepl in cp.dFinal2.get(sRemain, ()):
self._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, True)
elif len(sRemain) == 1:
self._suggest(oSuggResult, "", nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, True) # remove last char and go on
for sRepl in cp.dFinal1.get(sRemain, ()):
self._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDeep+1, iAddr, sNewWord, True)
#@timethis
def suggest2 (self, sWord, nMaxSugg=10):
"returns a set of suggestions for <sWord>"
sWord = cp.spellingNormalization(sWord)
sPfx, sWord, sSfx = cp.cut(sWord)
oSuggResult = SuggResult(sWord)
|
| ︙ | ︙ |