Overview
| Comment: | [core] ibdawg: suggestion mechanism > reduce 1toX replacements overload (much, much faster) |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | core | spellsugg |
| Files: | files | file ages | folders |
| SHA3-256: |
767e396f2db4db56a9299f004eef88e9 |
| User & Date: | olr on 2017-11-09 11:56:13 |
| Original Comment: | [core] ibdawg: suggestion mechanisme > reduce 1toX replacements overload (much, much faster) |
| Other Links: | branch diff | manifest | tags |
Context
|
2017-11-21
| ||
| 16:57 | [core] better suggestion engine Closed-Leaf check-in: 6c5050fe91 user: olr tags: core, spellsugg | |
|
2017-11-10
| ||
| 16:52 | [core] merge spellsugg: much faster suggestion engine check-in: e6e44e506c user: olr tags: trunk, core | |
|
2017-11-09
| ||
| 11:56 | [core] ibdawg: suggestion mechanism > reduce 1toX replacements overload (much, much faster) check-in: 767e396f2d user: olr tags: core, spellsugg | |
|
2017-11-08
| ||
| 21:16 | [core] ibdawg: update char_player check-in: 51e3a2e76e user: olr tags: core, spellsugg | |
Changes
Modified gc_core/js/char_player.js from [ac345212e4] to [0547b59e35].
| ︙ | ︙ | |||
23 24 25 26 27 28 29 |
for (let c of sWord) {
sRes += this._dTransChars.gl_get(c, c);
}
return sRes.replace("eau", "o").replace("au", "o");
},
aVowel: new Set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ"),
| | | | 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
for (let c of sWord) {
sRes += this._dTransChars.gl_get(c, c);
}
return sRes.replace("eau", "o").replace("au", "o");
},
aVowel: new Set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ"),
aConsonant: new Set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ"),
aDouble: new Set("bcçdfjklmnprstzBCÇDFJKLMNPRSTZ"), // letters that may be used twice successively
// Similar chars
d1to1: new Map([
["1", "liîLIÎ"],
["2", "zZ"],
|
| ︙ | ︙ | |||
160 161 162 163 164 165 166 |
d1toX: new Map([
["æ", ["ae",]],
["Æ", ["AE",]],
["b", ["bb",]],
["B", ["BB",]],
["c", ["cc", "ss", "qu", "ch"]],
["C", ["CC", "SS", "QU", "CH"]],
| < < < < < < < < | 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 |
d1toX: new Map([
["æ", ["ae",]],
["Æ", ["AE",]],
["b", ["bb",]],
["B", ["BB",]],
["c", ["cc", "ss", "qu", "ch"]],
["C", ["CC", "SS", "QU", "CH"]],
["d", ["dd",]],
["D", ["DD",]],
["é", ["ai", "ei"]],
["É", ["AI", "EI"]],
["f", ["ff", "ph"]],
["F", ["FF", "PH"]],
["g", ["gu", "ge", "gg", "gh"]],
["G", ["GU", "GE", "GG", "GH"]],
["j", ["jj", "dj"]],
["J", ["JJ", "DJ"]],
["k", ["qu", "ck", "ch", "cu", "kk", "kh"]],
|
| ︙ | ︙ | |||
205 206 207 208 209 210 211 212 213 214 215 216 217 218 |
["t", ["tt", "th"]],
["T", ["TT", "TH"]],
["x", ["cc", "ct", "xx"]],
["X", ["CC", "CT", "XX"]],
["z", ["ss", "zh"]],
["Z", ["SS", "ZH"]],
]),
d2toX: new Map([
["an", ["en",]],
["AN", ["EN",]],
["au", ["eau", "o", "ô"]],
["AU", ["EAU", "O", "Ô"]],
["en", ["an",]],
| > > > > > > > | 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 |
["t", ["tt", "th"]],
["T", ["TT", "TH"]],
["x", ["cc", "ct", "xx"]],
["X", ["CC", "CT", "XX"]],
["z", ["ss", "zh"]],
["Z", ["SS", "ZH"]],
]),
get1toXReplacement: function (cPrev, cCur, cNext) {
if (this.aConsonant.has(cCur) && (this.aConsonant.has(cPrev) || this.aConsonant.has(cNext))) {
return [];
}
return this.d1toX.gl_get(cCur, []);
},
d2toX: new Map([
["an", ["en",]],
["AN", ["EN",]],
["au", ["eau", "o", "ô"]],
["AU", ["EAU", "O", "Ô"]],
["en", ["an",]],
|
| ︙ | ︙ |
Modified gc_core/js/ibdawg.js from [ca747a7a44] to [952ba094d6].
| ︙ | ︙ | |||
304 305 306 307 308 309 310 |
}
// delete char
if (nMaxDel > 0) {
this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel-1, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true);
}
}
// Phonetic replacements
| | | 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 |
}
// delete char
if (nMaxDel > 0) {
this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel-1, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true);
}
}
// Phonetic replacements
for (let sRepl of char_player.get1toXReplacement(sNewWord.slice(-1), cCurrent, sRemain.slice(1,2))) {
this._suggest(oSuggResult, sRepl + sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true);
}
for (let sRepl of char_player.d2toX.gl_get(sRemain.slice(0, 2), [])) {
this._suggest(oSuggResult, sRepl + sRemain.slice(2), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true);
}
// Hard replacements
if (nDeep > 3 && nMaxHardRepl && sRemain.length >= 2) {
|
| ︙ | ︙ |
Modified gc_core/py/char_player.py from [b008c1ffec] to [b0152aab01].
| ︙ | ︙ | |||
16 17 18 19 20 21 22 |
def cleanWord (sWord):
"word simplication before calculating distance between words"
return sWord.lower().translate(_xTransChars).replace("eau", "o").replace("au", "o")
aVowel = set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ")
| | | | 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
def cleanWord (sWord):
"word simplication before calculating distance between words"
return sWord.lower().translate(_xTransChars).replace("eau", "o").replace("au", "o")
aVowel = set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ")
aConsonant = set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ")
aDouble = set("bcçdfjklmnprstzBCÇDFJKLMNPRSTZ") # letters that may be used twice successively
# Similar chars
d1to1 = {
"1": "liîLIÎ",
"2": "zZ",
|
| ︙ | ︙ | |||
153 154 155 156 157 158 159 |
d1toX = {
"æ": ("ae",),
"Æ": ("AE",),
"b": ("bb",),
"B": ("BB",),
"c": ("cc", "ss", "qu", "ch"),
"C": ("CC", "SS", "QU", "CH"),
| < < < < < < < < | 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
d1toX = {
"æ": ("ae",),
"Æ": ("AE",),
"b": ("bb",),
"B": ("BB",),
"c": ("cc", "ss", "qu", "ch"),
"C": ("CC", "SS", "QU", "CH"),
"d": ("dd",),
"D": ("DD",),
"é": ("ai", "ei"),
"É": ("AI", "EI"),
"f": ("ff", "ph"),
"F": ("FF", "PH"),
"g": ("gu", "ge", "gg", "gh"),
"G": ("GU", "GE", "GG", "GH"),
"j": ("jj", "dj"),
"J": ("JJ", "DJ"),
"k": ("qu", "ck", "ch", "cu", "kk", "kh"),
|
| ︙ | ︙ | |||
198 199 200 201 202 203 204 205 206 207 208 209 210 211 |
"t": ("tt", "th"),
"T": ("TT", "TH"),
"x": ("cc", "ct", "xx"),
"X": ("CC", "CT", "XX"),
"z": ("ss", "zh"),
"Z": ("SS", "ZH"),
}
d2toX = {
"an": ("en",),
"AN": ("EN",),
"au": ("eau", "o", "ô"),
"AU": ("EAU", "O", "Ô"),
"en": ("an",),
| > > > > > > > | 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 |
"t": ("tt", "th"),
"T": ("TT", "TH"),
"x": ("cc", "ct", "xx"),
"X": ("CC", "CT", "XX"),
"z": ("ss", "zh"),
"Z": ("SS", "ZH"),
}
def get1toXReplacement (cPrev, cCur, cNext):
if cCur in aConsonant and (cPrev in aConsonant or cNext in aConsonant):
return ()
return d1toX.get(cCur, ())
d2toX = {
"an": ("en",),
"AN": ("EN",),
"au": ("eau", "o", "ô"),
"AU": ("EAU", "O", "Ô"),
"en": ("an",),
|
| ︙ | ︙ |
Modified gc_core/py/ibdawg.py from [0203105f4e] to [8ce21115dd].
| ︙ | ︙ | |||
287 288 289 290 291 292 293 |
# switching chars
if nMaxSwitch:
self._suggest(oSuggResult, sRemain[1:2]+sRemain[0:1]+sRemain[2:], nMaxSwitch-1, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, "><",True)
# delete char
if nMaxDel:
self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel-1, nMaxHardRepl, nDeep+1, iAddr, sNewWord, "-"+cCurrent, True)
# Phonetic replacements
| | | 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 |
# switching chars
if nMaxSwitch:
self._suggest(oSuggResult, sRemain[1:2]+sRemain[0:1]+sRemain[2:], nMaxSwitch-1, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, "><",True)
# delete char
if nMaxDel:
self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel-1, nMaxHardRepl, nDeep+1, iAddr, sNewWord, "-"+cCurrent, True)
# Phonetic replacements
for sRepl in cp.get1toXReplacement(sNewWord[-1:], cCurrent, sRemain[1:2]):
self._suggest(oSuggResult, sRepl + sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, cCurrent+">"+sRepl, True)
for sRepl in cp.d2toX.get(sRemain[0:2], ()):
self._suggest(oSuggResult, sRepl + sRemain[2:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, sRemain[0:2]+">"+sRepl, True)
# Hard replacements
if nDeep > 3 and nMaxHardRepl:
for cChar, kAddr in self._getCharArcs(iAddr):
if cChar not in cp.d1to1.get(cCurrent, ""):
|
| ︙ | ︙ |