Overview
Comment: | [core] ibdawg: suggestion mechanism > reduce 1toX replacements overload (much, much faster) |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | core | spellsugg |
Files: | files | file ages | folders |
SHA3-256: | 767e396f2db4db56a9299f004eef88e9 |
User & Date: | olr on 2017-11-09 11:56:13 |
Original Comment: | [core] ibdawg: suggestion mechanisme > reduce 1toX replacements overload (much, much faster) |
Other Links: | branch diff | manifest | tags |
Context
2017-11-21
| ||
16:57 | [core] better suggestion engine Closed-Leaf check-in: 6c5050fe91 user: olr tags: core, spellsugg | |
2017-11-10
| ||
16:52 | [core] merge spellsugg: much faster suggestion engine check-in: e6e44e506c user: olr tags: trunk, core | |
2017-11-09
| ||
11:56 | [core] ibdawg: suggestion mechanism > reduce 1toX replacements overload (much, much faster) check-in: 767e396f2d user: olr tags: core, spellsugg | |
2017-11-08
| ||
21:16 | [core] ibdawg: update char_player check-in: 51e3a2e76e user: olr tags: core, spellsugg | |
Changes
Modified gc_core/js/char_player.js from [ac345212e4] to [0547b59e35].
︙ | ︙ | |||
23 24 25 26 27 28 29 | for (let c of sWord) { sRes += this._dTransChars.gl_get(c, c); } return sRes.replace("eau", "o").replace("au", "o"); }, aVowel: new Set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ"), | | | | 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 | for (let c of sWord) { sRes += this._dTransChars.gl_get(c, c); } return sRes.replace("eau", "o").replace("au", "o"); }, aVowel: new Set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ"), aConsonant: new Set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ"), aDouble: new Set("bcçdfjklmnprstzBCÇDFJKLMNPRSTZ"), // letters that may be used twice successively // Similar chars d1to1: new Map([ ["1", "liîLIÎ"], ["2", "zZ"], |
︙ | ︙ | |||
160 161 162 163 164 165 166 | d1toX: new Map([ ["æ", ["ae",]], ["Æ", ["AE",]], ["b", ["bb",]], ["B", ["BB",]], ["c", ["cc", "ss", "qu", "ch"]], ["C", ["CC", "SS", "QU", "CH"]], | < < < < < < < < | 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 | d1toX: new Map([ ["æ", ["ae",]], ["Æ", ["AE",]], ["b", ["bb",]], ["B", ["BB",]], ["c", ["cc", "ss", "qu", "ch"]], ["C", ["CC", "SS", "QU", "CH"]], ["d", ["dd",]], ["D", ["DD",]], ["é", ["ai", "ei"]], ["É", ["AI", "EI"]], ["f", ["ff", "ph"]], ["F", ["FF", "PH"]], ["g", ["gu", "ge", "gg", "gh"]], ["G", ["GU", "GE", "GG", "GH"]], ["j", ["jj", "dj"]], ["J", ["JJ", "DJ"]], ["k", ["qu", "ck", "ch", "cu", "kk", "kh"]], |
︙ | ︙ | |||
205 206 207 208 209 210 211 212 213 214 215 216 217 218 | ["t", ["tt", "th"]], ["T", ["TT", "TH"]], ["x", ["cc", "ct", "xx"]], ["X", ["CC", "CT", "XX"]], ["z", ["ss", "zh"]], ["Z", ["SS", "ZH"]], ]), d2toX: new Map([ ["an", ["en",]], ["AN", ["EN",]], ["au", ["eau", "o", "ô"]], ["AU", ["EAU", "O", "Ô"]], ["en", ["an",]], | > > > > > > > | 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 | ["t", ["tt", "th"]], ["T", ["TT", "TH"]], ["x", ["cc", "ct", "xx"]], ["X", ["CC", "CT", "XX"]], ["z", ["ss", "zh"]], ["Z", ["SS", "ZH"]], ]), get1toXReplacement: function (cPrev, cCur, cNext) { if (this.aConsonant.has(cCur) && (this.aConsonant.has(cPrev) || this.aConsonant.has(cNext))) { return []; } return this.d1toX.gl_get(cCur, []); }, d2toX: new Map([ ["an", ["en",]], ["AN", ["EN",]], ["au", ["eau", "o", "ô"]], ["AU", ["EAU", "O", "Ô"]], ["en", ["an",]], |
︙ | ︙ |
Modified gc_core/js/ibdawg.js from [ca747a7a44] to [952ba094d6].
︙ | ︙ | |||
304 305 306 307 308 309 310 | } // delete char if (nMaxDel > 0) { this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel-1, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true); } } // Phonetic replacements | | | 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 | } // delete char if (nMaxDel > 0) { this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel-1, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true); } } // Phonetic replacements for (let sRepl of char_player.get1toXReplacement(sNewWord.slice(-1), cCurrent, sRemain.slice(1,2))) { this._suggest(oSuggResult, sRepl + sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true); } for (let sRepl of char_player.d2toX.gl_get(sRemain.slice(0, 2), [])) { this._suggest(oSuggResult, sRepl + sRemain.slice(2), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true); } // Hard replacements if (nDeep > 3 && nMaxHardRepl && sRemain.length >= 2) { |
︙ | ︙ |
Modified gc_core/py/char_player.py from [b008c1ffec] to [b0152aab01].
︙ | ︙ | |||
16 17 18 19 20 21 22 | def cleanWord (sWord): "word simplication before calculating distance between words" return sWord.lower().translate(_xTransChars).replace("eau", "o").replace("au", "o") aVowel = set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ") | | | | 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 | def cleanWord (sWord): "word simplication before calculating distance between words" return sWord.lower().translate(_xTransChars).replace("eau", "o").replace("au", "o") aVowel = set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ") aConsonant = set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ") aDouble = set("bcçdfjklmnprstzBCÇDFJKLMNPRSTZ") # letters that may be used twice successively # Similar chars d1to1 = { "1": "liîLIÎ", "2": "zZ", |
︙ | ︙ | |||
153 154 155 156 157 158 159 | d1toX = { "æ": ("ae",), "Æ": ("AE",), "b": ("bb",), "B": ("BB",), "c": ("cc", "ss", "qu", "ch"), "C": ("CC", "SS", "QU", "CH"), | < < < < < < < < | 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 | d1toX = { "æ": ("ae",), "Æ": ("AE",), "b": ("bb",), "B": ("BB",), "c": ("cc", "ss", "qu", "ch"), "C": ("CC", "SS", "QU", "CH"), "d": ("dd",), "D": ("DD",), "é": ("ai", "ei"), "É": ("AI", "EI"), "f": ("ff", "ph"), "F": ("FF", "PH"), "g": ("gu", "ge", "gg", "gh"), "G": ("GU", "GE", "GG", "GH"), "j": ("jj", "dj"), "J": ("JJ", "DJ"), "k": ("qu", "ck", "ch", "cu", "kk", "kh"), |
︙ | ︙ | |||
198 199 200 201 202 203 204 205 206 207 208 209 210 211 | "t": ("tt", "th"), "T": ("TT", "TH"), "x": ("cc", "ct", "xx"), "X": ("CC", "CT", "XX"), "z": ("ss", "zh"), "Z": ("SS", "ZH"), } d2toX = { "an": ("en",), "AN": ("EN",), "au": ("eau", "o", "ô"), "AU": ("EAU", "O", "Ô"), "en": ("an",), | > > > > > > > | 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 | "t": ("tt", "th"), "T": ("TT", "TH"), "x": ("cc", "ct", "xx"), "X": ("CC", "CT", "XX"), "z": ("ss", "zh"), "Z": ("SS", "ZH"), } def get1toXReplacement (cPrev, cCur, cNext): if cCur in aConsonant and (cPrev in aConsonant or cNext in aConsonant): return () return d1toX.get(cCur, ()) d2toX = { "an": ("en",), "AN": ("EN",), "au": ("eau", "o", "ô"), "AU": ("EAU", "O", "Ô"), "en": ("an",), |
︙ | ︙ |
Modified gc_core/py/ibdawg.py from [0203105f4e] to [8ce21115dd].
︙ | ︙ | |||
287 288 289 290 291 292 293 | # switching chars if nMaxSwitch: self._suggest(oSuggResult, sRemain[1:2]+sRemain[0:1]+sRemain[2:], nMaxSwitch-1, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, "><",True) # delete char if nMaxDel: self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel-1, nMaxHardRepl, nDeep+1, iAddr, sNewWord, "-"+cCurrent, True) # Phonetic replacements | | | 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 | # switching chars if nMaxSwitch: self._suggest(oSuggResult, sRemain[1:2]+sRemain[0:1]+sRemain[2:], nMaxSwitch-1, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, "><",True) # delete char if nMaxDel: self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel-1, nMaxHardRepl, nDeep+1, iAddr, sNewWord, "-"+cCurrent, True) # Phonetic replacements for sRepl in cp.get1toXReplacement(sNewWord[-1:], cCurrent, sRemain[1:2]): self._suggest(oSuggResult, sRepl + sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, cCurrent+">"+sRepl, True) for sRepl in cp.d2toX.get(sRemain[0:2], ()): self._suggest(oSuggResult, sRepl + sRemain[2:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, sRemain[0:2]+">"+sRepl, True) # Hard replacements if nDeep > 3 and nMaxHardRepl: for cChar, kAddr in self._getCharArcs(iAddr): if cChar not in cp.d1to1.get(cCurrent, ""): |
︙ | ︙ |