Overview
Comment: | [core] merge spellsugg: much faster suggestion engine |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | core |
Files: | files | file ages | folders |
SHA3-256: | e6e44e506c927b243de9359a882cf37b |
User & Date: | olr on 2017-11-10 16:52:28 |
Original Comment: | [core] marge spellsugg: much faster suggestion engine |
Other Links: | manifest | tags |
Context
2017-11-10
| ||
20:45 | [fr] autres tests pour les guillemets + commentaires check-in: e3c4cc6975 user: olr tags: trunk, fr | |
16:52 | [core] merge spellsugg: much faster suggestion engine check-in: e6e44e506c user: olr tags: trunk, core | |
16:49 | [fr] remove remaining crap check-in: 6ee59d86c1 user: olr tags: trunk, fr | |
2017-11-09
| ||
11:56 | [core] ibdawg: suggestion mechanism > reduce 1toX replacements overload (much, much faster) check-in: 767e396f2d user: olr tags: core, spellsugg | |
Changes
Modified gc_core/js/char_player.js from [09a4bffb3f] to [0547b59e35].
︙ | ︙ | |||
22 23 24 25 26 27 28 29 30 31 32 33 34 35 | let sRes = ""; for (let c of sWord) { sRes += this._dTransChars.gl_get(c, c); } return sRes.replace("eau", "o").replace("au", "o"); }, // Similar chars d1to1: new Map([ ["1", "liîLIÎ"], ["2", "zZ"], ["3", "eéèêEÉÈÊ"], | > > > > | 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 | let sRes = ""; for (let c of sWord) { sRes += this._dTransChars.gl_get(c, c); } return sRes.replace("eau", "o").replace("au", "o"); }, aVowel: new Set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ"), aConsonant: new Set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ"), aDouble: new Set("bcçdfjklmnprstzBCÇDFJKLMNPRSTZ"), // letters that may be used twice successively // Similar chars d1to1: new Map([ ["1", "liîLIÎ"], ["2", "zZ"], ["3", "eéèêEÉÈÊ"], |
︙ | ︙ | |||
156 157 158 159 160 161 162 | d1toX: new Map([ ["æ", ["ae",]], ["Æ", ["AE",]], ["b", ["bb",]], ["B", ["BB",]], ["c", ["cc", "ss", "qu", "ch"]], ["C", ["CC", "SS", "QU", "CH"]], | < < > > < < | 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 | d1toX: new Map([ ["æ", ["ae",]], ["Æ", ["AE",]], ["b", ["bb",]], ["B", ["BB",]], ["c", ["cc", "ss", "qu", "ch"]], ["C", ["CC", "SS", "QU", "CH"]], ["d", ["dd",]], ["D", ["DD",]], ["é", ["ai", "ei"]], ["É", ["AI", "EI"]], ["f", ["ff", "ph"]], ["F", ["FF", "PH"]], ["g", ["gu", "ge", "gg", "gh"]], ["G", ["GU", "GE", "GG", "GH"]], ["j", ["jj", "dj"]], ["J", ["JJ", "DJ"]], ["k", ["qu", "ck", "ch", "cu", "kk", "kh"]], ["K", ["QU", "CK", "CH", "CU", "KK", "KH"]], ["l", ["ll",]], ["L", ["LL",]], ["m", ["mm", "mn"]], |
︙ | ︙ | |||
195 196 197 198 199 200 201 202 203 204 205 206 207 208 | ["t", ["tt", "th"]], ["T", ["TT", "TH"]], ["x", ["cc", "ct", "xx"]], ["X", ["CC", "CT", "XX"]], ["z", ["ss", "zh"]], ["Z", ["SS", "ZH"]], ]), d2toX: new Map([ ["an", ["en",]], ["AN", ["EN",]], ["au", ["eau", "o", "ô"]], ["AU", ["EAU", "O", "Ô"]], ["en", ["an",]], | > > > > > > > | 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 | ["t", ["tt", "th"]], ["T", ["TT", "TH"]], ["x", ["cc", "ct", "xx"]], ["X", ["CC", "CT", "XX"]], ["z", ["ss", "zh"]], ["Z", ["SS", "ZH"]], ]), get1toXReplacement: function (cPrev, cCur, cNext) { if (this.aConsonant.has(cCur) && (this.aConsonant.has(cPrev) || this.aConsonant.has(cNext))) { return []; } return this.d1toX.gl_get(cCur, []); }, d2toX: new Map([ ["an", ["en",]], ["AN", ["EN",]], ["au", ["eau", "o", "ô"]], ["AU", ["EAU", "O", "Ô"]], ["en", ["an",]], |
︙ | ︙ |
Modified gc_core/js/ibdawg.js from [ca747a7a44] to [952ba094d6].
︙ | ︙ | |||
304 305 306 307 308 309 310 | } // delete char if (nMaxDel > 0) { this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel-1, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true); } } // Phonetic replacements | | | 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 | } // delete char if (nMaxDel > 0) { this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel-1, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true); } } // Phonetic replacements for (let sRepl of char_player.get1toXReplacement(sNewWord.slice(-1), cCurrent, sRemain.slice(1,2))) { this._suggest(oSuggResult, sRepl + sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true); } for (let sRepl of char_player.d2toX.gl_get(sRemain.slice(0, 2), [])) { this._suggest(oSuggResult, sRepl + sRemain.slice(2), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true); } // Hard replacements if (nDeep > 3 && nMaxHardRepl && sRemain.length >= 2) { |
︙ | ︙ |
Modified gc_core/py/char_player.py from [aea8dd1016] to [b0152aab01].
︙ | ︙ | |||
14 15 16 17 18 19 20 21 22 23 24 25 26 27 | 'œ': 'oe', 'æ': 'ae', }) def cleanWord (sWord): "word simplication before calculating distance between words" return sWord.lower().translate(_xTransChars).replace("eau", "o").replace("au", "o") # Similar chars d1to1 = { "1": "liîLIÎ", "2": "zZ", "3": "eéèêEÉÈÊ", | > > > > > | 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 | 'œ': 'oe', 'æ': 'ae', }) def cleanWord (sWord): "word simplication before calculating distance between words" return sWord.lower().translate(_xTransChars).replace("eau", "o").replace("au", "o") aVowel = set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ") aConsonant = set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ") aDouble = set("bcçdfjklmnprstzBCÇDFJKLMNPRSTZ") # letters that may be used twice successively # Similar chars d1to1 = { "1": "liîLIÎ", "2": "zZ", "3": "eéèêEÉÈÊ", |
︙ | ︙ | |||
148 149 150 151 152 153 154 | d1toX = { "æ": ("ae",), "Æ": ("AE",), "b": ("bb",), "B": ("BB",), "c": ("cc", "ss", "qu", "ch"), "C": ("CC", "SS", "QU", "CH"), | < < > > < < | 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 | d1toX = { "æ": ("ae",), "Æ": ("AE",), "b": ("bb",), "B": ("BB",), "c": ("cc", "ss", "qu", "ch"), "C": ("CC", "SS", "QU", "CH"), "d": ("dd",), "D": ("DD",), "é": ("ai", "ei"), "É": ("AI", "EI"), "f": ("ff", "ph"), "F": ("FF", "PH"), "g": ("gu", "ge", "gg", "gh"), "G": ("GU", "GE", "GG", "GH"), "j": ("jj", "dj"), "J": ("JJ", "DJ"), "k": ("qu", "ck", "ch", "cu", "kk", "kh"), "K": ("QU", "CK", "CH", "CU", "KK", "KH"), "l": ("ll",), "L": ("LL",), "m": ("mm", "mn"), |
︙ | ︙ | |||
187 188 189 190 191 192 193 194 195 196 197 198 199 200 | "t": ("tt", "th"), "T": ("TT", "TH"), "x": ("cc", "ct", "xx"), "X": ("CC", "CT", "XX"), "z": ("ss", "zh"), "Z": ("SS", "ZH"), } d2toX = { "an": ("en",), "AN": ("EN",), "au": ("eau", "o", "ô"), "AU": ("EAU", "O", "Ô"), "en": ("an",), | > > > > > > > | 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 | "t": ("tt", "th"), "T": ("TT", "TH"), "x": ("cc", "ct", "xx"), "X": ("CC", "CT", "XX"), "z": ("ss", "zh"), "Z": ("SS", "ZH"), } def get1toXReplacement (cPrev, cCur, cNext): if cCur in aConsonant and (cPrev in aConsonant or cNext in aConsonant): return () return d1toX.get(cCur, ()) d2toX = { "an": ("en",), "AN": ("EN",), "au": ("eau", "o", "ô"), "AU": ("EAU", "O", "Ô"), "en": ("an",), |
︙ | ︙ |
Modified gc_core/py/ibdawg.py from [dc355a8e9b] to [846d5dd677].
︙ | ︙ | |||
286 287 288 289 290 291 292 | # switching chars if nMaxSwitch: self._suggest(oSuggResult, sRemain[1:2]+sRemain[0:1]+sRemain[2:], nMaxSwitch-1, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, "><",True) # delete char if nMaxDel: self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel-1, nMaxHardRepl, nDeep+1, iAddr, sNewWord, "-"+cCurrent, True) # Phonetic replacements | | | 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 | # switching chars if nMaxSwitch: self._suggest(oSuggResult, sRemain[1:2]+sRemain[0:1]+sRemain[2:], nMaxSwitch-1, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, "><",True) # delete char if nMaxDel: self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel-1, nMaxHardRepl, nDeep+1, iAddr, sNewWord, "-"+cCurrent, True) # Phonetic replacements for sRepl in cp.get1toXReplacement(sNewWord[-1:], cCurrent, sRemain[1:2]): self._suggest(oSuggResult, sRepl + sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, cCurrent+">"+sRepl, True) for sRepl in cp.d2toX.get(sRemain[0:2], ()): self._suggest(oSuggResult, sRepl + sRemain[2:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, sRemain[0:2]+">"+sRepl, True) # Hard replacements if nDeep > 3 and nMaxHardRepl: for cChar, kAddr in self._getCharArcs(iAddr): if cChar not in cp.d1to1.get(cCurrent, ""): |
︙ | ︙ |