Overview
Comment: | [core] ibdawg: bug fixed and code cleaning |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | core |
Files: | files | file ages | folders |
SHA3-256: | fbf59c7547496f1405546022c62cb095 |
User & Date: | olr on 2017-11-22 14:57:05 |
Other Links: | manifest | tags |
Context
2017-11-22
| ||
14:58 | [core][js] Damerau-Levenshtein distance buggy: can’t find out why -> another version check-in: 300e9c7d3d user: olr tags: trunk, core | |
14:57 | [core] ibdawg: bug fixed and code cleaning check-in: fbf59c7547 user: olr tags: trunk, core | |
2017-11-21
| ||
17:54 | [core] suggestion engine: word simplification check-in: fa01465ba8 user: olr tags: trunk, core | |
Changes
Modified gc_core/js/char_player.js from [2d3bad14c9] to [c9b14a8774].
︙ | ︙ | |||
12 13 14 15 16 17 18 | ['ä', 'a'], ['ê', 'e'], ['í', 'i'], ['ó', 'o'], ['ü', 'u'], ['ý', 'i'], ['á', 'a'], ['ë', 'e'], ['ì', 'i'], ['ò', 'o'], ['ú', 'u'], ['ỳ', 'i'], ['ā', 'a'], ['ē', 'e'], ['ī', 'i'], ['ō', 'o'], ['ū', 'u'], ['ȳ', 'i'], ['ñ', 'n'], ['k', 'q'], ['w', 'v'], ['œ', 'oe'], ['æ', 'ae'], ]), | | | | 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 | ['ä', 'a'], ['ê', 'e'], ['í', 'i'], ['ó', 'o'], ['ü', 'u'], ['ý', 'i'], ['á', 'a'], ['ë', 'e'], ['ì', 'i'], ['ò', 'o'], ['ú', 'u'], ['ỳ', 'i'], ['ā', 'a'], ['ē', 'e'], ['ī', 'i'], ['ō', 'o'], ['ū', 'u'], ['ȳ', 'i'], ['ñ', 'n'], ['k', 'q'], ['w', 'v'], ['œ', 'oe'], ['æ', 'ae'], ]), simplifyWord: function (sWord) { // word simplication before calculating distance between words sWord = sWord.toLowerCase(); let sNewWord = ""; let i = 1; for (let c of sWord) { let cNew = this._dTransChars.gl_get(c, c); let cNext = sWord.slice(i, i+1) if (cNew != this._dTransChars.gl_get(cNext, cNext)) { sNewWord += cNew; } i++; } return sNewWord.replace(/eau/g, "o").replace(/au/g, "o").replace(/ai/g, "e").replace(/ei/g, "e").replace(/ph/g, "f"); }, aVowel: new Set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ"), aConsonant: new Set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ"), aDouble: new Set("bcdfjklmnprstzBCDFJKLMNPRSTZ"), // letters that may be used twice successively |
︙ | ︙ |
Modified gc_core/js/ibdawg.js from [0055aa4ed5] to [c1be157216].
︙ | ︙ | |||
19 20 21 22 23 24 25 | class SuggResult { // Structure for storing, classifying and filtering suggestions constructor (sWord, nDistLimit=-1) { this.sWord = sWord; | | | | | 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 | class SuggResult { // Structure for storing, classifying and filtering suggestions constructor (sWord, nDistLimit=-1) { this.sWord = sWord; this.sSimplifiedWord = char_player.simplifyWord(sWord); this.nDistLimit = (nDistLimit >= 0) ? nDistLimit : Math.floor(sWord.length / 3) + 1; this.nMinDist = 1000; this.aSugg = new Set(); this.dSugg = new Map([ [0, []], [1, []], [2, []] ]); } addSugg (sSugg, nDeep=0) { // add a suggestion if (!this.aSugg.has(sSugg)) { let nDist = str_transform.distanceDamerauLevenshtein(this.sSimplifiedWord, char_player.simplifyWord(sSugg)); if (nDist <= this.nDistLimit) { if (!this.dSugg.has(nDist)) { this.dSugg.set(nDist, []); } this.dSugg.get(nDist).push(sSugg); this.aSugg.add(sSugg); if (nDist < this.nMinDist) { |
︙ | ︙ | |||
247 248 249 250 251 252 253 | if (sWord.gl_isUpperCase() && sWord.length > 1) { l = l.concat(this.morph(sWord.gl_toCapitalize())); } } return l; } | | | | 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 | if (sWord.gl_isUpperCase() && sWord.length > 1) { l = l.concat(this.morph(sWord.gl_toCapitalize())); } } return l; } suggest (sWord, nSuggLimit=10) { // returns a array of suggestions for <sWord> let sPfx = ""; let sSfx = ""; [sPfx, sWord, sSfx] = char_player.cut(sWord); let nMaxSwitch = Math.max(Math.floor(sWord.length / 3), 1); let nMaxDel = Math.floor(sWord.length / 5); let nMaxHardRepl = Math.max(Math.floor((sWord.length - 5) / 4), 1); let oSuggResult = new SuggResult(sWord); this._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl); if (sWord.gl_isTitle()) { this._suggest(oSuggResult, sWord.toLowerCase(), nMaxSwitch, nMaxDel, nMaxHardRepl); } else if (sWord.gl_isLowerCase()) { this._suggest(oSuggResult, sWord.gl_toCapitalize(), nMaxSwitch, nMaxDel, nMaxHardRepl); } let aSugg = oSuggResult.getSuggestions(nSuggLimit); if (sSfx || sPfx) { // we add what we removed return aSugg.map( (sSugg) => { return sPfx + sSugg + sSfx } ); } return aSugg; } |
︙ | ︙ | |||
285 286 287 288 289 290 291 | for (let sTail of this._getTails(iAddr)) { oSuggResult.addSugg(sNewWord+sTail); } return; } let cCurrent = sRemain.slice(0, 1); for (let [cChar, jAddr] of this._getCharArcs(iAddr)) { | | | 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 | for (let sTail of this._getTails(iAddr)) { oSuggResult.addSugg(sNewWord+sTail); } return; } let cCurrent = sRemain.slice(0, 1); for (let [cChar, jAddr] of this._getCharArcs(iAddr)) { if (char_player.d1to1.gl_get(cCurrent, cCurrent).indexOf(cChar) != -1) { this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, jAddr, sNewWord+cChar); } else if (!bAvoidLoop && nMaxHardRepl) { this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl-1, nDeep+1, jAddr, sNewWord+cChar, true); } } if (!bAvoidLoop) { // avoid infinite loop |
︙ | ︙ |
Modified gc_core/js/str_transform.js from [e70f6ede55] to [9dddadeae1].
︙ | ︙ | |||
30 31 32 33 34 35 36 | matrix[i-1][j-1] + nCost // Substitution ); if (i > 1 && j > 1 && s1[i] == s2[j-1] && s1[i-1] == s2[j]) { matrix[i][j] = Math.min(matrix[i][j], matrix[i-2][j-2] + nCost); // Transposition } } } | < | 30 31 32 33 34 35 36 37 38 39 40 41 42 43 | matrix[i-1][j-1] + nCost // Substitution ); if (i > 1 && j > 1 && s1[i] == s2[j-1] && s1[i-1] == s2[j]) { matrix[i][j] = Math.min(matrix[i][j], matrix[i-2][j-2] + nCost); // Transposition } } } return matrix[nLen1][nLen2]; } catch (e) { helpers.logerror(e); } }, |
︙ | ︙ |
Modified gc_core/py/char_player.py from [a33649b94f] to [82e97eae54].
︙ | ︙ | |||
10 11 12 13 14 15 16 | 'ä': 'a', 'ê': 'e', 'í': 'i', 'ó': 'o', 'ü': 'u', 'ý': 'i', 'á': 'a', 'ë': 'e', 'ì': 'i', 'ò': 'o', 'ú': 'u', 'ỳ': 'i', 'ā': 'a', 'ē': 'e', 'ī': 'i', 'ō': 'o', 'ū': 'u', 'ȳ': 'i', 'ñ': 'n', 'k': 'q', 'w': 'v', 'œ': 'oe', 'æ': 'ae', }) | | | 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 | 'ä': 'a', 'ê': 'e', 'í': 'i', 'ó': 'o', 'ü': 'u', 'ý': 'i', 'á': 'a', 'ë': 'e', 'ì': 'i', 'ò': 'o', 'ú': 'u', 'ỳ': 'i', 'ā': 'a', 'ē': 'e', 'ī': 'i', 'ō': 'o', 'ū': 'u', 'ȳ': 'i', 'ñ': 'n', 'k': 'q', 'w': 'v', 'œ': 'oe', 'æ': 'ae', }) def simplifyWord (sWord): "word simplication before calculating distance between words" sWord = sWord.lower().translate(_xTransChars) sNewWord = "" for i, c in enumerate(sWord, 1): if c != sWord[i:i+1]: sNewWord += c return sNewWord.replace("eau", "o").replace("au", "o").replace("ai", "e").replace("ei", "e").replace("ph", "f") |
︙ | ︙ |
Modified gc_core/py/ibdawg.py from [4376dde7d7] to [0bb28ceca0].
︙ | ︙ | |||
28 29 30 31 32 33 34 | class SuggResult: """Structure for storing, classifying and filtering suggestions""" def __init__ (self, sWord, nDistLimit=-1): self.sWord = sWord | | | | | 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 | class SuggResult: """Structure for storing, classifying and filtering suggestions""" def __init__ (self, sWord, nDistLimit=-1): self.sWord = sWord self.sSimplifiedWord = cp.simplifyWord(sWord) self.nDistLimit = nDistLimit if nDistLimit >= 0 else (len(sWord) // 3) + 1 self.nMinDist = 1000 self.aSugg = set() self.dSugg = { 0: [], 1: [], 2: [] } def addSugg (self, sSugg, nDeep=0): "add a suggestion" #logging.info((nDeep * " ") + "__" + sSugg + "__") if sSugg not in self.aSugg: nDist = st.distanceDamerauLevenshtein(self.sSimplifiedWord, cp.simplifyWord(sSugg)) if nDist <= self.nDistLimit: if nDist not in self.dSugg: self.dSugg[nDist] = [] self.dSugg[nDist].append(sSugg) self.aSugg.add(sSugg) if nDist < self.nMinDist: self.nMinDist = nDist |
︙ | ︙ | |||
243 244 245 246 247 248 249 | if sWord[0:1].isupper(): l.extend(self.morph(sWord.lower())) if sWord.isupper() and len(sWord) > 1: l.extend(self.morph(sWord.capitalize())) return l #@timethis | | | | 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 | if sWord[0:1].isupper(): l.extend(self.morph(sWord.lower())) if sWord.isupper() and len(sWord) > 1: l.extend(self.morph(sWord.capitalize())) return l #@timethis def suggest (self, sWord, nSuggLimit=10): "returns a set of suggestions for <sWord>" sPfx, sWord, sSfx = cp.cut(sWord) nMaxSwitch = max(len(sWord) // 3, 1) nMaxDel = len(sWord) // 5 nMaxHardRepl = max((len(sWord) - 5) // 4, 1) oSuggResult = SuggResult(sWord) self._suggest(oSuggResult, sWord, nMaxSwitch=nMaxSwitch, nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl) if sWord.istitle(): self._suggest(oSuggResult, sWord.lower(), nMaxSwitch=nMaxSwitch, nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl) elif sWord.islower(): self._suggest(oSuggResult, sWord.title(), nMaxSwitch=nMaxSwitch, nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl) aSugg = oSuggResult.getSuggestions(nSuggLimit) if sSfx or sPfx: # we add what we removed return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg)) return aSugg def _suggest (self, oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False): # recursive function |
︙ | ︙ | |||
318 319 320 321 322 323 324 | return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg)) return aSugg def _suggest2 (self, oSuggResult, nDeep=0, iAddr=0, sNewWord=""): # recursive function #logging.info((nDeep * " ") + sNewWord) if nDeep >= oSuggResult.nDistLimit: | | | 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 | return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg)) return aSugg def _suggest2 (self, oSuggResult, nDeep=0, iAddr=0, sNewWord=""): # recursive function #logging.info((nDeep * " ") + sNewWord) if nDeep >= oSuggResult.nDistLimit: sCleanNewWord = cp.simplifyWord(sNewWord) if st.distanceSift4(oSuggResult.sCleanWord[:len(sCleanNewWord)], sCleanNewWord) > oSuggResult.nDistLimit: return if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask: oSuggResult.addSugg(sNewWord, nDeep) for cChar, jAddr in self._getCharArcsWithPriority(iAddr, oSuggResult.sWord[nDeep:nDeep+1]): self._suggest2(oSuggResult, nDeep+1, jAddr, sNewWord+cChar) return |
︙ | ︙ |
Modified make.py from [37cd0bb45e] to [2aed0ffb52].
︙ | ︙ | |||
353 354 355 356 357 358 359 | if False: # obsolete with helpers.cd("_build/xpi/"+sLang): spfFirefox = dVars['win_fx_dev_path'] if platform.system() == "Windows" else dVars['linux_fx_dev_path'] os.system('jpm run -b "' + spfFirefox + '"') if xArgs.web_ext or xArgs.firefox: | < | 353 354 355 356 357 358 359 360 361 362 363 364 365 366 | if False: # obsolete with helpers.cd("_build/xpi/"+sLang): spfFirefox = dVars['win_fx_dev_path'] if platform.system() == "Windows" else dVars['linux_fx_dev_path'] os.system('jpm run -b "' + spfFirefox + '"') if xArgs.web_ext or xArgs.firefox: with helpers.cd("_build/webext/"+sLang): if xArgs.firefox: # Firefox Developper edition spfFirefox = dVars['win_fx_dev_path'] if platform.system() == "Windows" else dVars['linux_fx_dev_path'] else: # Firefox Nightly edition spfFirefox = dVars['win_fx_nightly_path'] if platform.system() == "Windows" else dVars['linux_fx_nightly_path'] |
︙ | ︙ |