Overview
Comment: | [core] ibdawg: suggestion mechanism > exclude some suffixes (ß) |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | core |
Files: | files | file ages | folders |
SHA3-256: | 5fbb7ec853c196c128eccdc07a189068 |
User & Date: | olr on 2017-10-24 22:24:15 |
Other Links: | manifest | tags |
Context
2017-10-25
| ||
09:41 | [core] ibdawg: suggestion mechanism > split word function check-in: 388e8809cf user: olr tags: trunk, core | |
2017-10-24
| ||
22:24 | [core] ibdawg: suggestion mechanism > exclude some suffixes (ß) check-in: 5fbb7ec853 user: olr tags: trunk, core | |
12:05 | [core] ibdawg: reduce hard replacements in suggestion mechanism check-in: 35abf9fb76 user: olr tags: trunk, core | |
Changes
Modified gc_core/js/char_player.js from [cdf575879a] to [56bb998588].
︙ | ︙ | |||
322 323 324 325 326 327 328 329 330 331 332 333 334 335 | aPfx1: new Set([ "anti", "archi", "contre", "hyper", "mé", "méta", "im", "in", "ir", "par", "proto", "pseudo", "pré", "re", "ré", "sans", "sous", "supra", "sur", "ultra" ]), aPfx2: new Set([ "belgo", "franco", "génito", "gynéco", "médico", "russo" ]) } | > > > > | 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 | aPfx1: new Set([ "anti", "archi", "contre", "hyper", "mé", "méta", "im", "in", "ir", "par", "proto", "pseudo", "pré", "re", "ré", "sans", "sous", "supra", "sur", "ultra" ]), aPfx2: new Set([ "belgo", "franco", "génito", "gynéco", "médico", "russo" ]), aExcludedSfx: new Set([ "je", "tu", "il", "elle", "on", "t-il", "t-elle", "t-on", "nous", "vous", "ils", "elles" ]) } |
︙ | ︙ |
Modified gc_core/js/ibdawg.js from [330301cb59] to [f9dd570de6].
︙ | ︙ | |||
186 187 188 189 190 191 192 193 194 195 196 197 198 199 | } } return l; } suggest (sWord, nMaxSugg=10) { // returns a array of suggestions for <sWord> let nMaxDel = Math.floor(sWord.length / 5); let nMaxHardRepl = Math.max(Math.floor((sWord.length - 5) / 4), 1); let aSugg = this._suggest(sWord, nMaxDel, nMaxHardRepl); if (sWord.gl_isTitle()) { aSugg.gl_update(this._suggest(sWord.toLowerCase(), nMaxDel, nMaxHardRepl)); } else if (sWord.gl_isLowerCase()) { | > > > > > > > > | 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 | } } return l; } suggest (sWord, nMaxSugg=10) { // returns a array of suggestions for <sWord> let sAdd = ""; if (sWord.includes("-")) { let nLastHyphenPos = sWord.lastIndexOf("-"); if (char_player.aExcludedSfx.has(sWord.slice(nLastHyphenPos+1))) { sAdd = sWord.slice(nLastHyphenPos); sWord = sWord.slice(0, nLastHyphenPos); } } let nMaxDel = Math.floor(sWord.length / 5); let nMaxHardRepl = Math.max(Math.floor((sWord.length - 5) / 4), 1); let aSugg = this._suggest(sWord, nMaxDel, nMaxHardRepl); if (sWord.gl_isTitle()) { aSugg.gl_update(this._suggest(sWord.toLowerCase(), nMaxDel, nMaxHardRepl)); } else if (sWord.gl_isLowerCase()) { |
︙ | ︙ | |||
208 209 210 211 212 213 214 215 216 217 218 219 220 221 | if (sWord.gl_isTitle()) { aSugg = aSugg.map((sSugg) => { return sSugg.gl_toCapitalize(); }); } let dDistTemp = new Map(); aSugg.forEach((sSugg) => { dDistTemp.set(sSugg, char_player.distanceDamerauLevenshtein(sWord, sSugg)); }); aSugg = aSugg.sort((sA, sB) => { return dDistTemp.get(sA) - dDistTemp.get(sB); }).slice(0, nMaxSugg); dDistTemp.clear(); return aSugg; } _suggest (sRemain, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=false) { // returns a set of suggestions // recursive function let aSugg = new Set(); | > > > > | 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 | if (sWord.gl_isTitle()) { aSugg = aSugg.map((sSugg) => { return sSugg.gl_toCapitalize(); }); } let dDistTemp = new Map(); aSugg.forEach((sSugg) => { dDistTemp.set(sSugg, char_player.distanceDamerauLevenshtein(sWord, sSugg)); }); aSugg = aSugg.sort((sA, sB) => { return dDistTemp.get(sA) - dDistTemp.get(sB); }).slice(0, nMaxSugg); dDistTemp.clear(); if (sAdd) { // we add what we removed return aSugg.map( (sSugg) => { return sSugg + sAdd } ); } return aSugg; } _suggest (sRemain, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=false) { // returns a set of suggestions // recursive function let aSugg = new Set(); |
︙ | ︙ |
Modified gc_core/py/char_player.py from [c41be1a0f8] to [e5dd8880c3].
︙ | ︙ | |||
305 306 307 308 309 310 311 | aPfx1 = frozenset([ "anti", "archi", "contre", "hyper", "mé", "méta", "im", "in", "ir", "par", "proto", "pseudo", "pré", "re", "ré", "sans", "sous", "supra", "sur", "ultra" ]) aPfx2 = frozenset([ "belgo", "franco", "génito", "gynéco", "médico", "russo" ]) | > > > > | 305 306 307 308 309 310 311 312 313 314 315 | aPfx1 = frozenset([ "anti", "archi", "contre", "hyper", "mé", "méta", "im", "in", "ir", "par", "proto", "pseudo", "pré", "re", "ré", "sans", "sous", "supra", "sur", "ultra" ]) aPfx2 = frozenset([ "belgo", "franco", "génito", "gynéco", "médico", "russo" ]) aExcludedSfx = frozenset([ "je", "tu", "il", "elle", "on", "t-il", "t-elle", "t-on", "nous", "vous", "ils", "elles" ]) |
Modified gc_core/py/ibdawg.py from [d3aa940937] to [f31919afcb].
︙ | ︙ | |||
185 186 187 188 189 190 191 | l.extend(self.morph(sWord.lower())) if sWord.isupper() and len(sWord) > 1: l.extend(self.morph(sWord.capitalize())) return l def suggest (self, sWord, nMaxSugg=10): "returns a set of suggestions for <sWord>" | | > > > > > | | > > > > | 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 | l.extend(self.morph(sWord.lower())) if sWord.isupper() and len(sWord) > 1: l.extend(self.morph(sWord.capitalize())) return l def suggest (self, sWord, nMaxSugg=10): "returns a set of suggestions for <sWord>" sAdd = "" if "-" in sWord: nLastHyphenPos = sWord.rfind("-") if sWord[nLastHyphenPos+1:] in cp.aExcludedSfx: sAdd = sWord[nLastHyphenPos:] sWord = sWord[:nLastHyphenPos] nMaxDel = len(sWord) // 5 nMaxHardRepl = max((len(sWord) - 5) // 4, 1) aSugg = self._suggest(sWord, nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl) if sWord.istitle(): aSugg.update(self._suggest(sWord.lower(), nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl)) aSugg = set(map(lambda sSugg: sSugg.title(), aSugg)) elif sWord.islower(): aSugg.update(self._suggest(sWord.title(), nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl)) if not aSugg: #print("crush useless chars") aSugg.update(self._suggestWithCrushedUselessChars(cp.clearWord(sWord))) aSugg = filter(lambda sSugg: not sSugg.endswith(("è", "È")), aSugg) # fr language aSugg = sorted(aSugg, key=lambda sSugg: cp.distanceDamerauLevenshtein(sWord, sSugg))[:nMaxSugg] if sAdd: # we add what we removed return list(map(lambda sSug: sSug+sAdd, aSugg)) return aSugg def _suggest (self, sRemain, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False): "returns a set of suggestions" # recursive function #show(nDeep, sNewWord + ":" + sRemain) aSugg = set() if not sRemain: |
︙ | ︙ |