Overview
Comment: | [graphspell] suggestions with trailing numbers: avoid repeating the split for each dictionary |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | graphspell |
Files: | files | file ages | folders |
SHA3-256: |
e32c092585e506508db1f8dbb0b25b4c |
User & Date: | olr on 2019-04-04 08:03:38 |
Other Links: | manifest | tags |
Context
2019-04-04
| ||
12:16 | [fr] genfrdic: add flexion id to lexicon check-in: 0f24ce1e2c user: olr tags: trunk, fr | |
08:03 | [graphspell] suggestions with trailing numbers: avoid repeating the split for each dictionary check-in: e32c092585 user: olr tags: trunk, graphspell | |
07:23 | [graphspell][core][build][lo] remove extended dictionary check-in: 51a40c07e3 user: olr tags: trunk, core, build, lo, graphspell | |
Changes
Modified graphspell-js/ibdawg.js from [5fadd970c2] to [9db187764b].
︙ | ︙ | |||
317 318 319 320 321 322 323 | if (sWord.gl_isUpperCase() && sWord.length > 1) { l.push(...this.morph(sWord.gl_toCapitalize())); } } return l; } | | > > > | < > > > | 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 | if (sWord.gl_isUpperCase() && sWord.length > 1) { l.push(...this.morph(sWord.gl_toCapitalize())); } } return l; } suggest (sWord, nSuggLimit=10, bSplitTrailingNumbers=false) { // returns a array of suggestions for <sWord> //console.time("Suggestions for " + sWord); sWord = char_player.spellingNormalization(sWord); let sPfx = ""; let sSfx = ""; [sPfx, sWord, sSfx] = char_player.cut(sWord); let nMaxSwitch = Math.max(Math.floor(sWord.length / 3), 1); let nMaxDel = Math.floor(sWord.length / 5); let nMaxHardRepl = Math.max(Math.floor((sWord.length - 5) / 4), 1); let nMaxJump = Math.max(Math.floor(sWord.length / 4), 1); let oSuggResult = new SuggResult(sWord); if (bSplitTrailingNumbers) { this._splitTrailingNumbers(oSuggResult, sWord); } this._splitSuggest(oSuggResult, sWord); this._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump); let aSugg = oSuggResult.getSuggestions(nSuggLimit); if (sSfx || sPfx) { // we add what we removed return aSugg.map( (sSugg) => { return sPfx + sSugg + sSfx; } ); } //console.timeEnd("Suggestions for " + sWord); return aSugg; } _splitTrailingNumbers (oSuggResult, sWord) { let m = /^([a-zA-Zà-öÀ-Ö_ø-ÿØ-ßĀ-ʯfi-st][a-zA-Zà-öÀ-Ö_ø-ÿØ-ßĀ-ʯfi-st-]+)([0-9]+)$/.exec(sWord); if (m) { oSuggResult.addSugg(m[1] + " " + char_player.numbersToExponent(m[2])); } } _splitSuggest (oSuggResult, sWord) { // split at apostrophes for (let cSplitter of "'’") { if (sWord.includes(cSplitter)) { let [sWord1, sWord2] = sWord.split(cSplitter, 2); if (this.isValid(sWord1) && this.isValid(sWord2)) { oSuggResult.addSugg(sWord1+" "+sWord2); } |
︙ | ︙ |
Modified graphspell-js/spellchecker.js from [2783c2d059] to [29bf30d9e3].
︙ | ︙ | |||
229 230 231 232 233 234 235 | return this._dLemmas.get(sWord); } return Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf("/")); }))); } * suggest (sWord, nSuggLimit=10) { // generator: returns 1, 2 or 3 lists of suggestions | | | | | 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 | return this._dLemmas.get(sWord); } return Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf("/")); }))); } * suggest (sWord, nSuggLimit=10) { // generator: returns 1, 2 or 3 lists of suggestions yield this.oMainDic.suggest(sWord, nSuggLimit, true); if (this.bCommunityDic) { yield this.oCommunityDic.suggest(sWord, Math.floor(nSuggLimit/2)+1); } if (this.bPersonalDic) { yield this.oPersonalDic.suggest(sWord, Math.floor(nSuggLimit/2)+1); } } * select (sFlexPattern="", sTagsPattern="") { // generator: returns all entries which flexion fits <sFlexPattern> and morphology fits <sTagsPattern> yield* this.oMainDic.select(sFlexPattern, sTagsPattern); if (this.bCommunityDic) { |
︙ | ︙ |
Modified graphspell/ibdawg.py from [21e398dbec] to [4586b340c3].
︙ | ︙ | |||
291 292 293 294 295 296 297 | if sWord[0:1].isupper(): l.extend(self.morph(sWord.lower())) if sWord.isupper() and len(sWord) > 1: l.extend(self.morph(sWord.capitalize())) return l #@timethis | | > > | < < > > | 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 | if sWord[0:1].isupper(): l.extend(self.morph(sWord.lower())) if sWord.isupper() and len(sWord) > 1: l.extend(self.morph(sWord.capitalize())) return l #@timethis def suggest (self, sWord, nSuggLimit=10, bSplitTrailingNumbers=False): "returns a set of suggestions for <sWord>" sWord = sWord.rstrip(".") # useful for LibreOffice sWord = cp.spellingNormalization(sWord) sPfx, sWord, sSfx = cp.cut(sWord) nMaxSwitch = max(len(sWord) // 3, 1) nMaxDel = len(sWord) // 5 nMaxHardRepl = max((len(sWord) - 5) // 4, 1) nMaxJump = max(len(sWord) // 4, 1) oSuggResult = SuggResult(sWord) if bSplitTrailingNumbers: self._splitTrailingNumbers(oSuggResult, sWord) self._splitSuggest(oSuggResult, sWord) self._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump) aSugg = oSuggResult.getSuggestions(nSuggLimit) if sSfx or sPfx: # we add what we removed return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg)) return aSugg def _splitTrailingNumbers (self, oSuggResult, sWord): m = re.match(r"(\D+)([0-9]+)$", sWord) if m: oSuggResult.addSugg(m.group(1) + " " + cp.numbersToExponent(m.group(2))) def _splitSuggest (self, oSuggResult, sWord): # split at apostrophes for cSplitter in "'’": if cSplitter in sWord: sWord1, sWord2 = sWord.split(cSplitter, 1) if self.isValid(sWord1) and self.isValid(sWord2): oSuggResult.addSugg(sWord1+" "+sWord2) |
︙ | ︙ |
Modified graphspell/spellchecker.py from [fe2798d7b3] to [274b3c4e53].
︙ | ︙ | |||
221 222 223 224 225 226 227 | if self.dDefaultSugg: if sWord in self.dDefaultSugg: yield self.dDefaultSugg[sWord].split("|") elif sWord.istitle() and sWord.lower() in self.dDefaultSugg: lRes = self.dDefaultSugg[sWord.lower()].split("|") yield list(map(lambda sSugg: sSugg[0:1].upper()+sSugg[1:], lRes)) else: | | | | | | 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 | if self.dDefaultSugg: if sWord in self.dDefaultSugg: yield self.dDefaultSugg[sWord].split("|") elif sWord.istitle() and sWord.lower() in self.dDefaultSugg: lRes = self.dDefaultSugg[sWord.lower()].split("|") yield list(map(lambda sSugg: sSugg[0:1].upper()+sSugg[1:], lRes)) else: yield self.oMainDic.suggest(sWord, nSuggLimit, True) else: yield self.oMainDic.suggest(sWord, nSuggLimit, True) if self.bCommunityDic: yield self.oCommunityDic.suggest(sWord, (nSuggLimit//2)+1) if self.bPersonalDic: yield self.oPersonalDic.suggest(sWord, (nSuggLimit//2)+1) def select (self, sFlexPattern="", sTagsPattern=""): "generator: returns all entries which flexion fits <sFlexPattern> and morphology fits <sTagsPattern>" yield from self.oMainDic.select(sFlexPattern, sTagsPattern) if self.bCommunityDic: yield from self.oCommunityDic.select(sFlexPattern, sTagsPattern) if self.bPersonalDic: |
︙ | ︙ |