Overview
Comment: | [graphspell] replace straight apostrophe in spellingNormalization() |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | graphspell |
Files: | files | file ages | folders |
SHA3-256: |
3f34a1e2e7a7fe0ecdde83b18be97ee6 |
User & Date: | olr on 2020-09-30 16:10:44 |
Other Links: | manifest | tags |
Context
2020-09-30
| ||
16:19 | [fr] update tests check-in: ec053526e5 user: olr tags: trunk, fr | |
16:10 | [graphspell] replace straight apostrophe in spellingNormalization() check-in: 3f34a1e2e7 user: olr tags: trunk, graphspell | |
16:06 | [graphspell] ignore underscore for spellchecking and morph check-in: c2a07ab8a2 user: olr tags: trunk, graphspell | |
Changes
Modified graphspell-js/ibdawg.js from [a4ceb45f56] to [5c5b2cad28].
︙ | ︙ | |||
273 274 275 276 277 278 279 | } isValid (sWord) { // checks if sWord is valid (different casing tested if the first letter is a capital) if (!sWord) { return true; } | < < < | 273 274 275 276 277 278 279 280 281 282 283 284 285 286 | } isValid (sWord) { // checks if sWord is valid (different casing tested if the first letter is a capital) if (!sWord) { return true; } if (this.lookup(sWord)) { return true; } if (sWord.charAt(0).gl_isUpperCase()) { if (sWord.length > 1) { if (sWord.gl_isTitle()) { return !!this.lookup(sWord.toLowerCase()); |
︙ | ︙ |
Modified graphspell-js/str_transform.js from [3bf8e3b480] to [a1596e1ca3].
︙ | ︙ | |||
22 23 24 25 26 27 28 | for (let i=0; i <= sWord.length - n; i++) { lNgrams.push(sWord.slice(i, i+n)); } return lNgrams; }, _xTransCharsForSpelling: new Map([ | | > | 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 | for (let i=0; i <= sWord.length - n; i++) { lNgrams.push(sWord.slice(i, i+n)); } return lNgrams; }, _xTransCharsForSpelling: new Map([ ['ſ', 's'], ['ﬃ', 'ffi'], ['ﬄ', 'ffl'], ['ﬀ', 'ff'], ['ﬅ', 'ft'], ['ﬁ', 'fi'], ['ﬂ', 'fl'], ['ﬆ', 'st'], ["'", '’'] ]), spellingNormalization: function (sWord) { let sNewWord = ""; for (let c of sWord) { sNewWord += this._xTransCharsForSpelling.gl_get(c, c); } |
︙ | ︙ |
Modified graphspell/ibdawg.py from [43975eca5b] to [cae8eca48c].
︙ | ︙ | |||
275 276 277 278 279 280 281 | return True return False def isValid (self, sWord): "checks if <sWord> is valid (different casing tested if the first letter is a capital)" if not sWord: return True | < < | 275 276 277 278 279 280 281 282 283 284 285 286 287 288 | return True return False def isValid (self, sWord): "checks if <sWord> is valid (different casing tested if the first letter is a capital)" if not sWord: return True if self.lookup(sWord): return True if sWord[0:1].isupper(): if len(sWord) > 1: if sWord.istitle(): return self.lookup(sWord.lower()) if sWord.isupper(): |
︙ | ︙ |
Modified graphspell/str_transform.py from [98c57fa9ba] to [ee895b8347].
︙ | ︙ | |||
17 18 19 20 21 22 23 | return [ sWord[i:i+n] for i in range(len(sWord)-n+1) ] #### WORD NORMALIZATION _xTransCharsForSpelling = str.maketrans({ | | > | 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 | return [ sWord[i:i+n] for i in range(len(sWord)-n+1) ] #### WORD NORMALIZATION _xTransCharsForSpelling = str.maketrans({ 'ſ': 's', 'ﬃ': 'ffi', 'ﬄ': 'ffl', 'ﬀ': 'ff', 'ﬅ': 'ft', 'ﬁ': 'fi', 'ﬂ': 'fl', 'ﬆ': 'st', "'": '’' }) def spellingNormalization (sWord): "nomalization NFC and removing ligatures" return unicodedata.normalize("NFC", sWord.translate(_xTransCharsForSpelling)) |
︙ | ︙ |