Overview
| Comment: | [graphspell] replace straight apostrophe in spellingNormalization() | 
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive | 
| Timelines: | family | ancestors | descendants | both | trunk | graphspell | 
| Files: | files | file ages | folders | 
| SHA3-256: | 3f34a1e2e7a7fe0ecdde83b18be97ee6 | 
| User & Date: | olr on 2020-09-30 16:10:44 | 
| Other Links: | manifest | tags | 
Context
| 2020-09-30 | ||
| 16:19 | [fr] update tests check-in: ec053526e5 user: olr tags: trunk, fr | |
| 16:10 | [graphspell] replace straight apostrophe in spellingNormalization() check-in: 3f34a1e2e7 user: olr tags: trunk, graphspell | |
| 16:06 | [graphspell] ignore underscore for spellchecking and morph check-in: c2a07ab8a2 user: olr tags: trunk, graphspell | |
Changes
Modified graphspell-js/ibdawg.js from [a4ceb45f56] to [5c5b2cad28].
| ︙ | ︙ | |||
| 273 274 275 276 277 278 279 | 
    }
    isValid (sWord) {
        // checks if sWord is valid (different casing tested if the first letter is a capital)
        if (!sWord) {
            return true;
        }
 | < < < | 273 274 275 276 277 278 279 280 281 282 283 284 285 286 | 
    }
    isValid (sWord) {
        // checks if sWord is valid (different casing tested if the first letter is a capital)
        if (!sWord) {
            return true;
        }
        if (this.lookup(sWord)) {
            return true;
        }
        if (sWord.charAt(0).gl_isUpperCase()) {
            if (sWord.length > 1) {
                if (sWord.gl_isTitle()) {
                    return !!this.lookup(sWord.toLowerCase());
 | 
| ︙ | ︙ | 
Modified graphspell-js/str_transform.js from [3bf8e3b480] to [a1596e1ca3].
| ︙ | ︙ | |||
| 22 23 24 25 26 27 28 | 
        for (let i=0;  i <= sWord.length - n;  i++) {
            lNgrams.push(sWord.slice(i, i+n));
        }
        return lNgrams;
    },
    _xTransCharsForSpelling: new Map([
 | | > | 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 | 
        for (let i=0;  i <= sWord.length - n;  i++) {
            lNgrams.push(sWord.slice(i, i+n));
        }
        return lNgrams;
    },
    _xTransCharsForSpelling: new Map([
        ['ſ', 's'],  ['ffi', 'ffi'],  ['ffl', 'ffl'],  ['ff', 'ff'],  ['ſt', 'ft'],  ['fi', 'fi'],  ['fl', 'fl'],  ['st', 'st'],
        ["'", '’']
    ]),
    spellingNormalization: function (sWord) {
        let sNewWord = "";
        for (let c of sWord) {
            sNewWord += this._xTransCharsForSpelling.gl_get(c, c);
        }
 | 
| ︙ | ︙ | 
Modified graphspell/ibdawg.py from [43975eca5b] to [cae8eca48c].
| ︙ | ︙ | |||
| 275 276 277 278 279 280 281 | 
            return True
        return False
    def isValid (self, sWord):
        "checks if <sWord> is valid (different casing tested if the first letter is a capital)"
        if not sWord:
            return True
 | < < | 275 276 277 278 279 280 281 282 283 284 285 286 287 288 | 
            return True
        return False
    def isValid (self, sWord):
        "checks if <sWord> is valid (different casing tested if the first letter is a capital)"
        if not sWord:
            return True
        if self.lookup(sWord):
            return True
        if sWord[0:1].isupper():
            if len(sWord) > 1:
                if sWord.istitle():
                    return self.lookup(sWord.lower())
                if sWord.isupper():
 | 
| ︙ | ︙ | 
Modified graphspell/str_transform.py from [98c57fa9ba] to [ee895b8347].
| ︙ | ︙ | |||
| 17 18 19 20 21 22 23 | 
    return [ sWord[i:i+n]  for i in range(len(sWord)-n+1) ]
#### WORD NORMALIZATION
_xTransCharsForSpelling = str.maketrans({
 | | > | 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 | 
    return [ sWord[i:i+n]  for i in range(len(sWord)-n+1) ]
#### WORD NORMALIZATION
_xTransCharsForSpelling = str.maketrans({
    'ſ': 's',  'ffi': 'ffi',  'ffl': 'ffl',  'ff': 'ff',  'ſt': 'ft',  'fi': 'fi',  'fl': 'fl',  'st': 'st',
    "'": '’'
})
def spellingNormalization (sWord):
    "returns <sWord> translated with _xTransCharsForSpelling, then normalized to NFC"
    # character replacement happens first; NFC composition is applied to the result
    sTranslated = sWord.translate(_xTransCharsForSpelling)
    return unicodedata.normalize("NFC", sTranslated)
 | 
| ︙ | ︙ |