Grammalecte  Check-in [3f34a1e2e7]

Overview
Comment:[graphspell] replace straight apostrophe in spellingNormalization()
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | graphspell
Files: files | file ages | folders
SHA3-256: 3f34a1e2e7a7fe0ecdde83b18be97ee63f6d3bce0d096455ec304634b2e9e48c
User & Date: olr on 2020-09-30 16:10:44
Other Links: manifest | tags
Context
2020-09-30
16:19
[fr] update tests check-in: ec053526e5 user: olr tags: trunk, fr
16:10
[graphspell] replace straight apostrophe in spellingNormalization() check-in: 3f34a1e2e7 user: olr tags: trunk, graphspell
16:06
[graphspell] ignore underscore for spellchecking and morph check-in: c2a07ab8a2 user: olr tags: trunk, graphspell
Changes

Modified graphspell-js/ibdawg.js from [a4ceb45f56] to [5c5b2cad28].

273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
273
274
275
276
277
278
279



280
281
282
283
284
285
286







-
-
-







    }

    isValid (sWord) {
        // checks if sWord is valid (different casing tested if the first letter is a capital)
        if (!sWord) {
            return true;
        }
        if (sWord.includes("'")) { // ugly hack
            sWord = sWord.replace("'", "’");
        }
        if (this.lookup(sWord)) {
            return true;
        }
        if (sWord.charAt(0).gl_isUpperCase()) {
            if (sWord.length > 1) {
                if (sWord.gl_isTitle()) {
                    return !!this.lookup(sWord.toLowerCase());

Modified graphspell-js/str_transform.js from [3bf8e3b480] to [a1596e1ca3].

22
23
24
25
26
27
28
29


30
31
32
33
34
35
36
22
23
24
25
26
27
28

29
30
31
32
33
34
35
36
37







-
+
+







        for (let i=0;  i <= sWord.length - n;  i++) {
            lNgrams.push(sWord.slice(i, i+n));
        }
        return lNgrams;
    },

    _xTransCharsForSpelling: new Map([
        ['ſ', 's'],  ['ﬃ', 'ffi'],  ['ﬄ', 'ffl'],  ['ﬀ', 'ff'],  ['ﬅ', 'ft'],  ['ﬁ', 'fi'],  ['ﬂ', 'fl'],  ['ﬆ', 'st']
        ['ſ', 's'],  ['ﬃ', 'ffi'],  ['ﬄ', 'ffl'],  ['ﬀ', 'ff'],  ['ﬅ', 'ft'],  ['ﬁ', 'fi'],  ['ﬂ', 'fl'],  ['ﬆ', 'st'],
        ["'", '’']
    ]),

    spellingNormalization: function (sWord) {
        let sNewWord = "";
        for (let c of sWord) {
            sNewWord += this._xTransCharsForSpelling.gl_get(c, c);
        }

Modified graphspell/ibdawg.py from [43975eca5b] to [cae8eca48c].

275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
275
276
277
278
279
280
281


282
283
284
285
286
287
288







-
-







            return True
        return False

    def isValid (self, sWord):
        "checks if <sWord> is valid (different casing tested if the first letter is a capital)"
        if not sWord:
            return True
        if "'" in sWord: # ugly hack
            sWord = sWord.replace("'", "’")
        if self.lookup(sWord):
            return True
        if sWord[0:1].isupper():
            if len(sWord) > 1:
                if sWord.istitle():
                    return self.lookup(sWord.lower())
                if sWord.isupper():

Modified graphspell/str_transform.py from [98c57fa9ba] to [ee895b8347].

17
18
19
20
21
22
23
24


25
26
27
28
29
30
31
17
18
19
20
21
22
23

24
25
26
27
28
29
30
31
32







-
+
+







    return [ sWord[i:i+n]  for i in range(len(sWord)-n+1) ]



#### WORD NORMALIZATION

_xTransCharsForSpelling = str.maketrans({
    'ſ': 's',  'ﬃ': 'ffi',  'ﬄ': 'ffl',  'ﬀ': 'ff',  'ﬅ': 'ft',  'ﬁ': 'fi',  'ﬂ': 'fl',  'ﬆ': 'st'
    'ſ': 's',  'ﬃ': 'ffi',  'ﬄ': 'ffl',  'ﬀ': 'ff',  'ﬅ': 'ft',  'ﬁ': 'fi',  'ﬂ': 'fl',  'ﬆ': 'st',
    "'": '’'
})

def spellingNormalization (sWord):
    "return <sWord> translated through _xTransCharsForSpelling, then normalized to Unicode NFC"
    # First substitute the characters listed in the module-level translation table...
    sTranslated = sWord.translate(_xTransCharsForSpelling)
    # ...then compose the result into canonical NFC form.
    return unicodedata.normalize("NFC", sTranslated)