Grammalecte  Check-in [0d2727274d]

Overview
Comment:[graphspell] ibdawg: suggestions for trailing numbers -> exponent
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | graphspell
Files: files | file ages | folders
SHA3-256: 0d2727274d5b1169aa7878d97dfabc13ef7b4a96b0be2adf313bbdac50b92914
User & Date: olr on 2019-03-04 09:50:09
Other Links: manifest | tags
Context
2019-03-04
14:31
[graphspell] ibdawg: better suggestions when splitting check-in: aefcee1f12 user: olr tags: trunk, graphspell
09:50
[graphspell] ibdawg: suggestions for trailing numbers -> exponent check-in: 0d2727274d user: olr tags: trunk, graphspell
09:48
[fx][bug] init: set this check-in: 7abad73bbb user: olr tags: trunk, fx
Changes

Modified graphspell-js/char_player.js from [aaed91ab4d] to [ac09087eb7].

25
26
27
28
29
30
31
32


33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48












49
50
51
52
53
54
55
25
26
27
28
29
30
31

32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68







-
+
+
















+
+
+
+
+
+
+
+
+
+
+
+







        ['à', 'a'],  ['é', 'e'],  ['î', 'i'],  ['ô', 'o'],  ['û', 'u'],  ['ÿ', 'i'],  ['y', 'i'],
        ['â', 'a'],  ['è', 'e'],  ['ï', 'i'],  ['ö', 'o'],  ['ù', 'u'],  ['ŷ', 'i'],
        ['ä', 'a'],  ['ê', 'e'],  ['í', 'i'],  ['ó', 'o'],  ['ü', 'u'],  ['ý', 'i'],
        ['á', 'a'],  ['ë', 'e'],  ['ì', 'i'],  ['ò', 'o'],  ['ú', 'u'],  ['ỳ', 'i'],
        ['ā', 'a'],  ['ē', 'e'],  ['ī', 'i'],  ['ō', 'o'],  ['ū', 'u'],  ['ȳ', 'i'],
        ['ç', 'c'],  ['ñ', 'n'],  ['k', 'q'],  ['w', 'v'],
        ['œ', 'oe'], ['æ', 'ae'],
        ['ſ', 's'],  ['ffi', 'ffi'],  ['ffl', 'ffl'],  ['ff', 'ff'],  ['ſt', 'ft'],  ['fi', 'fi'],  ['fl', 'fl'],  ['st', 'st']
        ['ſ', 's'],  ['ffi', 'ffi'],  ['ffl', 'ffl'],  ['ff', 'ff'],  ['ſt', 'ft'],  ['fi', 'fi'],  ['fl', 'fl'],  ['st', 'st'],
        ["⁰", "0"], ["¹", "1"], ["²", "2"], ["³", "3"], ["⁴", "4"], ["⁵", "5"], ["⁶", "6"], ["⁷", "7"], ["⁸", "8"], ["⁹", "9"]
    ]),

    simplifyWord: function (sWord) {
        // word simplication before calculating distance between words
        sWord = sWord.toLowerCase();
        sWord = [...sWord].map(c => this._xTransCharsForSimplification.gl_get(c, c)).join('');
        let sNewWord = "";
        let i = 1;
        for (let c of sWord) {
            if (c == 'e' || c != sWord.slice(i, i+1)) {  // exception for <e> to avoid confusion between crée / créai
                sNewWord += c;
            }
            i++;
        }
        return sNewWord.replace(/eau/g, "o").replace(/au/g, "o").replace(/ai/g, "ẽ").replace(/ei/g, "ẽ").replace(/ph/g, "f");
    },

    _xTransNumbersToExponent: new Map([
        ["0", "⁰"], ["1", "¹"], ["2", "²"], ["3", "³"], ["4", "⁴"], ["5", "⁵"], ["6", "⁶"], ["7", "⁷"], ["8", "⁸"], ["9", "⁹"]
    ]),

    numbersToExponent: function (sWord) {
        let sNewWord = "";
        for (let c of sWord) {
            sNewWord += this._xTransNumbersToExponent.gl_get(c, c);
        }
        return sNewWord;
    },

    aVowel: new Set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ"),
    aConsonant: new Set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ"),
    aDouble: new Set("bcdfjklmnprstzBCDFJKLMNPRSTZ"),  // letters that may be used twice successively


    // Similar chars

Modified graphspell-js/ibdawg.js from [eb5a5d5e73] to [31dc2284c6].

341
342
343
344
345
346
347






348
349
350
351
352
353
354
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360







+
+
+
+
+
+







            return aSugg.map( (sSugg) => { return sPfx + sSugg + sSfx; } );
        }
        //console.timeEnd("Suggestions for " + sWord);
        return aSugg;
    }

    _splitSuggest (oSuggResult, sWord) {
        // split trailing numbers
        let m = /^([a-zA-Zà-öÀ-Ö_ø-ÿØ-ßĀ-ʯfi-st][a-zA-Zà-öÀ-Ö_ø-ÿØ-ßĀ-ʯfi-st-]+)([0-9]+)$/.exec(sWord);
        if (m) {
            oSuggResult.addSugg(m[1] + " " + char_player.numbersToExponent(m[2]));
        }
        // split at apostrophes
        for (let cSplitter of "'’") {
            if (sWord.includes(cSplitter)) {
                let [sWord1, sWord2] = sWord.split(cSplitter, 2);
                if (this.isValid(sWord1) && this.isValid(sWord2)) {
                    oSuggResult.addSugg(sWord1+" "+sWord2);
                }
            }

Modified graphspell/char_player.py from [64714404fb] to [01c5a83d44].

21
22
23
24
25
26
27

28
29
30
31
32
33
34
35
36
37
38








39
40
41
42
43
44
45
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54







+











+
+
+
+
+
+
+
+







    'â': 'a',  'è': 'e',  'ï': 'i',  'ö': 'o',  'ù': 'u',  'ŷ': 'i',
    'ä': 'a',  'ê': 'e',  'í': 'i',  'ó': 'o',  'ü': 'u',  'ý': 'i',
    'á': 'a',  'ë': 'e',  'ì': 'i',  'ò': 'o',  'ú': 'u',  'ỳ': 'i',
    'ā': 'a',  'ē': 'e',  'ī': 'i',  'ō': 'o',  'ū': 'u',  'ȳ': 'i',
    'ç': 'c',  'ñ': 'n',  'k': 'q',  'w': 'v',
    'œ': 'oe',  'æ': 'ae',
    'ſ': 's',  'ffi': 'ffi',  'ffl': 'ffl',  'ff': 'ff',  'ſt': 'ft',  'fi': 'fi',  'fl': 'fl',  'st': 'st',
    "⁰": "0", "¹": "1", "²": "2", "³": "3", "⁴": "4", "⁵": "5", "⁶": "6", "⁷": "7", "⁸": "8", "⁹": "9"
})

def simplifyWord (sWord):
    "word simplication before calculating distance between words"
    sWord = sWord.lower().translate(_xTransCharsForSimplification)
    sNewWord = ""
    for i, c in enumerate(sWord, 1):
        if c == 'e' or c != sWord[i:i+1]:  # exception for <e> to avoid confusion between crée / créai
            sNewWord += c
    return sNewWord.replace("eau", "o").replace("au", "o").replace("ai", "ẽ").replace("ei", "ẽ").replace("ph", "f")


_xTransNumbersToExponent = str.maketrans({
    "0": "⁰", "1": "¹", "2": "²", "3": "³", "4": "⁴", "5": "⁵", "6": "⁶", "7": "⁷", "8": "⁸", "9": "⁹"
})

def numbersToExponent (sWord):
    return sWord.translate(_xTransNumbersToExponent)


aVowel = set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ")
aConsonant = set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ")
aDouble = set("bcdfjklmnprstzBCDFJKLMNPRSTZ")  # letters that may be used twice successively


# Similar chars

Modified graphspell/ibdawg.py from [bd7f8fa082] to [e1b96535dd].

310
311
312
313
314
315
316






317
318
319
320
321
322
323
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329







+
+
+
+
+
+







        aSugg = oSuggResult.getSuggestions(nSuggLimit)
        if sSfx or sPfx:
            # we add what we removed
            return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
        return aSugg

    def _splitSuggest (self, oSuggResult, sWord):
        # split trailing numbers
        m = re.match(r"(\D+)([0-9]+)$", sWord)
        if m:
            print("ok")
            oSuggResult.addSugg(m.group(1) + " " + cp.numbersToExponent(m.group(2)))
        # split at apostrophes
        for cSplitter in "'’":
            if cSplitter in sWord:
                sWord1, sWord2 = sWord.split(cSplitter, 1)
                if self.isValid(sWord1) and self.isValid(sWord2):
                    oSuggResult.addSugg(sWord1+" "+sWord2)

    def _suggest (self, oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nMaxJump=0, nDist=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False):