Grammalecte  Check-in [3955fe8676]

Overview
Comment:[graphspell] char_player: cut also prefixes for better suggestions
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | graphspell
Files: files | file ages | folders
SHA3-256: 3955fe867647fb6a77e3dc111e4856ed5d0c40e17ecbdab3d051aa20e6ce6742
User & Date: olr on 2018-05-02 10:29:58
Other Links: manifest | tags
Context
2018-05-02
11:22
[graphspell] use nDistLimit to truncate number of suggestions check-in: f3a3ed9041 user: olr tags: trunk, graphspell
10:29
[graphspell] char_player: cut also prefixes for better suggestions check-in: 3955fe8676 user: olr tags: trunk, graphspell
2018-05-01
20:00
[fr] faux positif: quelle doit/peut être +sujet check-in: ab0e7af2a4 user: olr tags: trunk, fr
Changes

Modified graphspell-js/char_player.js from [1bb2f8a481] to [c2f75d3e03].

359
360
361
362
363
364
365







366

367
368


369
370

371
372
373
374
375
376
377
359
360
361
362
363
364
365
366
367
368
369
370
371
372

373
374

375
376
377

378
379
380
381
382
383
384
385







+
+
+
+
+
+
+
-
+

-
+
+

-
+







    // Set of lowercase strings, all ending in "o".
    // NOTE(review): presumably combining prefixes (per the name "aPfx2") — confirm against the callers.
    aPfx2: new Set([
        "belgo", "franco", "génito", "gynéco", "médico", "russo"
    ]),


    cut: function (sWord) {
        // returns an arry of strings (prefix, trimed_word, suffix)
        let sPrefix = "";
        let sSuffix = "";
        let m = /^([ldmtsnjcç]|lorsqu|presqu|jusqu|puisqu|quoiqu|quelqu|qu)[’'‘`]([a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st-]+)/i.exec(sWord);
        if (m) {
            sPrefix = m[1] + "’";
            sWord = m[2];
        }
        let m = /^([a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st]+)(-(?:t-|)(?:ils?|elles?|on|je|tu|nous|vous|ce)$)/.exec(sWord);
        m = /^([a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st]+)(-(?:t-|)(?:ils?|elles?|on|je|tu|nous|vous|ce)$)/i.exec(sWord);
        if (m) {
            return ["", m[1], m[2]];
            sWord = m[1];
            sSuffix = m[2];
        }
        return ["", sWord, ""];
        return [sPrefix, sWord, sSuffix];
    },

    // Other functions
    filterSugg: function (aSugg) {
        return aSugg.filter((sSugg) => { return !sSugg.endsWith("è") && !sSugg.endsWith("È"); });
    }

Modified graphspell/char_player.py from [195969a6df] to [73eee3ee03].

348
349
350
351
352
353
354

355

356
357
358


359

360




361
362



363
364
365
366
367
368
369
348
349
350
351
352
353
354
355

356
357
358
359
360
361

362
363
364
365
366
367


368
369
370
371
372
373
374
375
376
377







+
-
+



+
+
-
+

+
+
+
+
-
-
+
+
+







    "pseudo", "pré", "re", "ré", "sans", "sous", "supra", "sur", "ultra"
])
# Immutable set of lowercase strings, all ending in "o".
# NOTE(review): presumably combining prefixes (per the name "aPfx2") — confirm against the callers.
aPfx2 = frozenset([
    "belgo", "franco", "génito", "gynéco", "médico", "russo"
])


# Elided prefix (l, d, qu, lorsqu, …) + apostrophe-like character + rest of the word.
_zWordPrefixes = re.compile("(?i)^([ldmtsnjcç]|lorsqu|presqu|jusqu|puisqu|quoiqu|quelqu|qu)[’'‘`]([\\w-]+)")
# Word followed by a hyphenated pronoun suffix (-il, -t-elle, -vous, -ce, …).
# The inline flag must stay at the very start of the pattern.
_zWordSuffixes = re.compile("(?i)^(\\w+)(-(?:t-|)(?:ils?|elles?|on|je|tu|nous|vous|ce))$")

def cut (sWord):
    """returns a tuple of strings (prefix, trimmed_word, suffix)

    prefix: elided form found at the start of <sWord>, with its apostrophe
            normalized to "’" (empty string if none)
    trimmed_word: <sWord> without the prefix and the suffix
    suffix: trailing hyphenated pronoun such as "-il" or "-t-elle"
            (empty string if none)
    """
    sPrefix = ""
    sSuffix = ""
    m = _zWordPrefixes.search(sWord)
    if m:
        sPrefix = m.group(1) + "’"
        sWord = m.group(2)
    # the suffix is searched in what remains after the prefix has been removed,
    # so that a word can have both a prefix and a suffix
    m = _zWordSuffixes.search(sWord)
    if m:
        sWord = m.group(1)
        sSuffix = m.group(2)
    return (sPrefix, sWord, sSuffix)


# Other functions

def filterSugg (aSugg):
    "lazily exclude the suggestions ending with “è” or “È”"
    def _bKeep (sSugg):
        # a suggestion is kept unless it ends with one of the two excluded letters
        return not (sSugg.endswith("è") or sSugg.endswith("È"))
    return filter(_bKeep, aSugg)