Grammalecte  Check-in [710e4f6d06]

Overview
Comment:[graphspell] lexicographer: fix tag recognition
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | graphspell
Files: files | file ages | folders
SHA3-256: 710e4f6d06c2eb70cd297ad6957baaa8fcbca92e86becc208b9839790a9d1ad8
User & Date: olr on 2020-12-10 00:10:53
Other Links: manifest | tags
Context
2020-12-11
20:03
[fr] ajustements check-in: 7fba9f8163 user: olr tags: trunk, fr
2020-12-10
00:10
[graphspell] lexicographer: fix tag recognition check-in: 710e4f6d06 user: olr tags: trunk, graphspell
00:09
[fr] ajustements check-in: df96b38c07 user: olr tags: trunk, fr
Changes

Modified graphspell-js/lexgraph_fr.js from [514901bfb4] to [305b9fc724].

372
373
374
375
376
377
378
379

380
381
382
383
384
385
386
372
373
374
375
376
377
378

379
380
381
382
383
384
385
386







-
+







            ['‰', "signe pour mille"],
        ]),

    _zPartDemForm: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)-(là|ci)$", "i"),
    _aPartDemExceptList: new Set(["celui", "celle", "ceux", "celles", "de", "jusque", "par", "marie-couche-toi"]),
    _zInterroVerb: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)(-(?:t-(?:ie?l|elle|on)|je|tu|ie?ls?|elles?|on|[nv]ous))$", "i"),
    _zImperatifVerb: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)(-(?:l(?:es?|a)-(?:moi|toi|lui|[nv]ous|leur)|y|en|[mts]['’ʼ‘‛´`′‵՚ꞌꞋ](?:y|en)|les?|la|[mt]oi|leur|lui))$", "i"),
    _zTag: new RegExp("[:;/][a-zA-Z0-9É@*!][^:;/]*", "g"),
    _zTag: new RegExp("[:;/#][a-zA-Z0-9É@*!][^:;/#]*", "g"),

    split: function (sWord) {
        // returns an array of strings (prefix, trimmed_word, suffix)
        let sPrefix = "";
        let sSuffix = "";
        // préfixe élidé
        let m = /^([ldmtsnjcç]|lorsqu|presqu|jusqu|puisqu|quoiqu|quelqu|qu)['’ʼ‘‛´`′‵՚ꞌꞋ]([a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st-]+)/i.exec(sWord);

Modified graphspell/lexgraph_fr.py from [25c9c16f01] to [bcc7c5a33b].

376
377
378
379
380
381
382
383

384
385
386
387
388
389
390
376
377
378
379
380
381
382

383
384
385
386
387
388
389
390







-
+







    '%': "signe de pourcentage",
    '‰': "signe pour mille"
}


_zElidedPrefix = re.compile("(?i)^([ldmtsnjcç]|lorsqu|presqu|jusqu|puisqu|quoiqu|quelqu|qu)[’'‘`ʼ]([\\w-]+)")
_zCompoundWord = re.compile("(?i)(\\w+)(-(?:(?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts]’(?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous|ce))$")
_zTag = re.compile("[:;/][\\w@*!][^:;/]*")
_zTag = re.compile("[:;/#][\\w@*!][^:;/#]*")

def split (sWord):
    "split word in 3 parts: prefix, root, suffix"
    sPrefix = ""
    sSuffix = ""
    # préfixe élidé
    m = _zElidedPrefix.match(sWord)