Grammalecte  Diff

Differences From Artifact [24bc88a28a]:

To Artifact [851691ac43]:


369
370
371
372
373
374
375
376

377
378
379
380
381
382
383
369
370
371
372
373
374
375

376
377
378
379
380
381
382
383







-
+







    '%': "signe de pourcentage",
    '‰': "signe pour mille"
}


_zElidedPrefix = re.compile("(?i)^([ldmtsnjcç]|lorsqu|presqu|jusqu|puisqu|quoiqu|quelqu|qu)[’'‘`ʼ]([\\w-]+)")
_zCompoundWord = re.compile("(?i)(\\w+)(-(?:(?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts]’(?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous|ce))$")
_zTag = re.compile("[:;/][\\w*][^:;/]*")
_zTag = re.compile("[:;/][\\w@*!][^:;/]*")

def split (sWord):
    "split word in 3 parts: prefix, root, suffix"
    sPrefix = ""
    sSuffix = ""
    # préfixe élidé
    m = _zElidedPrefix.match(sWord)
401
402
403
404
405
406
407


408





409
410
411




412
413
414
415

416
417
418
419






420
421
422
423
424
425
426
401
402
403
404
405
406
407
408
409

410
411
412
413
414
415


416
417
418
419
420



421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438







+
+
-
+
+
+
+
+

-
-
+
+
+
+

-
-
-
+




+
+
+
+
+
+









def readableMorph (sMorph):
    "returns string: readable tags"
    if not sMorph:
        return "mot inconnu"
    sRes = ""
    sVType = ""
    if ":V" in sMorph:
    sMorph = re.sub("(?<=V[0123][ea_])[itpqnmr_eaxz]+", "", sMorph)
        sMorph = re.sub("(?<=V[0123][ea_])[itpqnmr_eaxz]+", "", sMorph)
    if ":Q" in sMorph:
        nVerbTag = sMorph.find(":V")
        sVType = sMorph[nVerbTag:nVerbTag+4]
        sMorph = sMorph[4:].replace(":1ŝ", "").replace(":1ś", "")
    for m in _zTag.finditer(sMorph):
        if m.group(0) in _dTAGS:
            sRes += _dTAGS[m.group(0)][0]
        sRes += _readableTag(m.group(0))
    if sRes.startswith((" verbe", " participe")) and not sRes.endswith("infinitif"):
        if sVType:
            sRes += " [" + sMorph[1:sMorph.find("/")] + " : " + _readableTag(sVType).rstrip(",") + "]"
        else:
            sRes += " [" + m.group(0) + "]?"
    if sRes.startswith(" verbe") and not sRes.endswith("infinitif"):
        sRes += " [" + sMorph[1:sMorph.find("/")] +"]"
            sRes += " [" + sMorph[1:sMorph.find("/")] + "]"
    if not sRes:
        return " [" + sMorph + "]: étiquettes inconnues"
    return sRes.rstrip(",")

def _readableTag (sTag):
    "returns string: readable tag"
    if sTag in _dTAGS:
        return _dTAGS[sTag][0]
    return " [" + sTag + "]?"


_zPartDemForm = re.compile("([\\w]+)-(là|ci)$")
_zInterroVerb = re.compile("([\\w]+)(-(?:t-(?:ie?l|elle|on)|je|tu|ie?ls?|elles?|on|[nv]ous))$")
_zImperatifVerb = re.compile("([\\w]+)(-(?:l(?:es?|a)-(?:moi|toi|lui|[nv]ous|leur)|y|en|[mts]['’ʼ‘‛´`′‵՚ꞌꞋ](?:y|en)|les?|la|[mt]oi|leur|lui))$")

def setLabelsOnToken (dToken):
    # Token: .sType, .sValue, .nStart, .nEnd, .lMorph