369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
|
'%': "signe de pourcentage",
'‰': "signe pour mille"
}
# Case-insensitive. Anchored at the start of the word: an elided prefix (a single
# letter among l, d, m, t, s, n, j, c, ç, or one of "lorsqu", "presqu", "jusqu",
# "puisqu", "quoiqu", "quelqu", "qu") followed by one apostrophe-like character.
# Group 1 is the prefix without the apostrophe; group 2 is the run of word
# characters and hyphens that follows.
_zElidedPrefix = re.compile("(?i)^([ldmtsnjcç]|lorsqu|presqu|jusqu|puisqu|quoiqu|quelqu|qu)[’'‘`ʼ]([\\w-]+)")
# Case-insensitive. Anchored at the end of the word: group 1 is a run of word
# characters, group 2 is the hyphen-introduced suffix (pronoun or pronoun chain
# such as "-le-moi", "-t-il", "-y", "-en", "-vous", "-ce"), leading hyphen included.
_zCompoundWord = re.compile("(?i)(\\w+)(-(?:(?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts]’(?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous|ce))$")
# One tag: a delimiter (":", ";" or "/"), then a word character or "*", then
# everything up to (not including) the next delimiter.
_zTag = re.compile("[:;/][\\w*][^:;/]*")
def split (sWord):
"split word in 3 parts: prefix, root, suffix"
sPrefix = ""
sSuffix = ""
# préfixe élidé
m = _zElidedPrefix.match(sWord)
|
|
|
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
|
'%': "signe de pourcentage",
'‰': "signe pour mille"
}
# Case-insensitive. Anchored at the start of the word: an elided prefix (a single
# letter among l, d, m, t, s, n, j, c, ç, or one of "lorsqu", "presqu", "jusqu",
# "puisqu", "quoiqu", "quelqu", "qu") followed by one apostrophe-like character.
# Group 1 is the prefix without the apostrophe; group 2 is the run of word
# characters and hyphens that follows.
_zElidedPrefix = re.compile("(?i)^([ldmtsnjcç]|lorsqu|presqu|jusqu|puisqu|quoiqu|quelqu|qu)[’'‘`ʼ]([\\w-]+)")
# Case-insensitive. Anchored at the end of the word: group 1 is a run of word
# characters, group 2 is the hyphen-introduced suffix (pronoun or pronoun chain
# such as "-le-moi", "-t-il", "-y", "-en", "-vous", "-ce"), leading hyphen included.
_zCompoundWord = re.compile("(?i)(\\w+)(-(?:(?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts]’(?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous|ce))$")
# One tag: a delimiter (":", ";" or "/"), then a word character or one of
# "@", "*", "!", then everything up to (not including) the next delimiter.
_zTag = re.compile("[:;/][\\w@*!][^:;/]*")
def split (sWord):
"split word in 3 parts: prefix, root, suffix"
sPrefix = ""
sSuffix = ""
# préfixe élidé
m = _zElidedPrefix.match(sWord)
|
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
|
def readableMorph (sMorph):
    """Return a human-readable rendering of the tags contained in <sMorph>.

    An empty or missing morphology yields "mot inconnu". Each tag matched by
    _zTag is replaced by the first element of its _dTAGS entry; a tag absent
    from _dTAGS is rendered as " [tag]?". When no tag is found at all, the
    morphology is returned between brackets followed by
    ": étiquettes inconnues". Trailing commas are stripped from the result.
    """
    if not sMorph:
        return "mot inconnu"
    # Strip the run of sub-flag letters that follows "V<digit><e|a|_>", so that
    # only the four-character verb tag remains to be looked up.
    sCleaned = re.sub("(?<=V[0123][ea_])[itpqnmr_eaxz]+", "", sMorph)
    lLabels = []
    for mTag in _zTag.finditer(sCleaned):
        sTag = mTag.group(0)
        lLabels.append(_dTAGS[sTag][0] if sTag in _dTAGS else " [" + sTag + "]?")
    sReadable = "".join(lLabels)
    if not sReadable:
        return " [" + sCleaned + "]: étiquettes inconnues"
    if sReadable.startswith(" verbe") and not sReadable.endswith("infinitif"):
        # Append the text found between the first character and the first "/"
        # (presumably the lemma -- confirm against the dictionary format).
        sReadable += " [" + sCleaned[1:sCleaned.find("/")] + "]"
    return sReadable.rstrip(",")
# Word ending in "-là" or "-ci": group 1 is the word, group 2 the particle.
_zPartDemForm = re.compile("([\\w]+)-(là|ci)$")
# Word ending in a hyphenated subject pronoun ("-je", "-tu", "-il(s)", "-iel(s)",
# "-elle(s)", "-on", "-nous", "-vous"), optionally with the "-t-" link:
# group 1 is the word, group 2 the suffix including its leading hyphen.
_zInterroVerb = re.compile("([\\w]+)(-(?:t-(?:ie?l|elle|on)|je|tu|ie?ls?|elles?|on|[nv]ous))$")
# Word ending in one or two hyphenated object pronouns ("-le-moi", "-y", "-en",
# "-m'en", "-toi", "-leur", ...); several apostrophe variants are accepted:
# group 1 is the word, group 2 the suffix including its leading hyphen.
_zImperatifVerb = re.compile("([\\w]+)(-(?:l(?:es?|a)-(?:moi|toi|lui|[nv]ous|leur)|y|en|[mts]['’ʼ‘‛´`′‵՚ꞌꞋ](?:y|en)|les?|la|[mt]oi|leur|lui))$")
def setLabelsOnToken (dToken):
# Token: .sType, .sValue, .nStart, .nEnd, .lMorph
|
>
>
|
>
>
>
>
|
>
>
|
<
<
|
>
>
>
>
>
>
|
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
|
def readableMorph (sMorph):
    """Return a human-readable rendering of the tags contained in <sMorph>.

    sMorph: morphology string. The code reads the text between the first
        character and the first "/" as the lemma, so it presumably looks
        like ">lemma/:tag:tag..." (confirm against the dictionary format).

    Returns:
        - "mot inconnu" when sMorph is empty or None;
        - otherwise the concatenation of the readable label of every tag
          matched by _zTag, with trailing commas stripped;
        - " [<morph>]: étiquettes inconnues" when no tag is matched.

    For a morphology containing ":Q", the four-character verb tag (":V" plus
    two characters) is taken out of the tag list and, when the description is
    that of a verb or participle, shown next to the lemma:
    " [lemma : <verb type>]".
    """
    if not sMorph:
        return "mot inconnu"
    sVType = ""
    if ":V" in sMorph:
        # Strip the run of sub-flag letters that follows "V<digit><e|a|_>",
        # so that only the four-character verb tag remains.
        sMorph = re.sub("(?<=V[0123][ea_])[itpqnmr_eaxz]+", "", sMorph)
    if ":Q" in sMorph:
        nVerbTag = sMorph.find(":V")
        if nVerbTag != -1:
            # Remove the verb tag itself, where it was found. Slicing off the
            # first four characters of the string instead would truncate the
            # lemma and leave the verb tag in place.
            sVType = sMorph[nVerbTag:nVerbTag+4]
            sMorph = sMorph[:nVerbTag] + sMorph[nVerbTag+4:]
        sMorph = sMorph.replace(":1ŝ", "").replace(":1ś", "")
    sRes = "".join(_readableTag(m.group(0))  for m in _zTag.finditer(sMorph))
    if not sRes:
        return " [" + sMorph + "]: étiquettes inconnues"
    if sRes.startswith((" verbe", " participe")) and not sRes.endswith("infinitif"):
        sLemma = sMorph[1:sMorph.find("/")]
        if sVType:
            sRes += " [" + sLemma + " : " + _readableTag(sVType).rstrip(",") + "]"
        else:
            sRes += " [" + sLemma + "]"
    return sRes.rstrip(",")
def _readableTag (sTag):
    """Return the readable label of <sTag>.

    The label is the first element of the tag's entry in _dTAGS; a tag that
    has no entry is returned as " [tag]?".
    """
    if sTag not in _dTAGS:
        return " [" + sTag + "]?"
    return _dTAGS[sTag][0]
# Word ending in "-là" or "-ci": group 1 is the word, group 2 the particle.
_zPartDemForm = re.compile("([\\w]+)-(là|ci)$")
# Word ending in a hyphenated subject pronoun ("-je", "-tu", "-il(s)", "-iel(s)",
# "-elle(s)", "-on", "-nous", "-vous"), optionally with the "-t-" link:
# group 1 is the word, group 2 the suffix including its leading hyphen.
_zInterroVerb = re.compile("([\\w]+)(-(?:t-(?:ie?l|elle|on)|je|tu|ie?ls?|elles?|on|[nv]ous))$")
# Word ending in one or two hyphenated object pronouns ("-le-moi", "-y", "-en",
# "-m'en", "-toi", "-leur", ...); several apostrophe variants are accepted:
# group 1 is the word, group 2 the suffix including its leading hyphen.
_zImperatifVerb = re.compile("([\\w]+)(-(?:l(?:es?|a)-(?:moi|toi|lui|[nv]ous|leur)|y|en|[mts]['’ʼ‘‛´`′‵՚ꞌꞋ](?:y|en)|les?|la|[mt]oi|leur|lui))$")
def setLabelsOnToken (dToken):
# Token: .sType, .sValue, .nStart, .nEnd, .lMorph
|