397
398
399
400
401
402
403
404
405
406
407
408
409
410
|
if sWord in _dValues:
return _dValues[sWord]
return ""
def readableMorph (sMorph):
"returns string: readable tags"
sRes = ""
sMorph = re.sub("(?<=V[0123][ea_])[itpqnmr_eaxz]+", "", sMorph)
for m in _zTag.finditer(sMorph):
if m.group(0) in _dTAGS:
sRes += _dTAGS[m.group(0)][0]
else:
sRes += " [" + m.group(0) + "]?"
|
>
>
|
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
|
if sWord in _dValues:
return _dValues[sWord]
return ""
def readableMorph (sMorph):
"returns string: readable tags"
if not sMorph:
return "mot inconnu"
sRes = ""
sMorph = re.sub("(?<=V[0123][ea_])[itpqnmr_eaxz]+", "", sMorph)
for m in _zTag.finditer(sMorph):
if m.group(0) in _dTAGS:
sRes += _dTAGS[m.group(0)][0]
else:
sRes += " [" + m.group(0) + "]?"
|
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
|
dToken["aLabels"] = ["sigle ou acronyme"]
elif dToken["sType"] == 'WORD':
if "lMorph" in dToken and dToken["lMorph"]:
# with morphology
dToken["aLabels"] = []
for sMorph in dToken["lMorph"]:
dToken["aLabels"].append(readableMorph(sMorph))
if "sTags" in dToken:
aTags = []
for sTag in dToken["sTags"]:
if sTag in _dValues:
aTags.append(_dValues[sTag])
if aTags:
dToken["aOtherLabels"] = aTags
else:
# no morphology, guessing
if dToken["sValue"].count("-") > 4:
dToken["aLabels"] = ["élément complexe indéterminé"]
elif _zPartDemForm.search(dToken["sValue"]):
# mots avec particules démonstratives
dToken["aLabels"] = ["mot avec particule démonstrative"]
elif _zImperatifVerb.search(dToken["sValue"]):
# imperative verb forms
dToken["aLabels"] = ["forme verbale impérative"]
elif _zInterroVerb.search(dToken["sValue"]):
# formes interrogatives
dToken["aLabels"] = ["forme verbale interrogative"]
else:
dToken["aLabels"] = ["token de nature inconnue"]
except:
return
# Other functions
def filterSugg (aSugg):
    "returns the suggestions of <aSugg> which do not end with è or È (result of filter(), lazy under Python 3)"
    def _isKept (sCandidate):
        # str.endswith accepts a tuple: True if any of the suffixes matches
        return not sCandidate.endswith(("è", "È"))
    return filter(_isKept, aSugg)
|
<
<
<
<
<
<
<
>
>
>
>
>
>
>
>
>
>
|
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
|
dToken["aLabels"] = ["sigle ou acronyme"]
elif dToken["sType"] == 'WORD':
if "lMorph" in dToken and dToken["lMorph"]:
# with morphology
dToken["aLabels"] = []
for sMorph in dToken["lMorph"]:
dToken["aLabels"].append(readableMorph(sMorph))
else:
# no morphology, guessing
if dToken["sValue"].count("-") > 4:
dToken["aLabels"] = ["élément complexe indéterminé"]
elif _zPartDemForm.search(dToken["sValue"]):
# mots avec particules démonstratives
dToken["aLabels"] = ["mot avec particule démonstrative"]
elif _zImperatifVerb.search(dToken["sValue"]):
# imperative verb forms
dToken["aLabels"] = ["forme verbale impérative"]
elif _zInterroVerb.search(dToken["sValue"]):
# formes interrogatives
dToken["aLabels"] = ["forme verbale interrogative"]
else:
dToken["aLabels"] = ["mot inconnu du dictionnaire"]
if "lSubTokens" in dToken:
for dSubToken in dToken["lSubTokens"]:
if dSubToken["sValue"]:
if dSubToken["sValue"] in _dValues:
dSubToken["lMorph"] = [ "" ]
dSubToken["aLabels"] = [ _dValues[dSubToken["sValue"]] ]
else:
dSubToken["aLabels"] = [ readableMorph(sMorph) for sMorph in dSubToken["lMorph"] ]
else:
dToken["aLabels"] = ["token de nature inconnue"]
except:
return
# Other functions
def filterSugg (aSugg):
    "returns the suggestions of <aSugg> which do not end with è or È (result of filter(), lazy under Python 3)"
    def _isKept (sCandidate):
        # str.endswith accepts a tuple: True if any of the suffixes matches
        return not sCandidate.endswith(("è", "È"))
    return filter(_isKept, aSugg)
|