Overview
Comment: | [graphspell][py] lexicographer: function setLabelsOnToken() |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | graphspell | salxg |
Files: | files | file ages | folders |
SHA3-256: |
26f692e745658169bd36af754938de41 |
User & Date: | olr on 2020-08-26 08:15:55 |
Other Links: | branch diff | manifest | tags |
Context
2020-08-26
| ||
08:16 | [graphspell] spellchecker: fix call to setLabelsOnToken check-in: dca1db07b3 user: olr tags: graphspell, salxg | |
08:15 | [graphspell][py] lexicographer: function setLabelsOnToken() check-in: 26f692e745 user: olr tags: graphspell, salxg | |
2020-08-25
| ||
16:03 | [core][graphspell][fx] update lexicographer check-in: 257257469a user: olr tags: core, fx, graphspell, salxg | |
Changes
Modified graphspell/lexgraph_fr.py from [2f65ed2c97] to [daf02d1e51].
︙ | |||
# Trailing context of the preceding function. Its beginning lies outside this
# excerpt, so the fragment is preserved verbatim as a comment only.
#             sRes += " [" + m.group(0) + "]?"
#     if sRes.startswith(" verbe") and not sRes.endswith("infinitif"):
#         sRes += " [" + sMorph[1:sMorph.find("/")] +"]"
#     if not sRes:
#         return " [" + sMorph + "]: étiquettes inconnues"
#     return sRes.rstrip(",")


# Patterns used to guess the nature of a hyphenated word without morphology.
_zPartDemForm = re.compile("([\\w]+)-(là|ci)$")
_zInterroVerb = re.compile("([\\w]+)(-(?:t-(?:ie?l|elle|on)|je|tu|ie?ls?|elles?|on|[nv]ous))$")
_zImperatifVerb = re.compile("([\\w]+)(-(?:l(?:es?|a)-(?:moi|toi|lui|[nv]ous|leur)|y|en|[mts][’'](?:y|en)|les?|la|[mt]oi|leur|lui))$")

# Token types whose label is constant, i.e. independent of the token value.
_dLabelByTokenType = {
    "NUM": "nombre",
    "LINK": "hyperlien",
    "TAG": "étiquette (hashtag)",
    "HTML": "balise HTML",
    "PSEUDOHTML": "balise pseudo-HTML",
    "HTMLENTITY": "entité caractère XML/HTML",
    "HOUR": "heure",
    "WORD_ORDINAL": "nombre ordinal",
    "FOLDERUNIX": "dossier UNIX (et dérivés)",
    "FOLDERWIN": "dossier Windows",
    "WORD_ACRONYM": "sigle ou acronyme",
}


def _guessLabel (sValue):
    "return a label guessed from the shape of <sValue>, a word without morphology"
    if sValue.count("-") > 4:
        return "élément complexe indéterminé"
    if _zPartDemForm.search(sValue):
        # word followed by a demonstrative particle (-là, -ci)
        return "mot avec particule démonstrative"
    if _zImperatifVerb.search(sValue):
        # imperative form followed by pronoun(s); tested before interrogatives
        return "forme verbale impérative"
    if _zInterroVerb.search(sValue):
        # interrogative form (verb followed by a subject pronoun)
        return "forme verbale interrogative"
    return "token de nature inconnue"


def setLabelsOnToken (dToken):
    """Add human-readable labels to <dToken>, in place.

    <dToken> is a dictionary read through the keys "sType" and "sValue", and
    optionally "lMorph" and "sTags". On success, the key "aLabels" is set to a
    list of labels. For a word with morphology, "aOtherLabels" is also set
    when at least one of its tags is known to _dValues.

    A token whose type is not handled here is left without "aLabels".
    The function is best-effort: any error is reported on stderr and the
    token is left as it was at that point. Nothing is raised, and the
    function always returns None.
    """
    # Token: .sType, .sValue, .nStart, .nEnd, .lMorph
    try:
        sType = dToken["sType"]
        if sType in ("PUNC", "SIGN"):
            dToken["aLabels"] = [_dValues.get(dToken["sValue"], "signe de ponctuation divers")]
        elif sType == "WORD_ELIDED":
            dToken["aLabels"] = [_dValues.get(dToken["sValue"], "préfixe élidé inconnu")]
        elif sType in _dLabelByTokenType:
            dToken["aLabels"] = [_dLabelByTokenType[sType]]
        elif sType == "WORD":
            if dToken.get("lMorph"):
                # with morphology: one label per morphology
                dToken["aLabels"] = [readableMorph(sMorph) for sMorph in dToken["lMorph"]]
                if "sTags" in dToken:
                    aTags = [_dValues[sTag] for sTag in dToken["sTags"] if sTag in _dValues]
                    if aTags:
                        dToken["aOtherLabels"] = aTags
            else:
                # no morphology, guessing
                dToken["aLabels"] = [_guessLabel(dToken["sValue"])]
    except Exception:
        # Labelling must never break the caller, but a failure should not go
        # unnoticed either (and KeyboardInterrupt/SystemExit must propagate).
        import traceback
        traceback.print_exc()


# Other functions

def filterSugg (aSugg):
    """Exclude unwanted suggestions.

    Return a lazy iterator over the items of <aSugg> that do not end with
    "è" or "È".
    """
    return filter(lambda sSugg: not sSugg.endswith(("è", "È")), aSugg)