Overview
Comment: | [fr] thesaurus builder (merging synsets) |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | fr |
Files: | files | file ages | folders |
SHA3-256: | e67b500b7b8adbee2249ac560668cb88 |
User & Date: | olr on 2019-06-26 18:54:17 |
Other Links: | manifest | tags |
Context
2019-06-26
| ||
23:06 | [fr] update thésaurus check-in: 4c57a4f8ce user: olr tags: trunk, fr | |
18:54 | [fr] thesaurus builder (merging synsets) check-in: e67b500b7b user: olr tags: trunk, fr | |
09:52 | [fr] faux positifs et ajustements check-in: 6d9ee21a54 user: olr tags: trunk, fr | |
Changes
Modified gc_lang/fr/dictionnaire/genfrdic.py from [4b330e0ec5] to [05828a16a3].
︙ | |||
17 18 19 20 21 22 23 24 25 26 27 28 29 30 | 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 | + | from distutils import dir_util from distutils import file_util from string import Template import metagraphe import metaphone2 import thes_build # Dictionnaire des caractères pour le tri naturel. # Ordre souhaitable, mais pose problème pour la recherche, car engendre des égalités de lemmes différents. # Il faut donc travailler sur un dictionnaire trié *numériquement* et le sauvegarder selon le tri *naturel* CHARMAP = str.maketrans({ 'à': 'a', 'À': 'A', 'â': 'a', 'Â': 'A', 'ä': 'a', 'Ä': 'A', 'å': 'a', 'Å': 'A', 'ā': 'a', 'Ā': 'A', 'ç': 'c', 'Ç': 'C', |
︙ | |||
564 565 566 567 568 569 570 | 565 566 567 568 569 570 571 572 573 574 575 576 577 578 | - - - - | #file_util.copy_file('_templates/ooo/dictionaries.xcu.tpl.xml', spExt) copyTemplate('_templates/ooo', spExt, 'package-description.txt', dTplVars) for dVars in lDictVars: dicPath = spBuild + '/' + PREFIX_DICT_PATH + self.sVersion file_util.copy_file(dicPath+'/'+dVars['asciiName']+'.dic', spExt+'/dictionaries/'+dVars['asciiName']+'.dic') file_util.copy_file(dicPath+'/'+dVars['asciiName']+'.aff', spExt+'/dictionaries/'+dVars['asciiName']+'.aff') copyTemplate('orthographe', spExt+'/dictionaries', 'README_dict_fr.txt', dTplVars) |
︙ | |||
1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 | 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 | + + + + + + + + + + + + | with open(sPathFile, 'w', encoding='utf-8', newline="\n") as hDst: for t in self.lLex: hDst.write(str(t)+"\n") for e in self.dFlexions.items(): hDst.write("{} - {}\n".format(e[0], e[1])) def createThesaurusPackage (spBuild, sVersion, spCopy=""): print("Création du thésaurus") spThesaurus = spBuild+"/thesaurus-v"+sVersion dir_util.mkpath(spThesaurus) thes_build.build("thesaurus/thes_fr.dat", "thesaurus/synsets_fr.dat", spThesaurus) file_util.copy_file('thesaurus/README_thes_fr.txt', spThesaurus) if spCopy: # copy in libreoffice extension package file_util.copy_file(spThesaurus+'/thes_fr.dat', spCopy) file_util.copy_file(spThesaurus+'/thes_fr.idx', spCopy) file_util.copy_file(spThesaurus+'/README_thes_fr.txt', spCopy) def main (): xParser = argparse.ArgumentParser() xParser.add_argument("-v", "--verdic", help="set dictionary version, i.e. 5.4", type=str, default="X.Y.z") xParser.add_argument("-m", "--mode", help="0: no tags, 1: Hunspell tags (default), 2: All tags", type=int, choices=[0, 1, 2], default=1) xParser.add_argument("-u", "--uncompress", help="do not use Hunspell compression", action="store_true") xParser.add_argument("-s", "--simplify", help="no virtual lemmas", action="store_true") |
︙ | |||
1554 1555 1556 1557 1558 1559 1560 | 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 | - + + + - | oStatsLex.addLexFromFile('lexique/corpus_data/stats_google_ngram_1.txt', 'G', 'Google 1-grams') oStatsLex.addLexFromFile('lexique/corpus_data/stats_frwiki.txt', 'W', 'Wikipédia') oStatsLex.addLexFromFile('lexique/corpus_data/stats_frwikisource.txt', 'S', 'Wikisource') oStatsLex.addLexFromFile('lexique/corpus_data/stats_litterature.txt', 'L', 'Littérature') oStatsLex.write(spBuild+'/test_lex.txt') oFrenchDict.calculateStats(oStatsLex, spfStats) |
Added gc_lang/fr/dictionnaire/thes_build.py version [34ec46a285].