︙ | | | ︙ | |
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
from distutils import dir_util
from distutils import file_util
from string import Template
import metagraphe
import metaphone2
# Dictionnaire des caractères pour le tri naturel.
# Ordre souhaitable, mais pose problème pour la recherche, car engendre des égalités de lemmes différents.
# Il faut donc travailler sur un dictionnaire trié *numériquement* et le sauvegarder selon le tri *naturel*
CHARMAP = str.maketrans({ 'à': 'a', 'À': 'A', 'â': 'a', 'Â': 'A', 'ä': 'a', 'Ä': 'A', 'å': 'a', 'Å': 'A', 'ā': 'a', 'Ā': 'A',
'ç': 'c', 'Ç': 'C',
|
>
|
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
|
from distutils import dir_util
from distutils import file_util
from string import Template
import metagraphe
import metaphone2
import thes_build
# Dictionnaire des caractères pour le tri naturel.
# Ordre souhaitable, mais pose problème pour la recherche, car engendre des égalités de lemmes différents.
# Il faut donc travailler sur un dictionnaire trié *numériquement* et le sauvegarder selon le tri *naturel*
CHARMAP = str.maketrans({ 'à': 'a', 'À': 'A', 'â': 'a', 'Â': 'A', 'ä': 'a', 'Ä': 'A', 'å': 'a', 'Å': 'A', 'ā': 'a', 'Ā': 'A',
'ç': 'c', 'Ç': 'C',
|
︙ | | | ︙ | |
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
|
#file_util.copy_file('_templates/ooo/dictionaries.xcu.tpl.xml', spExt)
copyTemplate('_templates/ooo', spExt, 'package-description.txt', dTplVars)
for dVars in lDictVars:
dicPath = spBuild + '/' + PREFIX_DICT_PATH + self.sVersion
file_util.copy_file(dicPath+'/'+dVars['asciiName']+'.dic', spExt+'/dictionaries/'+dVars['asciiName']+'.dic')
file_util.copy_file(dicPath+'/'+dVars['asciiName']+'.aff', spExt+'/dictionaries/'+dVars['asciiName']+'.aff')
copyTemplate('orthographe', spExt+'/dictionaries', 'README_dict_fr.txt', dTplVars)
# thesaurus
file_util.copy_file('thesaurus/thes_fr.dat', spExt+'/dictionaries')
file_util.copy_file('thesaurus/thes_fr.idx', spExt+'/dictionaries')
file_util.copy_file('thesaurus/README_thes_fr.txt', spExt+'/dictionaries')
# hyphenation
file_util.copy_file('césures/hyph_fr.dic', spExt+'/dictionaries')
file_util.copy_file('césures/hyph_fr.iso8859-1.dic', spExt+'/dictionaries')
file_util.copy_file('césures/frhyph.tex', spExt+'/dictionaries')
file_util.copy_file('césures/hyph-fr.tex', spExt+'/dictionaries')
file_util.copy_file('césures/README_hyph_fr-3.0.txt', spExt+'/dictionaries')
file_util.copy_file('césures/README_hyph_fr-2.9.txt', spExt+'/dictionaries')
|
<
<
<
<
|
565
566
567
568
569
570
571
572
573
574
575
576
577
578
|
#file_util.copy_file('_templates/ooo/dictionaries.xcu.tpl.xml', spExt)
copyTemplate('_templates/ooo', spExt, 'package-description.txt', dTplVars)
for dVars in lDictVars:
dicPath = spBuild + '/' + PREFIX_DICT_PATH + self.sVersion
file_util.copy_file(dicPath+'/'+dVars['asciiName']+'.dic', spExt+'/dictionaries/'+dVars['asciiName']+'.dic')
file_util.copy_file(dicPath+'/'+dVars['asciiName']+'.aff', spExt+'/dictionaries/'+dVars['asciiName']+'.aff')
copyTemplate('orthographe', spExt+'/dictionaries', 'README_dict_fr.txt', dTplVars)
# hyphenation
file_util.copy_file('césures/hyph_fr.dic', spExt+'/dictionaries')
file_util.copy_file('césures/hyph_fr.iso8859-1.dic', spExt+'/dictionaries')
file_util.copy_file('césures/frhyph.tex', spExt+'/dictionaries')
file_util.copy_file('césures/hyph-fr.tex', spExt+'/dictionaries')
file_util.copy_file('césures/README_hyph_fr-3.0.txt', spExt+'/dictionaries')
file_util.copy_file('césures/README_hyph_fr-2.9.txt', spExt+'/dictionaries')
|
︙ | | | ︙ | |
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
|
with open(sPathFile, 'w', encoding='utf-8', newline="\n") as hDst:
for t in self.lLex:
hDst.write(str(t)+"\n")
for e in self.dFlexions.items():
hDst.write("{} - {}\n".format(e[0], e[1]))
def main ():
xParser = argparse.ArgumentParser()
xParser.add_argument("-v", "--verdic", help="set dictionary version, i.e. 5.4", type=str, default="X.Y.z")
xParser.add_argument("-m", "--mode", help="0: no tags, 1: Hunspell tags (default), 2: All tags", type=int, choices=[0, 1, 2], default=1)
xParser.add_argument("-u", "--uncompress", help="do not use Hunspell compression", action="store_true")
xParser.add_argument("-s", "--simplify", help="no virtual lemmas", action="store_true")
|
>
>
>
>
>
>
>
>
>
>
>
>
>
|
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
|
with open(sPathFile, 'w', encoding='utf-8', newline="\n") as hDst:
for t in self.lLex:
hDst.write(str(t)+"\n")
for e in self.dFlexions.items():
hDst.write("{} - {}\n".format(e[0], e[1]))
def createThesaurusPackage (spBuild, sVersion, spCopy=""):
    """Build the thesaurus package in a versioned folder under <spBuild>.

    The folder "<spBuild>/thesaurus-v<sVersion>" is created if needed, then
    thes_build.build() is run with that folder as its destination and the
    thesaurus README is copied next to the result.

    spBuild:  path of the build folder (string)
    sVersion: thesaurus version, used as suffix of the folder name (string)
    spCopy:   optional folder (the LibreOffice extension package) which also
              receives thes_fr.dat, thes_fr.idx and the README; nothing more
              is copied when it is empty (default)
    """
    print(" * Création du thésaurus")
    spPackage = spBuild + "/thesaurus-v" + sVersion
    dir_util.mkpath(spPackage)
    # NOTE(review): thes_build.build() presumably writes thes_fr.dat and
    # thes_fr.idx into <spPackage>, since both are read from there below
    # -- confirm against thes_build.
    thes_build.build("thesaurus/thes_fr.dat", "thesaurus/synsets_fr.dat", spPackage)
    file_util.copy_file('thesaurus/README_thes_fr.txt', spPackage)
    if not spCopy:
        return
    # duplicate the generated files into the LibreOffice extension package
    print(" Copie du thésaurus dans:", spCopy)
    for sFileName in ('thes_fr.dat', 'thes_fr.idx', 'README_thes_fr.txt'):
        file_util.copy_file(spPackage + '/' + sFileName, spCopy)
def main ():
xParser = argparse.ArgumentParser()
xParser.add_argument("-v", "--verdic", help="set dictionary version, i.e. 5.4", type=str, default="X.Y.z")
xParser.add_argument("-m", "--mode", help="0: no tags, 1: Hunspell tags (default), 2: All tags", type=int, choices=[0, 1, 2], default=1)
xParser.add_argument("-u", "--uncompress", help="do not use Hunspell compression", action="store_true")
xParser.add_argument("-s", "--simplify", help="no virtual lemmas", action="store_true")
|
︙ | | | ︙ | |
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
|
oStatsLex.addLexFromFile('lexique/corpus_data/stats_google_ngram_1.txt', 'G', 'Google 1-grams')
oStatsLex.addLexFromFile('lexique/corpus_data/stats_frwiki.txt', 'W', 'Wikipédia')
oStatsLex.addLexFromFile('lexique/corpus_data/stats_frwikisource.txt', 'S', 'Wikisource')
oStatsLex.addLexFromFile('lexique/corpus_data/stats_litterature.txt', 'L', 'Littérature')
oStatsLex.write(spBuild+'/test_lex.txt')
oFrenchDict.calculateStats(oStatsLex, spfStats)
### écriture des paquets
echo("Création des paquets...")
spLexiconDestGL = "../../../lexicons" if xArgs.grammalecte else ""
spLibreOfficeExtDestGL = "../oxt/Dictionnaires/dictionaries" if xArgs.grammalecte else ""
spMozillaExtDestGL = "" if xArgs.grammalecte else "" # no more Hunspell dictionaries in Mozilla extensions for now
spDataDestGL = "../data" if xArgs.grammalecte else ""
if not xArgs.uncompress:
oFrenchDict.defineAbreviatedTags(xArgs.mode, spfStats)
oFrenchDict.createFiles(spBuild, [dTOUTESVAR, dCLASSIQUE, dREFORME1990], xArgs.mode, xArgs.simplify)
oFrenchDict.createLexiconPackages(spBuild, xArgs.verdic, oStatsLex, spLexiconDestGL)
oFrenchDict.createFileIfqForDB(spBuild)
oFrenchDict.createLibreOfficeExtension(spBuild, dMOZEXT, [dTOUTESVAR, dCLASSIQUE, dREFORME1990], spLibreOfficeExtDestGL)
oFrenchDict.createMozillaExtensions(spBuild, dMOZEXT, [dTOUTESVAR, dCLASSIQUE, dREFORME1990], spMozillaExtDestGL)
oFrenchDict.createDictConj(spBuild, spDataDestGL)
oFrenchDict.createDictDecl(spBuild, spDataDestGL)
if __name__ == '__main__':
main()
|
|
>
>
<
|
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
|
oStatsLex.addLexFromFile('lexique/corpus_data/stats_google_ngram_1.txt', 'G', 'Google 1-grams')
oStatsLex.addLexFromFile('lexique/corpus_data/stats_frwiki.txt', 'W', 'Wikipédia')
oStatsLex.addLexFromFile('lexique/corpus_data/stats_frwikisource.txt', 'S', 'Wikisource')
oStatsLex.addLexFromFile('lexique/corpus_data/stats_litterature.txt', 'L', 'Littérature')
oStatsLex.write(spBuild+'/test_lex.txt')
oFrenchDict.calculateStats(oStatsLex, spfStats)
### Écriture des paquets
echo("Création des paquets...")
spLexiconDestGL = "../../../lexicons" if xArgs.grammalecte else ""
spLibreOfficeExtDestGL = "../oxt/Dictionnaires/dictionaries" if xArgs.grammalecte else ""
spMozillaExtDestGL = "" if xArgs.grammalecte else "" # no more Hunspell dictionaries in Mozilla extensions for now
spDataDestGL = "../data" if xArgs.grammalecte else ""
### dictionnaires
if not xArgs.uncompress:
oFrenchDict.defineAbreviatedTags(xArgs.mode, spfStats)
oFrenchDict.createFiles(spBuild, [dTOUTESVAR, dCLASSIQUE, dREFORME1990], xArgs.mode, xArgs.simplify)
oFrenchDict.createLexiconPackages(spBuild, xArgs.verdic, oStatsLex, spLexiconDestGL)
oFrenchDict.createFileIfqForDB(spBuild)
createThesaurusPackage(spBuild, "2.4", spLibreOfficeExtDestGL)
oFrenchDict.createLibreOfficeExtension(spBuild, dMOZEXT, [dTOUTESVAR, dCLASSIQUE, dREFORME1990], spLibreOfficeExtDestGL)
oFrenchDict.createMozillaExtensions(spBuild, dMOZEXT, [dTOUTESVAR, dCLASSIQUE, dREFORME1990], spMozillaExtDestGL)
oFrenchDict.createDictConj(spBuild, spDataDestGL)
oFrenchDict.createDictDecl(spBuild, spDataDestGL)
if __name__ == '__main__':
main()
|