547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
|
dVars['version'] = self.sVersion
# Dictionaries files (.dic) (.aff)
self.writeAffixes(spDic, dVars, nMode, bSimplified)
self.writeDictionary(spDic, dVars, nMode, bSimplified)
copyTemplate('orthographe', spDic, 'README_dict_fr.txt', dVars)
createZipFiles(spDic, spDst, sDicName + '.zip')
def createLibreOfficeExtension (self, spBuild, dTplVars, lDictVars, spGL):
# LibreOffice extension
echo(" * Dictionnaire >> extension pour LibreOffice")
dTplVars['version'] = self.sVersion
sExtensionName = EXT_PREFIX_OOO + self.sVersion
spExt = spBuild + '/' + sExtensionName
dir_util.mkpath(spExt+'/META-INF')
dir_util.mkpath(spExt+'/ui')
|
|
|
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
|
dVars['version'] = self.sVersion
# Dictionaries files (.dic) (.aff)
self.writeAffixes(spDic, dVars, nMode, bSimplified)
self.writeDictionary(spDic, dVars, nMode, bSimplified)
copyTemplate('orthographe', spDic, 'README_dict_fr.txt', dVars)
createZipFiles(spDic, spDst, sDicName + '.zip')
def createLibreOfficeExtension (self, spBuild, dTplVars, lDictVars, spDestGL=""):
# LibreOffice extension
echo(" * Dictionnaire >> extension pour LibreOffice")
dTplVars['version'] = self.sVersion
sExtensionName = EXT_PREFIX_OOO + self.sVersion
spExt = spBuild + '/' + sExtensionName
dir_util.mkpath(spExt+'/META-INF')
dir_util.mkpath(spExt+'/ui')
|
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
|
file_util.copy_file('césures/frhyph.tex', spExt+'/dictionaries')
file_util.copy_file('césures/hyph-fr.tex', spExt+'/dictionaries')
file_util.copy_file('césures/README_hyph_fr-3.0.txt', spExt+'/dictionaries')
file_util.copy_file('césures/README_hyph_fr-2.9.txt', spExt+'/dictionaries')
# zip
createZipFiles(spExt, spBuild, sExtensionName + '.oxt')
# copy to Grammalecte Project
if spGL:
echo(" extension copiée dans Grammalecte...")
dir_util.copy_tree(spExt+'/dictionaries', spGL)
def createMozillaExtensions (self, spBuild, dTplVars, lDictVars, spDestGL=""):
    """Build the Mozilla extension folder, package it as .xpi, and optionally copy the dictionaries to Grammalecte.

    spBuild   -- build folder; the extension folder is created inside it
    dTplVars  -- template variables; the 'version' key is overwritten with self.sVersion
    lDictVars -- iterable of dicts; 'asciiName' and 'mozAsciiName' are read from each one
    spDestGL  -- destination folder in the Grammalecte project; when empty (default),
                 the copy step is skipped entirely
    """
    # Mozilla extension 1
    echo(" * Dictionnaire >> extension pour Mozilla")
    dTplVars['version'] = self.sVersion
    sExtensionName = EXT_PREFIX_MOZ + self.sVersion
    spExt = spBuild + '/' + sExtensionName
    dir_util.mkpath(spExt+'/dictionaries')
    copyTemplate('_templates/moz', spExt, 'install.rdf', dTplVars)
    spDict = spBuild + '/' + PREFIX_DICT_PATH + self.sVersion
    # the "fr-classique" files are shipped under the name "fr-classic" in the extension
    for sFileExt in ('.dic', '.aff'):
        file_util.copy_file(spDict+'/fr-classique'+sFileExt, spExt+'/dictionaries/fr-classic'+sFileExt)
    copyTemplate('orthographe', spExt, 'README_dict_fr.txt', dTplVars)
    createZipFiles(spExt, spBuild, sExtensionName + '.xpi')
    # Grammalecte
    if not spDestGL:
        # no destination given: the paths built below would otherwise start with '/'
        return
    echo(" * Dictionnaire >> copie des dicos dans Grammalecte")
    for dVars in lDictVars:
        sSrcStem = spDict+'/'+dVars['asciiName']
        sDstStem = spDestGL+'/'+dVars['mozAsciiName']+"/"+dVars['mozAsciiName']
        for sFileExt in ('.dic', '.aff'):
            file_util.copy_file(sSrcStem+sFileExt, sDstStem+sFileExt)
def createFileIfqForDB (self, spBuild):
    """Write the changed frequency indexes of self.lEntry to two text files in <spBuild>.

    For each entry whose <fq> differs from its <oldFq>:
    - "dictIdxIfq-<version>.diff.txt" receives "<iD> TAB <fq>"
    - "dictIdxIfq-<version>.notes.txt" receives "<lemma>/<flags> TAB <oldFq> > <fq>"
    """
    echo(" * Dictionnaire >> indices de fréquence pour la DB...")
    spfStem = spBuild + '/dictIdxIfq-' + self.sVersion
    with open(spfStem + '.diff.txt', 'w', encoding='utf-8', newline="\n") as hDiff, \
         open(spfStem + '.notes.txt', 'w', encoding='utf-8', newline="\n") as hNotes:
        # lazily keep only the entries whose frequency index has changed
        for oChanged in (oEntry for oEntry in self.lEntry if oEntry.fq != oEntry.oldFq):
            hDiff.write("{0.iD}\t{0.fq}\n".format(oChanged))
            hNotes.write("{0.lemma}/{0.flags}\t{0.oldFq} > {0.fq}\n".format(oChanged))
def createLexiconPackages (self, spBuild, version, oStatsLex, spLexGL=""):
    """Write the lexicon files for <version>, package them, and optionally copy the .lex file to Grammalecte.

    spBuild   -- build folder; the package folder and the .lex file are created inside it
    version   -- version string appended to LEX_PREFIX and passed on to the writers
    oStatsLex -- statistics object handed to self.writeLexicon()
    spLexGL   -- Grammalecte lexicon folder; when empty (default), nothing is copied there
    """
    sLexName = LEX_PREFIX + version
    spLex = spBuild + '/' + sLexName
    dir_util.mkpath(spLex)
    # write Dicollecte lexicon
    self.sortLexiconByFreq()
    self.writeLexicon(spLex + '/' + sLexName + '.txt', version, oStatsLex)
    self.writeGrammarCheckerLexicon(spBuild + '/' + sLexName + '.lex', version)
    copyTemplate('lexique', spLex, 'README_lexique.txt', {'version': version})
    # zip
    createZipFiles(spLex, spBuild, sLexName + '.zip')
    # copy GC lexicon to Grammalecte (only when a destination is given;
    # an empty destination would turn '/French.lex' into a root path)
    if spLexGL:
        file_util.copy_file(spBuild + '/' + sLexName + '.lex', spLexGL + '/French.lex')
        file_util.copy_file('lexique/French.tagset.txt', spLexGL)
def createDictConj (self, spBuild, spCopy=""):
    """Write <spBuild>/dictConj.txt from the verb entries and optionally copy it to <spCopy>.

    An entry is written when its <po> starts with "v"; the text comes from
    its getConjugation() method.

    spBuild -- build folder receiving dictConj.txt
    spCopy  -- destination of the copy; when empty (default), no copy is made
    """
    echo(" * Dictionnaire >> fichier de conjugaison...")
    with open(spBuild+'/dictConj.txt', 'w', encoding='utf-8', newline="\n") as hDst:
        for oEntry in self.lEntry:
            if oEntry.po.startswith("v"):
                hDst.write(oEntry.getConjugation())
    # the copy is optional: skip it (and its message) when no destination is given
    if spCopy:
        echo(" Fichier de conjugaison copié dans Grammalecte...")
        file_util.copy_file(spBuild+'/dictConj.txt', spCopy)
def createDictDecl (self, spBuild, spCopy=""):
    """Write <spBuild>/dictDecl.txt from the noun/adjective entries and optionally copy it to <spCopy>.

    An entry is written when its <flags> begins with one of S, X, F, W, I, A
    and its <po> starts with "nom" or "adj"; the text comes from its
    getDeclination() method.

    spBuild -- build folder receiving dictDecl.txt
    spCopy  -- destination of the copy; when empty (default), no copy is made
    """
    echo(" * Dictionnaire >> fichier de déclinaison...")
    with open(spBuild+'/dictDecl.txt', 'w', encoding='utf-8', newline="\n") as hDst:
        for oEntry in self.lEntry:
            if re.match("[SXFWIA]", oEntry.flags) and (oEntry.po.startswith("nom") or oEntry.po.startswith("adj")):
                hDst.write(oEntry.getDeclination())
    # the copy is optional: skip it (and its message) when no destination is given
    if spCopy:
        echo(" Fichier de déclinaison copié dans Grammalecte...")
        file_util.copy_file(spBuild+'/dictDecl.txt', spCopy)
def generateSpellVariants (self, nReq, spBuild):
if nReq < 1: nReq = 1
if nReq > 2: nReq = 2
echo(" * Lexique >> variantes par suppression... n = " + str(nReq))
with open(spBuild+'/dictSpellVariants-'+str(nReq)+'.txt', 'w', encoding='utf-8', newline="\n") as hDst:
for oFlex in frozenset(self.lFlexions):
|
|
|
|
>
|
|
|
|
|
>
|
|
|
>
|
|
|
>
|
|
|
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
|
file_util.copy_file('césures/frhyph.tex', spExt+'/dictionaries')
file_util.copy_file('césures/hyph-fr.tex', spExt+'/dictionaries')
file_util.copy_file('césures/README_hyph_fr-3.0.txt', spExt+'/dictionaries')
file_util.copy_file('césures/README_hyph_fr-2.9.txt', spExt+'/dictionaries')
# zip
createZipFiles(spExt, spBuild, sExtensionName + '.oxt')
# copy to Grammalecte Project
if spDestGL:
echo(" extension copiée dans Grammalecte...")
dir_util.copy_tree(spExt+'/dictionaries', spDestGL)
def createMozillaExtensions (self, spBuild, dTplVars, lDictVars, spDestGL=""):
    """Assemble the Mozilla extension folder, package it as .xpi, then copy the dictionaries to <spDestGL> if given.

    dTplVars['version'] is overwritten with self.sVersion before the templates are processed.
    From each item of lDictVars, the keys 'asciiName' and 'mozAsciiName' are read.
    """
    # Mozilla extension 1
    echo(" * Dictionnaire >> extension pour Mozilla")
    dTplVars['version'] = self.sVersion
    sXpiName = EXT_PREFIX_MOZ + self.sVersion
    spXpi = spBuild + '/' + sXpiName
    spXpiDicts = spXpi + '/dictionaries'
    dir_util.mkpath(spXpiDicts)
    copyTemplate('_templates/moz', spXpi, 'install.rdf', dTplVars)
    spSrc = spBuild + '/' + PREFIX_DICT_PATH + self.sVersion
    # "fr-classique" is renamed "fr-classic" inside the extension
    for sSuffix in ('.dic', '.aff'):
        file_util.copy_file(spSrc + '/fr-classique' + sSuffix, spXpiDicts + '/fr-classic' + sSuffix)
    copyTemplate('orthographe', spXpi, 'README_dict_fr.txt', dTplVars)
    createZipFiles(spXpi, spBuild, sXpiName + '.xpi')
    # Grammalecte
    if not spDestGL:
        return
    echo(" * Dictionnaire >> copie des dicos dans Grammalecte")
    for dVars in lDictVars:
        sFrom = spSrc + '/' + dVars['asciiName']
        sMozName = dVars['mozAsciiName']
        sTo = spDestGL + '/' + sMozName + "/" + sMozName
        for sSuffix in ('.dic', '.aff'):
            file_util.copy_file(sFrom + sSuffix, sTo + sSuffix)
def createFileIfqForDB (self, spBuild):
    """Write the changed frequency indexes of self.lEntry to two text files in <spBuild>.

    For each entry whose <fq> differs from its <oldFq>:
    - "dictIdxIfq-<version>.diff.txt" receives "<iD> TAB <fq>"
    - "dictIdxIfq-<version>.notes.txt" receives "<lemma>/<flags> TAB <oldFq> > <fq>"
    """
    echo(" * Dictionnaire >> indices de fréquence pour la DB...")
    spfStem = spBuild + '/dictIdxIfq-' + self.sVersion
    with open(spfStem + '.diff.txt', 'w', encoding='utf-8', newline="\n") as hDiff, \
         open(spfStem + '.notes.txt', 'w', encoding='utf-8', newline="\n") as hNotes:
        # lazily keep only the entries whose frequency index has changed
        for oChanged in (oEntry for oEntry in self.lEntry if oEntry.fq != oEntry.oldFq):
            hDiff.write("{0.iD}\t{0.fq}\n".format(oChanged))
            hNotes.write("{0.lemma}/{0.flags}\t{0.oldFq} > {0.fq}\n".format(oChanged))
def createLexiconPackages (self, spBuild, version, oStatsLex, spDestGL=""):
    """Write the lexicon files for <version>, package them, and copy the .lex file to <spDestGL> if given.

    The text lexicon and its README go into <spBuild>/<LEX_PREFIX + version>/,
    the grammar-checker lexicon (.lex) is written directly into <spBuild>.
    """
    sPackName = LEX_PREFIX + version
    spPack = spBuild + '/' + sPackName
    spfGCLex = spBuild + '/' + sPackName + '.lex'
    dir_util.mkpath(spPack)
    # write Dicollecte lexicon
    self.sortLexiconByFreq()
    self.writeLexicon(spPack + '/' + sPackName + '.txt', version, oStatsLex)
    self.writeGrammarCheckerLexicon(spfGCLex, version)
    copyTemplate('lexique', spPack, 'README_lexique.txt', {'version': version})
    # zip
    createZipFiles(spPack, spBuild, sPackName + '.zip')
    # copy GC lexicon to Grammalecte
    if not spDestGL:
        return
    file_util.copy_file(spfGCLex, spDestGL + '/French.lex')
    file_util.copy_file('lexique/French.tagset.txt', spDestGL)
def createDictConj (self, spBuild, spDestGL=""):
    """Write <spBuild>/dictConj.txt from the entries whose <po> starts with "v"; copy it to <spDestGL> if given."""
    echo(" * Dictionnaire >> fichier de conjugaison...")
    spfConj = spBuild + '/dictConj.txt'
    with open(spfConj, 'w', encoding='utf-8', newline="\n") as hDst:
        # one getConjugation() block per verb entry, in the order of self.lEntry
        hDst.writelines(oVerb.getConjugation() for oVerb in self.lEntry if oVerb.po.startswith("v"))
    if not spDestGL:
        return
    echo(" Fichier de conjugaison copié dans Grammalecte...")
    file_util.copy_file(spfConj, spDestGL)
def createDictDecl (self, spBuild, spDestGL=""):
    """Write <spBuild>/dictDecl.txt from the noun/adjective entries; copy it to <spDestGL> if given.

    An entry is kept when its <flags> begins with one of S, X, F, W, I, A
    and its <po> starts with "nom" or "adj".
    """
    echo(" * Dictionnaire >> fichier de déclinaison...")
    spfDecl = spBuild + '/dictDecl.txt'
    with open(spfDecl, 'w', encoding='utf-8', newline="\n") as hDst:
        for oWord in self.lEntry:
            # the flags test comes first, as in the combined condition it replaces
            if not re.match("[SXFWIA]", oWord.flags):
                continue
            if oWord.po.startswith(("nom", "adj")):
                hDst.write(oWord.getDeclination())
    if not spDestGL:
        return
    echo(" Fichier de déclinaison copié dans Grammalecte...")
    file_util.copy_file(spfDecl, spDestGL)
def generateSpellVariants (self, nReq, spBuild):
if nReq < 1: nReq = 1
if nReq > 2: nReq = 2
echo(" * Lexique >> variantes par suppression... n = " + str(nReq))
with open(spBuild+'/dictSpellVariants-'+str(nReq)+'.txt', 'w', encoding='utf-8', newline="\n") as hDst:
for oFlex in frozenset(self.lFlexions):
|
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
|
xParser = argparse.ArgumentParser()
xParser.add_argument("-v", "--verdic", help="set dictionary version, i.e. 5.4", type=str, default="X.Y.z")
xParser.add_argument("-m", "--mode", help="0: no tags, 1: Hunspell tags (default), 2: All tags", type=int, choices=[0, 1, 2], default=1)
xParser.add_argument("-u", "--uncompress", help="do not use Hunspell compression", action="store_true")
xParser.add_argument("-s", "--simplify", help="no virtual lemmas", action="store_true")
xParser.add_argument("-sv", "--spellvariants", help="generate spell variants", action="store_true")
xArgs = xParser.parse_args()
if xArgs.simplify:
xArgs.mode = 0
xArgs.uncompress = True
echo("Python: " + sys.version)
|
>
|
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
|
xParser = argparse.ArgumentParser()
xParser.add_argument("-v", "--verdic", help="set dictionary version, i.e. 5.4", type=str, default="X.Y.z")
xParser.add_argument("-m", "--mode", help="0: no tags, 1: Hunspell tags (default), 2: All tags", type=int, choices=[0, 1, 2], default=1)
xParser.add_argument("-u", "--uncompress", help="do not use Hunspell compression", action="store_true")
xParser.add_argument("-s", "--simplify", help="no virtual lemmas", action="store_true")
xParser.add_argument("-sv", "--spellvariants", help="generate spell variants", action="store_true")
xParser.add_argument("-gl", "--grammalecte", help="copy generated files to Grammalecte folders", action="store_true")
xArgs = xParser.parse_args()
if xArgs.simplify:
xArgs.mode = 0
xArgs.uncompress = True
echo("Python: " + sys.version)
|
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
|
oStatsLex.addLexFromFile('lexique/corpus_data/stats_frwikisource.txt', 'S', 'Wikisource')
oStatsLex.addLexFromFile('lexique/corpus_data/stats_litterature.txt', 'L', 'Littérature')
oStatsLex.write(spBuild+'/test_lex.txt')
oFrenchDict.calculateStats(oStatsLex, spfStats)
### écriture des paquets
echo("Création des paquets...")
if not xArgs.uncompress:
oFrenchDict.defineAbreviatedTags(xArgs.mode, spfStats)
oFrenchDict.createFiles(spBuild, [dMODERNE, dTOUTESVAR, dCLASSIQUE, dREFORME1990], xArgs.mode, xArgs.simplify)
oFrenchDict.createLibreOfficeExtension(spBuild, dMOZEXT, [dMODERNE, dTOUTESVAR, dCLASSIQUE, dREFORME1990], "../oxt/Dictionnaires/dictionaries")
oFrenchDict.createMozillaExtensions(spBuild, dMOZEXT, [dMODERNE, dTOUTESVAR, dCLASSIQUE, dREFORME1990], "../xpi/data/dictionaries")
oFrenchDict.createLexiconPackages(spBuild, xArgs.verdic, oStatsLex, "../../../lexicons")
oFrenchDict.createFileIfqForDB(spBuild)
oFrenchDict.createDictConj(spBuild, "../data")
oFrenchDict.createDictDecl(spBuild, "../data")
if __name__ == '__main__':
main()
|
>
>
>
>
>
>
<
<
|
>
>
|
|
|
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
|
oStatsLex.addLexFromFile('lexique/corpus_data/stats_frwikisource.txt', 'S', 'Wikisource')
oStatsLex.addLexFromFile('lexique/corpus_data/stats_litterature.txt', 'L', 'Littérature')
oStatsLex.write(spBuild+'/test_lex.txt')
oFrenchDict.calculateStats(oStatsLex, spfStats)
### écriture des paquets
echo("Création des paquets...")
spLexiconDestGL = "../../../lexicons" if xArgs.grammalecte else ""
spLibreOfficeExtDestGL = "../oxt/Dictionnaires/dictionaries" if xArgs.grammalecte else ""
spMozillaExtDestGL = "../xpi/data/dictionaries" if xArgs.grammalecte else ""
spDataDestGL = "../data" if xArgs.grammalecte else ""
if not xArgs.uncompress:
oFrenchDict.defineAbreviatedTags(xArgs.mode, spfStats)
oFrenchDict.createFiles(spBuild, [dMODERNE, dTOUTESVAR, dCLASSIQUE, dREFORME1990], xArgs.mode, xArgs.simplify)
oFrenchDict.createLexiconPackages(spBuild, xArgs.verdic, oStatsLex, spLexiconDestGL)
oFrenchDict.createFileIfqForDB(spBuild)
oFrenchDict.createLibreOfficeExtension(spBuild, dMOZEXT, [dMODERNE, dTOUTESVAR, dCLASSIQUE, dREFORME1990], spLibreOfficeExtDestGL)
oFrenchDict.createMozillaExtensions(spBuild, dMOZEXT, [dMODERNE, dTOUTESVAR, dCLASSIQUE, dREFORME1990], spMozillaExtDestGL)
oFrenchDict.createDictConj(spBuild, spDataDestGL)
oFrenchDict.createDictDecl(spBuild, spDataDestGL)
if __name__ == '__main__':
main()
|