Grammalecte  Diff

Differences From Artifact [23c6a753b0]:

To Artifact [7c1521954d]:


9
10
11
12
13
14
15

16
17
18
19
20
21
22
23
24
25
26
9
10
11
12
13
14
15
16
17



18
19
20
21
22
23
24







+

-
-
-







import sys
import re
import collections
import zipfile
import math
import argparse
import tags
import shutil
from enum import Enum

from distutils import dir_util
from distutils import file_util
from string import Template

import tags

import metagraphe
import metaphone2
import thes_build
193
194
195
196
197
198
199





200
201
202
203
204
205
206
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209







+
+
+
+
+







    if os.path.isfile(spf):
        with open(spf, "r", encoding="utf-8") as hSrc:
            for sLine in hSrc:
                yield sLine
    else:
        print("# Error: file not found.")


def createFolder (sp):
    "make a folder if it doesn’t exist; don’t change anything if it exists"
    if not os.path.exists(sp):
        os.mkdir(sp)


class Dictionnaire:
    def __init__ (self, version, name):
        # Dictionary
        self.sName = name
        self.lEntry = []
523
524
525
526
527
528
529
530

531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555










556
557
558

559
560
561
562
563


564
565
566
567
568
569
570
571






572
573
574
575



576
577
578
579
580
581

582
583
584
585
586
587
588
589

590
591
592
593


594
595
596
597
598
599
600
601


602
603
604
605
606
607
608
609
610
611
612
613
614
615

616
617
618
619
620
621
622
623
624
625
626


627
628
629
630
631
632
633
634
635
636

637
638
639
640
641
642
643
644
645
646

647
648
649
650
651
652
653
526
527
528
529
530
531
532

533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548










549
550
551
552
553
554
555
556
557
558
559
560

561
562
563
564


565
566
567
568






569
570
571
572
573
574
575



576
577
578
579
580
581
582
583

584
585
586
587
588
589
590
591

592
593
594


595
596
597
598
599
600
601
602


603
604
605
606
607
608
609
610
611
612
613
614
615
616
617

618
619
620
621
622
623
624
625
626
627


628
629
630
631
632
633
634
635
636
637
638

639
640
641
642
643
644
645
646
647
648

649
650
651
652
653
654
655
656







-
+















-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+


-
+



-
-
+
+


-
-
-
-
-
-
+
+
+
+
+
+

-
-
-
+
+
+





-
+







-
+


-
-
+
+






-
-
+
+













-
+









-
-
+
+









-
+









-
+







            hDst.write(Flexion.simpleHeader())
            for oFlex in self.lFlexions:
                hDst.write(oFlex.getGrammarCheckerRepr())

    def createFiles (self, spDst, lDictVars, nMode, bSimplified):
        sDicName = PREFIX_DICT_PATH + self.sVersion
        spDic = spDst + '/' + sDicName
        dir_util.mkpath(spDic)
        createFolder(spDic)
        for dVars in lDictVars:
            # template vars
            dVars['version'] = self.sVersion
            # Dictionaries files (.dic) (.aff)
            self.writeAffixes(spDic, dVars, nMode, bSimplified)
            self.writeDictionary(spDic, dVars, nMode, bSimplified)
        copyTemplate('orthographe', spDic, 'README_dict_fr.txt', dVars)
        createZipFiles(spDic, spDst, sDicName + '.zip')

    def createLibreOfficeExtension (self, spBuild, dTplVars, lDictVars, sThesVer, spDestGL=""):
        # LibreOffice extension
        echo(" * Dictionnaire >> extension pour LibreOffice")
        dTplVars['version'] = self.sVersion
        sExtensionName = EXT_PREFIX_OOO + self.sVersion
        spExt = spBuild + '/' + sExtensionName
        dir_util.mkpath(spExt+'/META-INF')
        dir_util.mkpath(spExt+'/ui')
        dir_util.mkpath(spExt+'/dictionaries')
        dir_util.mkpath(spExt+'/pythonpath')
        file_util.copy_file('_templates/ooo/manifest.xml', spExt+'/META-INF')
        file_util.copy_file('_templates/ooo/DictionarySwitcher.py', spExt)
        file_util.copy_file('_templates/ooo/ds_strings.py', spExt+'/pythonpath')
        file_util.copy_file('_templates/ooo/addons.xcu', spExt+'/ui')
        file_util.copy_file('_templates/ooo/french_flag.png', spExt)
        file_util.copy_file('_templates/ooo/french_flag_16.bmp', spExt+'/ui')
        createFolder(spExt+'/META-INF')
        createFolder(spExt+'/ui')
        createFolder(spExt+'/dictionaries')
        createFolder(spExt+'/pythonpath')
        shutil.copy2('_templates/ooo/manifest.xml', spExt+'/META-INF')
        shutil.copy2('_templates/ooo/DictionarySwitcher.py', spExt)
        shutil.copy2('_templates/ooo/ds_strings.py', spExt+'/pythonpath')
        shutil.copy2('_templates/ooo/addons.xcu', spExt+'/ui')
        shutil.copy2('_templates/ooo/french_flag.png', spExt)
        shutil.copy2('_templates/ooo/french_flag_16.bmp', spExt+'/ui')
        copyTemplate('_templates/ooo', spExt, 'description.xml', dTplVars)
        copyTemplate('_templates/ooo', spExt, 'dictionaries.xcu', dTplVars)
        #file_util.copy_file('_templates/ooo/dictionaries.xcu.tpl.xml', spExt)
        #shutil.copy2('_templates/ooo/dictionaries.xcu.tpl.xml', spExt)
        copyTemplate('_templates/ooo', spExt, 'package-description.txt', dTplVars)
        for dVars in lDictVars:
            dicPath = spBuild + '/' + PREFIX_DICT_PATH + self.sVersion
            file_util.copy_file(dicPath+'/'+dVars['asciiName']+'.dic', spExt+'/dictionaries/'+dVars['asciiName']+'.dic')
            file_util.copy_file(dicPath+'/'+dVars['asciiName']+'.aff', spExt+'/dictionaries/'+dVars['asciiName']+'.aff')
            shutil.copy2(dicPath+'/'+dVars['asciiName']+'.dic', spExt+'/dictionaries/'+dVars['asciiName']+'.dic')
            shutil.copy2(dicPath+'/'+dVars['asciiName']+'.aff', spExt+'/dictionaries/'+dVars['asciiName']+'.aff')
        copyTemplate('orthographe', spExt+'/dictionaries', 'README_dict_fr.txt', dTplVars)
        # hyphenation
        file_util.copy_file('césures/hyph_fr.dic', spExt+'/dictionaries')
        file_util.copy_file('césures/hyph_fr.iso8859-1.dic', spExt+'/dictionaries')
        file_util.copy_file('césures/frhyph.tex', spExt+'/dictionaries')
        file_util.copy_file('césures/hyph-fr.tex', spExt+'/dictionaries')
        file_util.copy_file('césures/README_hyph_fr-3.0.txt', spExt+'/dictionaries')
        file_util.copy_file('césures/README_hyph_fr-2.9.txt', spExt+'/dictionaries')
        shutil.copy2('césures/hyph_fr.dic', spExt+'/dictionaries')
        shutil.copy2('césures/hyph_fr.iso8859-1.dic', spExt+'/dictionaries')
        shutil.copy2('césures/frhyph.tex', spExt+'/dictionaries')
        shutil.copy2('césures/hyph-fr.tex', spExt+'/dictionaries')
        shutil.copy2('césures/README_hyph_fr-3.0.txt', spExt+'/dictionaries')
        shutil.copy2('césures/README_hyph_fr-2.9.txt', spExt+'/dictionaries')
        # thesaurus
        file_util.copy_file(spBuild+"/thesaurus-v"+sThesVer+'/thes_fr.dat', spExt+"/dictionaries")
        file_util.copy_file(spBuild+"/thesaurus-v"+sThesVer+'/thes_fr.idx', spExt+"/dictionaries")
        file_util.copy_file(spBuild+"/thesaurus-v"+sThesVer+'/README_thes_fr.txt', spExt+"/dictionaries")
        shutil.copy2(spBuild+"/thesaurus-v"+sThesVer+'/thes_fr.dat', spExt+"/dictionaries")
        shutil.copy2(spBuild+"/thesaurus-v"+sThesVer+'/thes_fr.idx', spExt+"/dictionaries")
        shutil.copy2(spBuild+"/thesaurus-v"+sThesVer+'/README_thes_fr.txt', spExt+"/dictionaries")
        # zip
        createZipFiles(spExt, spBuild, sExtensionName + '.oxt')
        # copy to Grammalecte Project
        if spDestGL:
            echo("   Dictionnaires Hunspell copiés dans Grammalecte pour LibreOffice...")
            dir_util.copy_tree(spExt+'/dictionaries', spDestGL)
            shutil.copytree(spExt+'/dictionaries', spDestGL, dirs_exist_ok=True)

    def createMozillaExtensions (self, spBuild, dTplVars, lDictVars, spDestGL=""):
        # Mozilla extension 1
        echo(" * Dictionnaire >> extension pour Mozilla")
        dTplVars['version'] = self.sVersion
        sExtensionName = EXT_PREFIX_MOZ + self.sVersion
        spExt = spBuild + '/' + sExtensionName
        dir_util.mkpath(spExt+'/dictionaries')
        createFolder(spExt+'/dictionaries')
        copyTemplate('_templates/moz', spExt, 'manifest.json', dTplVars)
        spDict = spBuild + '/' + PREFIX_DICT_PATH + self.sVersion
        file_util.copy_file(spDict+'/fr-classique.dic', spExt+'/dictionaries/fr-classic.dic')
        file_util.copy_file(spDict+'/fr-classique.aff', spExt+'/dictionaries/fr-classic.aff')
        shutil.copy2(spDict+'/fr-classique.dic', spExt+'/dictionaries/fr-classic.dic')
        shutil.copy2(spDict+'/fr-classique.aff', spExt+'/dictionaries/fr-classic.aff')
        copyTemplate('orthographe', spExt, 'README_dict_fr.txt', dTplVars)
        createZipFiles(spExt, spBuild, sExtensionName + '.xpi')
        # Grammalecte
        if spDestGL:
            echo("   Dictionnaires Hunspell copiés dans Grammalecte pour Mozilla")
            for dVars in lDictVars:
                file_util.copy_file(spDict+'/'+dVars['asciiName']+'.dic', spDestGL+'/'+dVars['mozAsciiName']+"/"+dVars['mozAsciiName']+'.dic')
                file_util.copy_file(spDict+'/'+dVars['asciiName']+'.aff', spDestGL+'/'+dVars['mozAsciiName']+"/"+dVars['mozAsciiName']+'.aff')
                shutil.copy2(spDict+'/'+dVars['asciiName']+'.dic', spDestGL+'/'+dVars['mozAsciiName']+"/"+dVars['mozAsciiName']+'.dic')
                shutil.copy2(spDict+'/'+dVars['asciiName']+'.aff', spDestGL+'/'+dVars['mozAsciiName']+"/"+dVars['mozAsciiName']+'.aff')

    def createFileIfqForDB (self, spBuild):
        echo(" * Dictionnaire >> indices de fréquence pour la DB...")
        with open(spBuild+'/dictIdxIfq-'+self.sVersion+'.diff.txt', 'w', encoding='utf-8', newline="\n") as hDiff, \
             open(spBuild+'/dictIdxIfq-'+self.sVersion+'.notes.txt', 'w', encoding='utf-8', newline="\n") as hNotes:
            for oEntry in self.lEntry:
                if oEntry.fq != oEntry.oldFq:
                    hDiff.write("{0.iD}\t{0.fq}\n".format(oEntry))
                    hNotes.write("{0.lemma}/{0.flags}\t{0.oldFq} > {0.fq}\n".format(oEntry))

    def createLexiconPackages (self, spBuild, version, oStatsLex, spDestGL=""):
        sLexName = LEX_PREFIX + version
        spLex = spBuild + '/' + sLexName
        dir_util.mkpath(spLex)
        createFolder(spLex)
        # write lexicon
        self.sortLexiconByFreq()
        self.writeLexicon(spLex + '/' + sLexName + '.txt', version, oStatsLex)
        self.writeGrammarCheckerLexicon(spBuild + '/' + sLexName + '.lex', version)
        copyTemplate('lexique', spLex, 'README_lexique.txt', {'version': version})
        # zip
        createZipFiles(spLex, spBuild, sLexName + '.zip')
        # copy GC lexicon to Grammalecte
        if spDestGL:
            file_util.copy_file(spBuild + '/' + sLexName + '.lex', spDestGL + '/French.lex')
            file_util.copy_file('lexique/French.tagset.txt', spDestGL)
            shutil.copy2(spBuild + '/' + sLexName + '.lex', spDestGL + '/French.lex')
            shutil.copy2('lexique/French.tagset.txt', spDestGL)

    def createDictConj (self, spBuild, spDestGL=""):
        echo(" * Dictionnaire >> fichier de conjugaison...")
        with open(spBuild+'/dictConj.txt', 'w', encoding='utf-8', newline="\n") as hDst:
            for oEntry in self.lEntry:
                if oEntry.po.startswith("v"):
                    hDst.write(oEntry.getConjugation())
        if spDestGL:
            echo("   Fichier de conjugaison copié dans Grammalecte...")
            file_util.copy_file(spBuild+'/dictConj.txt', spDestGL)
            shutil.copy2(spBuild+'/dictConj.txt', spDestGL)

    def createDictDecl (self, spBuild, spDestGL=""):
        echo(" * Dictionnaire >> fichier de déclinaison...")
        with open(spBuild+'/dictDecl.txt', 'w', encoding='utf-8', newline="\n") as hDst:
            for oEntry in self.lEntry:
                if re.match("[SXFWIA]", oEntry.flags) and (oEntry.po.startswith("nom") or oEntry.po.startswith("adj")):
                    hDst.write(oEntry.getDeclination())
        if spDestGL:
            echo("   Fichier de déclinaison copié dans Grammalecte...")
            file_util.copy_file(spBuild+'/dictDecl.txt', spDestGL)
            shutil.copy2(spBuild+'/dictDecl.txt', spDestGL)


class Entree:
    def __init__ (self, sLine):
        self.lemma = ''
        self.flags = ''
        # champs morphologiques Hunspell
1409
1410
1411
1412
1413
1414
1415
1416

1417
1418

1419
1420
1421
1422
1423
1424
1425




1426
1427

1428
1429
1430
1431
1432
1433
1434
1412
1413
1414
1415
1416
1417
1418

1419
1420

1421
1422
1423
1424




1425
1426
1427
1428
1429

1430
1431
1432
1433
1434
1435
1436
1437







-
+

-
+



-
-
-
-
+
+
+
+

-
+







            for e in self.dFlexions.items():
                hDst.write("{} - {}\n".format(e[0], e[1]))


def createThesaurusPackage (spBuild, sVersion, spCopy="", spDataDestGL=""):
    print(" * Création du thésaurus")
    spThesaurus = spBuild+"/thesaurus-v"+sVersion
    dir_util.mkpath(spThesaurus)
    createFolder(spThesaurus)
    thes_build.build("thesaurus/thes_fr.dat", "thesaurus/synsets_fr.dat", spThesaurus)
    file_util.copy_file('thesaurus/README_thes_fr.txt', spThesaurus)
    shutil.copy2('thesaurus/README_thes_fr.txt', spThesaurus)
    if spCopy:
        # copy in libreoffice extension package
        print("   Copie du thésaurus dans:", spCopy)
        file_util.copy_file(spThesaurus+'/thes_fr.dat', spCopy)
        file_util.copy_file(spThesaurus+'/thes_fr.idx', spCopy)
        file_util.copy_file(spThesaurus+'/README_thes_fr.txt', spCopy)
    if spModulesDestGL:
        shutil.copy2(spThesaurus+'/thes_fr.dat', spCopy)
        shutil.copy2(spThesaurus+'/thes_fr.idx', spCopy)
        shutil.copy2(spThesaurus+'/README_thes_fr.txt', spCopy)
    if spDataDestGL:
        # copy in data source folder of Grammalecte
        file_util.copy_file(spThesaurus+'/thes_fr.json', spDataDestGL)
        shutil.copy2(spThesaurus+'/thes_fr.json', spDataDestGL)


def main ():
    xParser = argparse.ArgumentParser()
    xParser.add_argument("-v", "--verdic", help="set dictionary version, i.e. 5.4", type=str, default="X.Y.z")
    xParser.add_argument("-m", "--mode", help="0: no tags,  1: Hunspell tags (default),  2: All tags", type=int, choices=[0, 1, 2], default=1)
    xParser.add_argument("-u", "--uncompress", help="do not use Hunspell compression", action="store_true")
1444
1445
1446
1447
1448
1449
1450
1451

1452
1453
1454
1455
1456
1457
1458
1447
1448
1449
1450
1451
1452
1453

1454
1455
1456
1457
1458
1459
1460
1461







-
+







    echo("Version: " + xArgs.verdic)
    echo("Simplify: " + str(xArgs.simplify))
    echo("Mode: " + str(xArgs.mode))
    echo("Compression: " + str(not(xArgs.uncompress)))

    ### création du répertoire
    spBuild = BUILD_PATH + '/' + xArgs.verdic
    dir_util.mkpath(spBuild)
    createFolder(spBuild)

    ### Lecture des fichiers et création du dictionnaire
    oFrenchDict = Dictionnaire(xArgs.verdic, "French dictionary")
    for sFile in ['orthographe/FRANCAIS.dic']:
        oFrenchDict.readDictionary(sFile)
    oFrenchDict.readAffixes('orthographe/FRANCAIS_7.aff')

1476
1477
1478
1479
1480
1481
1482
1483

1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496


1497
1498
1499
1500
1501
1502
1503
1479
1480
1481
1482
1483
1484
1485

1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497


1498
1499
1500
1501
1502
1503
1504
1505
1506







-
+











-
-
+
+







    oStatsLex.addLexFromFile('lexique/corpus_data/stats_litterature.txt', 'L', 'Littérature')
    oStatsLex.write(spBuild+'/test_lex.txt')
    oFrenchDict.calculateStats(oStatsLex, spfStats)

    ### Écriture des paquets
    echo("Création des paquets...")

    nThesaurusVersion = 2.4
    sThesaurusVersion = "3.0"
    spLexiconDestGL = "../../../lexicons"  if xArgs.grammalecte  else ""
    spLibreOfficeExtDestGL = "../oxt/Dictionnaires/dictionaries"  if xArgs.grammalecte  else ""
    spMozillaExtDestGL = ""  if xArgs.grammalecte  else "" # no more Hunspell dictionaries in Mozilla extensions for now
    spDataDestGL = "../data"  if xArgs.grammalecte  else ""

    ### dictionnaires
    if not xArgs.uncompress:
        oFrenchDict.defineAbreviatedTags(xArgs.mode, spfStats)
    oFrenchDict.createFiles(spBuild, [dTOUTESVAR, dCLASSIQUE, dREFORME1990], xArgs.mode, xArgs.simplify)
    oFrenchDict.createLexiconPackages(spBuild, xArgs.verdic, oStatsLex, spLexiconDestGL)
    oFrenchDict.createFileIfqForDB(spBuild)
    createThesaurusPackage(spBuild, nThesaurusVersion, spLibreOfficeExtDestGL, spDataDestGL)
    oFrenchDict.createLibreOfficeExtension(spBuild, dMOZEXT, [dTOUTESVAR, dCLASSIQUE, dREFORME1990], nThesaurusVersion, spLibreOfficeExtDestGL)
    createThesaurusPackage(spBuild, sThesaurusVersion, spLibreOfficeExtDestGL, spDataDestGL)
    oFrenchDict.createLibreOfficeExtension(spBuild, dMOZEXT, [dTOUTESVAR, dCLASSIQUE, dREFORME1990], sThesaurusVersion, spLibreOfficeExtDestGL)
    oFrenchDict.createMozillaExtensions(spBuild, dMOZEXT, [dTOUTESVAR, dCLASSIQUE, dREFORME1990], spMozillaExtDestGL)
    oFrenchDict.createDictConj(spBuild, spDataDestGL)
    oFrenchDict.createDictDecl(spBuild, spDataDestGL)


if __name__ == '__main__':
    main()