Grammalecte  Diff

Differences From Artifact [642a6504a5]:

To Artifact [877bd56310]:


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
#!python3

__author__ = "Olivier R."
__license__ = "MPL 2"



import os
import sys
import time
import re
import collections
import zipfile
import math
import argparse
from enum import Enum










<







1
2
3
4
5
6
7
8
9

10
11
12
13
14
15
16
#!python3

__author__ = "Olivier R."
__license__ = "MPL 2"



import os
import sys

import re
import collections
import zipfile
import math
import argparse
from enum import Enum

41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65

# Les dictionnaires
dSUBDIC = { '*': 'Commun',
            'R': 'Réforme1990',
            'M': 'Moderne',
            'C': 'Classique',
            'A': 'Annexe',
            'P': 'Multimots',
            'X': 'Contributeurs' }

dMODERNE = { 'name': 'DICTIONNAIRE ORTHOGRAPHIQUE FRANÇAIS “MODERNE”',
             'shortname': '“Moderne”',
             'asciiName': 'fr-moderne',
             'mozAsciiName': 'fr-FR-modern',
             'subDicts': '*MX',
             'mozId': 'fr-dicollecte-moderne',
             'description': "Dictionnaire français “Moderne”" }

dCLASSIQUE = { 'name': 'DICTIONNAIRE ORTHOGRAPHIQUE FRANÇAIS “CLASSIQUE”',
               'shortname': '“Classique”',
               'asciiName': 'fr-classique',
               'mozAsciiName': 'fr-FR-classic',
               'subDicts': '*MCX',
               'mozId': 'fr-dicollecte-classique',
               'description': "Dictionnaire français “Classique”" }







<


<
<
<
<
<
<
<
<







40
41
42
43
44
45
46

47
48








49
50
51
52
53
54
55

# Les dictionnaires
dSUBDIC = { '*': 'Commun',
            'R': 'Réforme1990',
            'M': 'Moderne',
            'C': 'Classique',
            'A': 'Annexe',

            'X': 'Contributeurs' }









dCLASSIQUE = { 'name': 'DICTIONNAIRE ORTHOGRAPHIQUE FRANÇAIS “CLASSIQUE”',
               'shortname': '“Classique”',
               'asciiName': 'fr-classique',
               'mozAsciiName': 'fr-FR-classic',
               'subDicts': '*MCX',
               'mozId': 'fr-dicollecte-classique',
               'description': "Dictionnaire français “Classique”" }
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
            'mozId': 'fr-dicollecte',
            'description': "Dictionnaire orthographique de la langue française" }


BUILD_PATH = '_build'
PREFIX_DICT_PATH = 'hunspell-french-dictionaries-v'
EXT_PREFIX_OOO = 'lo-oo-ressources-linguistiques-fr-v'
EXT_PREFIX_MOZ = 'moz-hunspell-fr-dicollecte-v'
LEX_PREFIX = 'lexique-dicollecte-fr-v'
STATS_NAME = 'statistiques-v'

MPLHEADER = "# This Source Code Form is subject to the terms of the Mozilla Public\n" + \
            "# License, v. 2.0. If a copy of the MPL was not distributed with this\n" + \
            "# file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n"









|
|







74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
            'mozId': 'fr-dicollecte',
            'description': "Dictionnaire orthographique de la langue française" }


BUILD_PATH = '_build'
PREFIX_DICT_PATH = 'hunspell-french-dictionaries-v'
EXT_PREFIX_OOO = 'lo-oo-ressources-linguistiques-fr-v'
EXT_PREFIX_MOZ = 'moz-hunspell-fr-v'
LEX_PREFIX = 'lexique-grammalecte-fr-v'
STATS_NAME = 'statistiques-v'

MPLHEADER = "# This Source Code Form is subject to the terms of the Mozilla Public\n" + \
            "# License, v. 2.0. If a copy of the MPL was not distributed with this\n" + \
            "# file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n"


311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
        "Écrire le fichier des affixes (.aff)"
        echo(' * Dictionnaire >> [ {}.aff ]'.format(dTplVars['asciiName']))
        info = "# This Source Code Form is subject to the terms of the Mozilla Public\n" + \
               "# License, v. 2.0. If a copy of the MPL was not distributed with this\n" + \
               "# file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n" + \
               "# AFFIXES DU {} v{}\n".format(dTplVars['name'], self.sVersion) + \
               "# par Olivier R. -- licence MPL 2.0\n" + \
               "# Généré le " + time.strftime("%d-%m-%Y à %H:%M") + "\n" \
               "# Pour améliorer le dictionnaire, allez sur https://grammalecte.net/\n\n"

        with open(spDst+'/'+dTplVars['asciiName']+'.aff', 'w', encoding='utf-8', newline="\n") as hDst:
            hDst.write(info)
            hDst.write(self.sSettings + "\n")
            if self.bShortenTags:
                hDst.write("AM {}\n".format(len(self.dAM)))







<







301
302
303
304
305
306
307

308
309
310
311
312
313
314
        "Écrire le fichier des affixes (.aff)"
        echo(' * Dictionnaire >> [ {}.aff ]'.format(dTplVars['asciiName']))
        info = "# This Source Code Form is subject to the terms of the Mozilla Public\n" + \
               "# License, v. 2.0. If a copy of the MPL was not distributed with this\n" + \
               "# file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n" + \
               "# AFFIXES DU {} v{}\n".format(dTplVars['name'], self.sVersion) + \
               "# par Olivier R. -- licence MPL 2.0\n" + \

               "# Pour améliorer le dictionnaire, allez sur https://grammalecte.net/\n\n"

        with open(spDst+'/'+dTplVars['asciiName']+'.aff', 'w', encoding='utf-8', newline="\n") as hDst:
            hDst.write(info)
            hDst.write(self.sSettings + "\n")
            if self.bShortenTags:
                hDst.write("AM {}\n".format(len(self.dAM)))
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
                    hDst.write(str(dRefW[key]))
                    hDst.write("\n")

    def writeLexicon (self, spfDst, version, oStatsLex):
        echo(' * Lexique >> [ {} ] '.format(spfDst))
        with open(spfDst, 'w', encoding='utf-8', newline="\n") as hDst:
            hDst.write(MPLHEADER)
            hDst.write("# Lexique des formes fléchies du français - Dicollecte v{}\n# Licence : MPL v2.0\n\n".format(version))
            hDst.write(oStatsLex.getInfo())
            hDst.write(Flexion.header(oStatsLex))
            for oFlex in self.lFlexions:
                hDst.write(oFlex.__str__(oStatsLex))

    def writeGrammarCheckerLexicon (self, spfDst, version):
        echo(' * Lexique simplifié >> [ {} ] '.format(spfDst))







|







501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
                    hDst.write(str(dRefW[key]))
                    hDst.write("\n")

    def writeLexicon (self, spfDst, version, oStatsLex):
        echo(' * Lexique >> [ {} ] '.format(spfDst))
        with open(spfDst, 'w', encoding='utf-8', newline="\n") as hDst:
            hDst.write(MPLHEADER)
            hDst.write("# Lexique des formes fléchies du français - Grammalecte v{}\n# Licence : MPL v2.0\n\n".format(version))
            hDst.write(oStatsLex.getInfo())
            hDst.write(Flexion.header(oStatsLex))
            for oFlex in self.lFlexions:
                hDst.write(oFlex.__str__(oStatsLex))

    def writeGrammarCheckerLexicon (self, spfDst, version):
        echo(' * Lexique simplifié >> [ {} ] '.format(spfDst))
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
    def createMozillaExtensions (self, spBuild, dTplVars, lDictVars, spDestGL=""):
        # Mozilla extension 1
        echo(" * Dictionnaire >> extension pour Mozilla")
        dTplVars['version'] = self.sVersion
        sExtensionName = EXT_PREFIX_MOZ + self.sVersion
        spExt = spBuild + '/' + sExtensionName
        dir_util.mkpath(spExt+'/dictionaries')
        copyTemplate('_templates/moz', spExt, 'install.rdf', dTplVars)
        spDict = spBuild + '/' + PREFIX_DICT_PATH + self.sVersion
        file_util.copy_file(spDict+'/fr-classique.dic', spExt+'/dictionaries/fr-classic.dic')
        file_util.copy_file(spDict+'/fr-classique.aff', spExt+'/dictionaries/fr-classic.aff')
        copyTemplate('orthographe', spExt, 'README_dict_fr.txt', dTplVars)
        createZipFiles(spExt, spBuild, sExtensionName + '.xpi')
        # Grammalecte
        if spDestGL:







|







575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
    def createMozillaExtensions (self, spBuild, dTplVars, lDictVars, spDestGL=""):
        # Mozilla extension 1
        echo(" * Dictionnaire >> extension pour Mozilla")
        dTplVars['version'] = self.sVersion
        sExtensionName = EXT_PREFIX_MOZ + self.sVersion
        spExt = spBuild + '/' + sExtensionName
        dir_util.mkpath(spExt+'/dictionaries')
        copyTemplate('_templates/moz', spExt, 'manifest.json', dTplVars)
        spDict = spBuild + '/' + PREFIX_DICT_PATH + self.sVersion
        file_util.copy_file(spDict+'/fr-classique.dic', spExt+'/dictionaries/fr-classic.dic')
        file_util.copy_file(spDict+'/fr-classique.aff', spExt+'/dictionaries/fr-classic.aff')
        copyTemplate('orthographe', spExt, 'README_dict_fr.txt', dTplVars)
        createZipFiles(spExt, spBuild, sExtensionName + '.xpi')
        # Grammalecte
        if spDestGL:
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
                    hDiff.write("{0.iD}\t{0.fq}\n".format(oEntry))
                    hNotes.write("{0.lemma}/{0.flags}\t{0.oldFq} > {0.fq}\n".format(oEntry))

    def createLexiconPackages (self, spBuild, version, oStatsLex, spDestGL=""):
        sLexName = LEX_PREFIX + version
        spLex = spBuild + '/' + sLexName
        dir_util.mkpath(spLex)
        # write Dicollecte lexicon
        self.sortLexiconByFreq()
        self.writeLexicon(spLex + '/' + sLexName + '.txt', version, oStatsLex)
        self.writeGrammarCheckerLexicon(spBuild + '/' + sLexName + '.lex', version)
        copyTemplate('lexique', spLex, 'README_lexique.txt', {'version': version})
        # zip
        createZipFiles(spLex, spBuild, sLexName + '.zip')
        # copy GC lexicon to Grammalecte







|







601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
                    hDiff.write("{0.iD}\t{0.fq}\n".format(oEntry))
                    hNotes.write("{0.lemma}/{0.flags}\t{0.oldFq} > {0.fq}\n".format(oEntry))

    def createLexiconPackages (self, spBuild, version, oStatsLex, spDestGL=""):
        sLexName = LEX_PREFIX + version
        spLex = spBuild + '/' + sLexName
        dir_util.mkpath(spLex)
        # write lexicon
        self.sortLexiconByFreq()
        self.writeLexicon(spLex + '/' + sLexName + '.txt', version, oStatsLex)
        self.writeGrammarCheckerLexicon(spBuild + '/' + sLexName + '.lex', version)
        copyTemplate('lexique', spLex, 'README_lexique.txt', {'version': version})
        # zip
        createZipFiles(spLex, spBuild, sLexName + '.zip')
        # copy GC lexicon to Grammalecte
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
        with open(spBuild+'/dictDecl.txt', 'w', encoding='utf-8', newline="\n") as hDst:
            for oEntry in self.lEntry:
                if re.match("[SXFWIA]", oEntry.flags) and (oEntry.po.startswith("nom") or oEntry.po.startswith("adj")):
                    hDst.write(oEntry.getDeclination())
        if spDestGL:
            echo("   Fichier de déclinaison copié dans Grammalecte...")
            file_util.copy_file(spBuild+'/dictDecl.txt', spDestGL)

    def generateSpellVariants (self, nReq, spBuild):
        if nReq < 1: nReq = 1
        if nReq > 2: nReq = 2
        echo(" * Lexique >> variantes par suppression... n = " + str(nReq))
        with open(spBuild+'/dictSpellVariants-'+str(nReq)+'.txt', 'w', encoding='utf-8', newline="\n") as hDst:
            for oFlex in frozenset(self.lFlexions):
                hDst.write(oFlex.sFlexion+"\t_\t_\n")
                if len(oFlex.sFlexion) <= 2:
                    n = 0
                elif len(oFlex.sFlexion) <= 5:
                    n = 1
                else:
                    n = nReq
                #lTup = self._generatePhonetVariants(oFlex.sFlexion)
                lTup = self._generateDeleteVariants(oFlex.sFlexion, oFlex.sFlexion, n)
                for t in lTup:
                    sTag = t[1]  if "\t" in t[1]  else t[1]+"\t_"
                    hDst.write(t[0]+"\t"+sTag+"\n")

    _lTupPhonet = [ ("ph", "f"), ("qu", "k"), ("ss", "c"), ("ss", "ç"), ("ct", "x"),
        ("oe", "œ"), ("ae", "æ"), ("ei", "é"), ("ai", "é"), ("au", "o"), ("eau", "o"),
    ]

    def _generatePhonetVariants (self, s):
        l = []
        for torep, rep in self._lTupPhonet():
            for m in torep.finditer(s):
                l.append( (s[:m.start(0)] + rep + s[m.end(0):], str(m.start(0))+":"+str(m.start(0)+len(rep))+">"+torep) )
        return l

    def _generateDeleteVariants (self, sWord0, sWordCur, n):
        "renvoie une liste de tuples : (forme dégradée de sWord, code de genèse de sWord)"
        # caution: recursive function
        if n == 0:
            return []
        lTup = []
        for i in range(len(sWordCur)):
            sNew = sWordCur[0:i]+sWordCur[i+1:]
            lTup.append( ( sNew, self._generateAddCode(sWord0, sNew) ) )
            lTup += self._generateDeleteVariants(sWord0, sNew, n-1)
        return lTup

    def _generateAddCode (self, sWord, sCrippled):
        "returns addCode to generate sWord from sCrippled"
        sAdd = ""
        for i in range(len(sWord)):
            if sWord[i] != sCrippled[i:i+1]:
                sCrippled = sCrippled[:i] + sWord[i] + sCrippled[i:]
                if sAdd:
                    sAdd += "\t"
                sAdd += str(i)+"+"+sWord[i]
        return sAdd  if sAdd  else "0"



class Entree:
    def __init__ (self, sLine):
        self.lemma = ''
        self.flags = ''
        # champs morphologiques Hunspell







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







632
633
634
635
636
637
638






















































639
640
641
642
643
644
645
        with open(spBuild+'/dictDecl.txt', 'w', encoding='utf-8', newline="\n") as hDst:
            for oEntry in self.lEntry:
                if re.match("[SXFWIA]", oEntry.flags) and (oEntry.po.startswith("nom") or oEntry.po.startswith("adj")):
                    hDst.write(oEntry.getDeclination())
        if spDestGL:
            echo("   Fichier de déclinaison copié dans Grammalecte...")
            file_util.copy_file(spBuild+'/dictDecl.txt', spDestGL)
























































class Entree:
    def __init__ (self, sLine):
        self.lemma = ''
        self.flags = ''
        # champs morphologiques Hunspell
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
            sErr += 'lemme vide'
        if re.match(r"^\s", self.lemma):
            sErr += 'premier caractère un espace dans <' + self.lemma + '>'
        if re.search(r"\s$", self.lemma):
            sErr += 'espace en fin de lemme'
        if re.match(r"v[0123]", self.po) and not re.match(r"[eas_][ix_][tx_][nx_][pqreuvx_][mx_][ex_z][ax_z]\b", self.po[2:]):
            sErr += 'verbe inconnu: ' + self.po
        if (re.match(r"S[*.]", self.flags) and re.search("[sxz]$", self.lemma)) or (re.match(r"X[*.]", self.flags) and not re.search("[ul]$", self.lemma)):
            sErr += 'drapeau inutile'
        if self.iz == '' and re.match(r"[SXAI](?!=)", self.flags) and self.po:
            sErr += '[is] vide'
        if re.match(r"pl|sg|inv", self.iz):
            sErr += '[is] incomplet'
        if re.match(r"[FW]", self.flags) and re.search(r"epi|mas|fem|inv|sg|pl", self.iz):
            sErr += '[is] incohérent'
        if re.match(r".\*", self.flags) and re.match(r"[bcdfgjklmnpqrstvwxz]", self.lemma):
            sErr += 'drapeau pour lemme commençant par une voyelle'
        if re.search(r"pl|sg|inv", self.iz) and re.match(r"[SXAIFW](?!=)", self.flags):
            sErr += '[is] incohérent'
        if re.search(r"nom|adj", self.po) and re.match(r"(?i)[aâàäáeéèêëiîïíìoôöóòuûüúù]", self.lemma) and re.match("[SFWXAI][.]", self.flags) \
           and "pel" not in self.lx:
            sErr += 'le drapeau derait finir avec *'
        if self.iz.endswith(("mas", "fem", "epi")) and (not self.flags or not self.flags.startswith(("S", "X", "F", "W", "A", "I", "U"))):
            sErr += '[is] incomplet'
        if self.flags.startswith(("a", "b", "c", "d")) and not self.lemma.endswith("er"):
            sErr += "drapeau pour verbe du 1ᵉʳ groupe sur un lemme non conforme"
        if self.flags.startswith("f") and not self.lemma.endswith(("ir", "ïr")):
            sErr += "drapeau pour verbe du 2ᵉ groupe sur un lemme non conforme"
        if sErr:
            echo('   error -  id: ' + self.iD, end = "")
            echo('  ' + sErr + '  in  ' + self.__str__())








|







<
<


<
<
<


|







743
744
745
746
747
748
749
750
751
752
753
754
755
756
757


758
759



760
761
762
763
764
765
766
767
768
769
            sErr += 'lemme vide'
        if re.match(r"^\s", self.lemma):
            sErr += 'premier caractère un espace dans <' + self.lemma + '>'
        if re.search(r"\s$", self.lemma):
            sErr += 'espace en fin de lemme'
        if re.match(r"v[0123]", self.po) and not re.match(r"[eas_][ix_][tx_][nx_][pqreuvx_][mx_][ex_z][ax_z]\b", self.po[2:]):
            sErr += 'verbe inconnu: ' + self.po
        if (re.match(r"S[.]", self.flags) and re.search("[sxz]$", self.lemma)) or (re.match(r"X[.]", self.flags) and not re.search("[ul]$", self.lemma)):
            sErr += 'drapeau inutile'
        if self.iz == '' and re.match(r"[SXAI](?!=)", self.flags) and self.po:
            sErr += '[is] vide'
        if re.match(r"pl|sg|inv", self.iz):
            sErr += '[is] incomplet'
        if re.match(r"[FW]", self.flags) and re.search(r"epi|mas|fem|inv|sg|pl", self.iz):
            sErr += '[is] incohérent'


        if re.search(r"pl|sg|inv", self.iz) and re.match(r"[SXAIFW](?!=)", self.flags):
            sErr += '[is] incohérent'



        if self.iz.endswith(("mas", "fem", "epi")) and (not self.flags or not self.flags.startswith(("S", "X", "F", "W", "A", "I", "U"))):
            sErr += '[is] incomplet'
        if self.flags.startswith(("a0", "b0", "c0", "d0")) and not self.lemma.endswith("er"):
            sErr += "drapeau pour verbe du 1ᵉʳ groupe sur un lemme non conforme"
        if self.flags.startswith("f") and not self.lemma.endswith(("ir", "ïr")):
            sErr += "drapeau pour verbe du 2ᵉ groupe sur un lemme non conforme"
        if sErr:
            echo('   error -  id: ' + self.iD, end = "")
            echo('  ' + sErr + '  in  ' + self.__str__())

938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
            morph = self.lexMorph()
        lFlexions = [(self.lemma, morph, self.di)]  if iPR == 0 and not self.flags.endswith('()')  else []
        lFlexPrefix = []
        lFlexSuffix = []
        for sFlag in makeLongFlags(self.flags):
            if sFlag not in dFlags:
                if sFlag not in ['**', '()', '||', '--']:
                    lFlexions.append( (self.lemma, '[unknown flag: {}]'.format(sFlag)) )
                    echo("ERROR: "  + self.lemma + ' - unknown flag: ' + sFlag)
            else:
                oFlag = dFlags[sFlag]
                if not oFlag.bSfx:
                    # cas des préfixes
                    for oRule in oFlag.lRules:
                        if oRule.motif.search(self.lemma):







|







868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
            morph = self.lexMorph()
        lFlexions = [(self.lemma, morph, self.di)]  if iPR == 0 and not self.flags.endswith('()')  else []
        lFlexPrefix = []
        lFlexSuffix = []
        for sFlag in makeLongFlags(self.flags):
            if sFlag not in dFlags:
                if sFlag not in ['**', '()', '||', '--']:
                    lFlexions.append( (self.lemma, '[unknown flag: {}]'.format(sFlag), self.di) )
                    echo("ERROR: "  + self.lemma + ' - unknown flag: ' + sFlag)
            else:
                oFlag = dFlags[sFlag]
                if not oFlag.bSfx:
                    # cas des préfixes
                    for oRule in oFlag.lRules:
                        if oRule.motif.search(self.lemma):
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223

    def getGrammarCheckerRepr (self):
        return "{0.sFlexion}\t{0.oEntry.lemma}\t{1}\n".format(self, self._getSimpleTags())

    _dTagReplacement = {
        # POS
        "nom": ":N", "adj": ":A", "adv": ":W", "negadv": ":X", "mg": ":G", "nb": ":B", "nbro": ":Br",
        "loc.nom": ":Ñ", "loc.adj": ":Â", "loc.adv": ":Ŵ", "loc.verb": ":",
        "interj": ":J", "loc.interj": ":Ĵ", "titr": ":T",
        "mas": ":m", "fem": ":f", "epi": ":e", "sg": ":s", "pl": ":p", "inv": ":i",
        "infi": ":Y",
        "ppre": ":P", "ppas": ":Q",
        "ipre": ":Ip", "iimp": ":Iq", "ipsi": ":Is", "ifut": ":If",
        "spre": ":Sp", "simp": ":Sq", "cond": ":K", "impe": ":E",
        "1sg": ":1s", "1isg": ":1ś", "1jsg": ":1ŝ", "2sg": ":2s", "3sg": ":3s", "1pl": ":1p", "2pl": ":2p", "3pl": ":3p", "3pl!": ":3p!",
        "prepv": ":Rv", "prep": ":R", "loc.prep": ":Ŕ", "loc.prepv": "Ŕv",
        "detpos": ":Dp", "detdem": ":Dd", "detind": ":Di", "detneg": ":Dn", "detex": ":De", "det": ":D",
        "advint": ":U",
        "prodem": ":Od", "proind": ":Oi", "proint": ":Ot", "proneg": ":On", "prorel": ":Or", "proadv": ":Ow",
        "properobj": ":Oo", "propersuj": ":Os", "1pe": ":O1", "2pe": ":O2", "3pe": ":O3", "preverb": ":Ov",
        "cjco": ":Cc", "cjsub": ":Cs", "cj": ":C", "loc.cj": ":Ĉ", "loc.cjsub": ":Ĉs",
        "prn": ":M1", "patr": ":M2", "loc.patr": ":2", "npr": ":MP", "nompr": ":NM",
        "pfx": ":Zp", "sfx": ":Zs",
        "div": ":H",
        "err": ":F",
        "ponc": ":@p", "sign": ":@s",
        # LEX
        "symb": ";S"
    }

    def _getSimpleTags (self):
        s = ""
        # POS
        for sTag in self.sMorph.split():
            if sTag.startswith("v"):







|
|






|




|
|





|







1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153

    def getGrammarCheckerRepr (self):
        return "{0.sFlexion}\t{0.oEntry.lemma}\t{1}\n".format(self, self._getSimpleTags())

    _dTagReplacement = {
        # POS
        "nom": ":N", "adj": ":A", "adv": ":W", "negadv": ":X", "mg": ":G", "nb": ":B", "nbro": ":Br",
        "loc.nom": ":ÉN", "loc.adj": ":ÉA", "loc.adv": ":ÉW", "loc.verb": ":ÉV",
        "interj": ":J", "loc.interj": ":ÉJ", "titr": ":T",
        "mas": ":m", "fem": ":f", "epi": ":e", "sg": ":s", "pl": ":p", "inv": ":i",
        "infi": ":Y",
        "ppre": ":P", "ppas": ":Q",
        "ipre": ":Ip", "iimp": ":Iq", "ipsi": ":Is", "ifut": ":If",
        "spre": ":Sp", "simp": ":Sq", "cond": ":K", "impe": ":E",
        "1sg": ":1s", "1isg": ":1ś", "1jsg": ":1ŝ", "2sg": ":2s", "3sg": ":3s", "1pl": ":1p", "2pl": ":2p", "3pl": ":3p", "3pl!": ":3p!",
        "prepv": ":Rv", "prep": ":R", "loc.prep": ":ÉR", "loc.prepv": "ÉRv",
        "detpos": ":Dp", "detdem": ":Dd", "detind": ":Di", "detneg": ":Dn", "detex": ":De", "det": ":D",
        "advint": ":U",
        "prodem": ":Od", "proind": ":Oi", "proint": ":Ot", "proneg": ":On", "prorel": ":Or", "proadv": ":Ow",
        "properobj": ":Oo", "propersuj": ":Os", "1pe": ":O1", "2pe": ":O2", "3pe": ":O3", "preverb": ":Ov",
        "cjco": ":Cc", "cjsub": ":Cs", "cj": ":C", "loc.cj": ":ÉC", "loc.cjsub": ":ÉCs",
        "prn": ":M1", "patr": ":M2", "loc.patr": ":ÉM2", "npr": ":MP", "nompr": ":NM",
        "pfx": ":Zp", "sfx": ":Zs",
        "div": ":H",
        "err": ":F",
        "ponc": ":@p", "sign": ":@s",
        # LEX
        "symb": ";S", "unit": ";U"
    }

    def _getSimpleTags (self):
        s = ""
        # POS
        for sTag in self.sMorph.split():
            if sTag.startswith("v"):
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519

def main ():
    xParser = argparse.ArgumentParser()
    xParser.add_argument("-v", "--verdic", help="set dictionary version, i.e. 5.4", type=str, default="X.Y.z")
    xParser.add_argument("-m", "--mode", help="0: no tags,  1: Hunspell tags (default),  2: All tags", type=int, choices=[0, 1, 2], default=1)
    xParser.add_argument("-u", "--uncompress", help="do not use Hunspell compression", action="store_true")
    xParser.add_argument("-s", "--simplify", help="no virtual lemmas", action="store_true")
    xParser.add_argument("-sv", "--spellvariants", help="generate spell variants", action="store_true")
    xParser.add_argument("-gl", "--grammalecte", help="copy generated files to Grammalecte folders", action="store_true")
    xArgs = xParser.parse_args()

    if xArgs.simplify:
        xArgs.mode = 0
        xArgs.uncompress = True








<







1435
1436
1437
1438
1439
1440
1441

1442
1443
1444
1445
1446
1447
1448

def main ():
    xParser = argparse.ArgumentParser()
    xParser.add_argument("-v", "--verdic", help="set dictionary version, i.e. 5.4", type=str, default="X.Y.z")
    xParser.add_argument("-m", "--mode", help="0: no tags,  1: Hunspell tags (default),  2: All tags", type=int, choices=[0, 1, 2], default=1)
    xParser.add_argument("-u", "--uncompress", help="do not use Hunspell compression", action="store_true")
    xParser.add_argument("-s", "--simplify", help="no virtual lemmas", action="store_true")

    xParser.add_argument("-gl", "--grammalecte", help="copy generated files to Grammalecte folders", action="store_true")
    xArgs = xParser.parse_args()

    if xArgs.simplify:
        xArgs.mode = 0
        xArgs.uncompress = True

1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
    spBuild = BUILD_PATH + '/' + xArgs.verdic
    dir_util.mkpath(spBuild)

    ### Lecture des fichiers et création du dictionnaire
    oFrenchDict = Dictionnaire(xArgs.verdic, "French dictionary")
    for sFile in ['orthographe/FRANCAIS.dic']:
        oFrenchDict.readDictionary(sFile)
    oFrenchDict.readAffixes('orthographe/FRANCAIS_5.aff')

    ### Contrôle
    oFrenchDict.sortEntriesNatural()
    oFrenchDict.checkEntries()

    ### Lexique
    oFrenchDict.generateFlexions()
    oFrenchDict.calcMetagraphe()
    oFrenchDict.calcMetaphone2()

    #oFrenchDict.createNgrams(spBuild, 3)
    if xArgs.spellvariants:
        oFrenchDict.generateSpellVariants(1, spBuild)

    ### Statistiques
    spfStats = spBuild+'/'+STATS_NAME+xArgs.verdic+'.txt'
    oStatsLex = StatsLex(oFrenchDict)
    oStatsLex.addLexFromFile('lexique/corpus_data/stats_google_ngram_1.txt', 'G', 'Google 1-grams')
    oStatsLex.addLexFromFile('lexique/corpus_data/stats_frwiki.txt', 'W', 'Wikipédia')
    oStatsLex.addLexFromFile('lexique/corpus_data/stats_frwikisource.txt', 'S', 'Wikisource')







|











<
<







1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474


1475
1476
1477
1478
1479
1480
1481
    spBuild = BUILD_PATH + '/' + xArgs.verdic
    dir_util.mkpath(spBuild)

    ### Lecture des fichiers et création du dictionnaire
    oFrenchDict = Dictionnaire(xArgs.verdic, "French dictionary")
    for sFile in ['orthographe/FRANCAIS.dic']:
        oFrenchDict.readDictionary(sFile)
    oFrenchDict.readAffixes('orthographe/FRANCAIS_7.aff')

    ### Contrôle
    oFrenchDict.sortEntriesNatural()
    oFrenchDict.checkEntries()

    ### Lexique
    oFrenchDict.generateFlexions()
    oFrenchDict.calcMetagraphe()
    oFrenchDict.calcMetaphone2()

    #oFrenchDict.createNgrams(spBuild, 3)



    ### Statistiques
    spfStats = spBuild+'/'+STATS_NAME+xArgs.verdic+'.txt'
    oStatsLex = StatsLex(oFrenchDict)
    oStatsLex.addLexFromFile('lexique/corpus_data/stats_google_ngram_1.txt', 'G', 'Google 1-grams')
    oStatsLex.addLexFromFile('lexique/corpus_data/stats_frwiki.txt', 'W', 'Wikipédia')
    oStatsLex.addLexFromFile('lexique/corpus_data/stats_frwikisource.txt', 'S', 'Wikisource')