Grammalecte  Diff

Differences From Artifact [4630ffd3ad]:

To Artifact [1dab8fa65c]:


294
295
296
297
298
299
300
301

302
303
304
305
306
307


308
309
310
311
312
313
314
294
295
296
297
298
299
300

301
302
303
304
305


306
307
308
309
310
311
312
313
314







-
+




-
-
+
+







                hDst.write("  > {0[1]:>8} : {0[0]}\n".format(elem))

    def writeDictionary (self, spDst, dTplVars, nMode, bSimplified):
        "Écrire le fichier dictionnaire (.dic)"
        echo(' * Dictionnaire >> [ {}.dic ] ({})'.format(dTplVars['asciiName'], dTplVars['subDicts']))
        nEntry = 0
        for oEntry in self.lEntry:
            if oEntry.di in dTplVars['subDicts']:
            if oEntry.di in dTplVars['subDicts'] and " " not in oEntry.lemma:
                nEntry += 1
        with open(spDst+'/'+dTplVars['asciiName']+'.dic', 'w', encoding='utf-8', newline="\n") as hDst:
            hDst.write(str(nEntry)+"\n")
            for oEntry in self.lEntry:
                if oEntry.di in dTplVars['subDicts']:
                    hDst.write(oEntry.getEntryLine(self, nMode, bSimplified))
                if oEntry.di in dTplVars['subDicts'] and " " not in oEntry.lemma:
                    hDst.write(oEntry.getHunspellLine(self, nMode, bSimplified))

    def writeAffixes (self, spDst, dTplVars, nMode, bSimplified):
        "Écrire le fichier des affixes (.aff)"
        echo(' * Dictionnaire >> [ {}.aff ]'.format(dTplVars['asciiName']))
        info = "# This Source Code Form is subject to the terms of the Mozilla Public\n" + \
               "# License, v. 2.0. If a copy of the MPL was not distributed with this\n" + \
               "# file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n" + \
732
733
734
735
736
737
738
739

740
741
742
743
744
745
746
747
748
749
750
751

752
753
754
755
756
757
758
732
733
734
735
736
737
738

739
740
741
742
743
744
745
746
747
748
749
750

751
752
753
754
755
756
757
758







-
+











-
+







        self.iD = '0'

        # autres
        self.comment = ''
        self.err = ''
        self.nFlexions = 0
        self.lFlexions = []
        self.sRadical = ''
        self.sStem = ''
        self.nOccur = 0
        self.nAKO = -1   # Average known occurrences
        self.fFreq = 0
        self.oldFq = ''

        sLine = sLine.rstrip(" \n")
        # commentaire
        if '#' in sLine:
            sLine, comment = sLine.split('#', 1)
            self.comment = comment.strip()
        # éléments de la ligne
        elems = sLine.split()
        elems = sLine.split("\t")
        nElems = len(elems)
        # lemme et drapeaux
        firstElems = elems[0].split('/')
        self.lemma = firstElems[0]
        self.flags = firstElems[1]  if len(firstElems) > 1  else ''
        # morph
        for i in range(1, nElems):
814
815
816
817
818
819
820
821

822
823

824
825

826
827
828
829
830
831

832
833
834
835
836
837
838
814
815
816
817
818
819
820

821
822

823
824

825
826
827
828
829
830

831
832
833
834
835
836
837
838







-
+

-
+

-
+





-
+







        if re.search(r"\s$", self.lemma):
            sErr += 'espace en fin de lemme'
        if re.match(r"v[0123]", self.po) and not re.match(r"[eas_][ix_][tx_][nx_][pqreuvx_][mx_][ex_z][ax_z]\b", self.po[2:]):
            sErr += 'verbe inconnu: ' + self.po
        if (re.match(r"S[*.]", self.flags) and re.search("[sxz]$", self.lemma)) or (re.match(r"X[*.]", self.flags) and not re.search("[ul]$", self.lemma)):
            sErr += 'drapeau inutile'
        if self.iz == '' and re.match(r"[SXAI](?!=)", self.flags) and self.po:
            sErr += '[is]'
            sErr += '[is] vide'
        if re.match(r"pl|sg|inv", self.iz):
            sErr += '[is]'
            sErr += '[is] incomplet'
        if re.match(r"[FW]", self.flags) and re.search(r"epi|mas|fem|inv|sg|pl", self.iz):
            sErr += '[is]'
            sErr += '[is] incohérent'
        if re.match(r"[FW]", self.flags) and re.search(r"[^eë]$", self.lemma):
            sErr += "fin de lemme inapproprié"
        if re.match(r".\*", self.flags) and re.match(r"[bcdfgjklmnpqrstvwxz]", self.lemma):
            sErr += 'drapeau pour lemme commençant par une voyelle'
        if re.search(r"pl|sg|inv", self.iz) and re.match(r"[SXAIFW](?!=)", self.flags):
            sErr += '[is]'
            sErr += '[is] incohérent'
        if re.search(r"nom|adj", self.po) and re.match(r"(?i)[aâàäáeéèêëiîïíìoôöóòuûüúù]", self.lemma) and re.match("[SFWXAI][.]", self.flags) \
           and "pel" not in self.lx:
            sErr += 'le drapeau derait finir avec *'
        if not self.flags and self.iz.endswith(("mas", "fem", "epi")):
            sErr += '[is] incomplet'
        if self.flags.startswith(("a", "b", "c", "d")) and not self.lemma.endswith("er"):
            sErr += "drapeau pour verbe du 1ᵉʳ groupe sur un lemme non conforme"
863
864
865
866
867
868
869
870
871


872
873
874
875
876
877
878
863
864
865
866
867
868
869


870
871
872
873
874
875
876
877
878







-
-
+
+








    def keyTriNat (self):
        return (self.lemma.translate(CHARMAP), self.flags, self.po)

    def keyTriNum (self):
        return (self.lemma, self.flags, self.po)

    def getEntryLine (self, oDict, nMode, bSimplified=False):
        sLine = self.lemma
    def getHunspellLine (self, oDict, nMode, bSimplified=False):
        sLine = self.lemma.replace("’", "'")
        if self.flags:
            sLine += '/'
            sLine += self.flags  if not oDict.bShortenTags or bSimplified  else oDict.dAF[self.flags]
        if bSimplified:
            return sLine.replace("()", "") + "\n"
        if nMode > 0:
            sMorph = self.getMorph(nMode)
922
923
924
925
926
927
928



929
930
931
932
933
934
935
936








937
938
939
940
941
942
943
922
923
924
925
926
927
928
929
930
931








932
933
934
935
936
937
938
939
940
941
942
943
944
945
946







+
+
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+







                if not sMorph.endswith((" mas", " fem", " epi")):
                    self.lFlexions.append( Flexion(self, sFlex, sMorph, sDic) )
                    self.nFlexions += 1
                else:
                    #echo(sFlex + " " + sMorph + ", ")
                    pass
        # Drapeaux dont le lemme féminin doit être remplacé par le masculin dans la gestion des formes fléchies
        if self.st:
            self.sStem = self.st
        else:
        if self.flags.startswith(("F.", "F*", "W.", "W*")):
            # recherche de la forme masculine
            for t in lTuples:
                sMorph = self.clean(t[1])
                if sMorph.endswith('mas') or sMorph.endswith('mas sg') or sMorph.endswith('mas inv'):
                    self.sRadical = t[0]
        else:
            self.sRadical = self.lemma
            if self.flags.startswith(("F.", "F*", "W.", "W*")):
                # recherche de la forme masculine
                for t in lTuples:
                    sMorph = self.clean(t[1])
                    if sMorph.endswith(('mas', 'mas sg', 'mas inv')):
                        self.sStem = t[0]
            else:
                self.sStem = self.lemma
        # Tag duplicates
        d = {}
        for oFlex in self.lFlexions:
            d[oFlex.sFlexion] = d.get(oFlex.sFlexion, 0) + 1
        for oFlex in self.lFlexions:
            oFlex.nDup = d[oFlex.sFlexion]

1188
1189
1190
1191
1192
1193
1194
1195

1196
1197
1198
1199
1200
1201
1202
1191
1192
1193
1194
1195
1196
1197

1198
1199
1200
1201
1202
1203
1204
1205







-
+







            sOccurs += t[1] + "\t"
        return "id\tFlexion\tLemme\tÉtiquettes\tMétagraphe (β)\tMetaphone2\tNotes\tSémantique\tÉtymologie\tSous-dictionnaire\t" + sOccurs + "Total occurrences\tDoublons\tMultiples\tFréquence\tIndice de fréquence\n"

    def __str__ (self, oStatsLex):
        sOccurs = ''
        for v in oStatsLex.dFlexions[self.sFlexion]:
            sOccurs += str(v) + "\t"
        return "{0.oEntry.iD}\t{0.sFlexion}\t{0.oEntry.sRadical}\t{0.sMorph}\t{0.metagfx}\t{0.metaph2}\t{0.oEntry.lx}\t{0.oEntry.se}\t{0.oEntry.et}\t{0.oEntry.di}{2}\t{1}{0.nOccur}\t{0.nDup}\t{0.nMulti}\t{0.fFreq:.15f}\t{0.cFq}\n".format(self, sOccurs, "/"+self.cDic if self.cDic != "*" else "")
        return "{0.oEntry.iD}\t{0.sFlexion}\t{0.oEntry.sStem}\t{0.sMorph}\t{0.metagfx}\t{0.metaph2}\t{0.oEntry.lx}\t{0.oEntry.se}\t{0.oEntry.et}\t{0.oEntry.di}{2}\t{1}{0.nOccur}\t{0.nDup}\t{0.nMulti}\t{0.fFreq:.15f}\t{0.cFq}\n".format(self, sOccurs, "/"+self.cDic if self.cDic != "*" else "")

    @classmethod
    def simpleHeader (cls):
        return "# :POS ;LEX ~SEM =FQ /DIC\n"

    def getGrammarCheckerRepr (self):
        return "{0.sFlexion}\t{0.oEntry.lemma}\t{1}\n".format(self, self._getSimpleTags())
1212
1213
1214
1215
1216
1217
1218
1219

1220
1221
1222
1223
1224
1225
1226
1215
1216
1217
1218
1219
1220
1221

1222
1223
1224
1225
1226
1227
1228
1229







-
+







        "ipre": ":Ip", "iimp": ":Iq", "ipsi": ":Is", "ifut": ":If",
        "spre": ":Sp", "simp": ":Sq", "cond": ":K", "impe": ":E",
        "1sg": ":1s", "1isg": ":1ś", "1jsg": ":1ŝ", "2sg": ":2s", "3sg": ":3s", "1pl": ":1p", "2pl": ":2p", "3pl": ":3p", "3pl!": ":3p!",
        "prepv": ":Rv", "prep": ":R", "loc.prep": ":Ŕ",
        "detpos": ":Dp", "detdem": ":Dd", "detind": ":Di", "detneg": ":Dn", "detex": ":De", "det": ":D",
        "advint": ":U",
        "prodem": ":Od", "proind": ":Oi", "proint": ":Ot", "proneg": ":On", "prorel": ":Or", "proadv": ":Ow",
        "properobj": ":Oo", "propersuj": ":Os", "1pe": ":O1", "2pe": ":O2", "3pe": ":O3",
        "properobj": ":Oo", "propersuj": ":Os", "1pe": ":O1", "2pe": ":O2", "3pe": ":O3", "preverb": ":Ov",
        "cjco": ":Cc", "cjsub": ":Cs", "cj": ":C", "loc.cj": ":Ĉ", "loc.cjsub": ":Ĉs",
        "prn": ":M1", "patr": ":M2", "loc.patr": ":Ḿ2", "npr": ":MP", "nompr": ":NM",
        "pfx": ":Zp", "sfx": ":Zs",
        "div": ":H",
        "err": ":#",
        # LEX
        "symb": ";S"
1254
1255
1256
1257
1258
1259
1260
1261

1262
1263
1264

1265
1266
1267
1268
1269
1270
1271
1257
1258
1259
1260
1261
1262
1263

1264
1265
1266

1267
1268
1269
1270
1271
1272
1273
1274







-
+


-
+







            s += "/" + self.oEntry.di
        return s

    def keyTriNat (self):
        return (self.sFlexion.translate(CHARMAP), self.sMorph)

    def keyFreq (self):
        return (100-self.fFreq, self.oEntry.sRadical, self.sFlexion)
        return (100-self.fFreq, self.oEntry.sStem, self.sFlexion)

    def keyOcc (self):
        return (self.nOccur, self.oEntry.sRadical, self.sFlexion)
        return (self.nOccur, self.oEntry.sStem, self.sFlexion)

    def keyIdx (self):
        return self.oEntry.iD

    def keyFlexion (self):
        return self.sFlexion

1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1500
1501
1502
1503
1504
1505
1506

1507
1508
1509
1510
1511
1512
1513







-







                hDst.write(str(t)+"\n")
            for e in self.dFlexions.items():
                hDst.write("{} - {}\n".format(e[0], e[1]))



def main ():

    xParser = argparse.ArgumentParser()
    xParser.add_argument("-v", "--verdic", help="set dictionary version, i.e. 5.4", type=str, default="X.Y.z")
    xParser.add_argument("-m", "--mode", help="0: no tags,  1: Hunspell tags (default),  2: All tags", type=int, choices=[0, 1, 2], default=1)
    xParser.add_argument("-u", "--uncompress", help="do not use Hunspell compression", action="store_true")
    xParser.add_argument("-s", "--simplify", help="no virtual lemmas", action="store_true")
    xParser.add_argument("-sv", "--spellvariants", help="generate spell variants", action="store_true")
    xParser.add_argument("-gl", "--grammalecte", help="copy generated files to Grammalecte folders", action="store_true")
1555
1556
1557
1558
1559
1560
1561
1562

1563
1564
1565
1566
1567
1568
1569
1557
1558
1559
1560
1561
1562
1563

1564
1565
1566
1567
1568
1569
1570
1571







-
+







    oFrenchDict.calculateStats(oStatsLex, spfStats)

    ### écriture des paquets
    echo("Création des paquets...")

    spLexiconDestGL = "../../../lexicons"  if xArgs.grammalecte  else ""
    spLibreOfficeExtDestGL = "../oxt/Dictionnaires/dictionaries"  if xArgs.grammalecte  else ""
    spMozillaExtDestGL = ""  # les dictionnaires pour Hunspell ne sont plus utilisés pour l’instant dans Firefox / Thunderbird
    spMozillaExtDestGL = ""  if xArgs.grammalecte  else "" # no more Hunspell dictionaries in Mozilla extensions for now
    spDataDestGL = "../data"  if xArgs.grammalecte  else ""

    if not xArgs.uncompress:
        oFrenchDict.defineAbreviatedTags(xArgs.mode, spfStats)
    oFrenchDict.createFiles(spBuild, [dMODERNE, dTOUTESVAR, dCLASSIQUE, dREFORME1990], xArgs.mode, xArgs.simplify)
    oFrenchDict.createLexiconPackages(spBuild, xArgs.verdic, oStatsLex, spLexiconDestGL)
    oFrenchDict.createFileIfqForDB(spBuild)