Grammalecte  Check-in [0f24ce1e2c]

Overview
Comment:[fr] genfrdic: add flexion id to lexicon
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | fr
Files: files | file ages | folders
SHA3-256: 0f24ce1e2c2f4e978c4fe9982afd3a9af7e841cd7b1c8b97e5cf708c571def2b
User & Date: olr on 2019-04-04 12:16:49
Other Links: manifest | tags
Context
2019-04-09
13:39
[fr] faux positif check-in: 7ee30b8b77 user: olr tags: trunk, fr
2019-04-04
12:16
[fr] genfrdic: add flexion id to lexicon check-in: 0f24ce1e2c user: olr tags: trunk, fr
08:03
[graphspell] suggestions with trailing numbers: avoid to repeat splitting for each dictionary check-in: e32c092585 user: olr tags: trunk, graphspell
Changes

Modified gc_lang/fr/dictionnaire/genfrdic.py from [fef905a8c3] to [a88bd785e0].

909
910
911
912
913
914
915
916

917
918
919
920
921
922

923

924
925
926
927
928
929
930
931
909
910
911
912
913
914
915

916
917
918
919
920
921
922
923

924

925
926
927
928
929
930
931







-
+






+
-
+
-







        if self.di != '*':
            txt += ' di:' + self.di
        return txt

    def generateFlexions (self, dFlags):
        lTuples = self._flechir(dFlags)
        # création des objects flexions
        self.nFlexions = 0
        self.nFlexion = 0
        self.lFlexions = []
        sReject = ""
        for sFlex, sMorph, sDic in lTuples:
            if '+' not in sMorph:
                sMorph = self.clean(sMorph)
                if not sMorph.endswith((" mas", " fem", " epi")):
                    self.nFlexion += 1
                    self.lFlexions.append( Flexion(self, sFlex, sMorph, sDic) )
                    self.lFlexions.append( Flexion(self, sFlex, sMorph, sDic, self.nFlexion) )
                    self.nFlexions += 1
                else:
                    #echo(sFlex + " " + sMorph + ", ")
                    pass
        # Drapeaux dont le lemme féminin doit être remplacé par le masculin dans la gestion des formes fléchies
        if self.st:
            self.sStem = self.st
        else:
1143
1144
1145
1146
1147
1148
1149
1150

1151
1152
1153
1154

1155
1156
1157
1158
1159
1160
1161
1143
1144
1145
1146
1147
1148
1149

1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162







-
+




+







        self.fFreq = (self.nOccur * 100) / nTot
        self.oldFq = self.fq
        self.fq = getIfq(self.fFreq)



class Flexion:
    def __init__ (self, oEntry, sFlex='', sMorph='', cDic=''):
    def __init__ (self, oEntry, sFlex='', sMorph='', cDic='', nFlexId=0):
        self.oEntry = oEntry
        self.sFlexion = sFlex
        self.sMorph = sMorph
        self.cDic    = cDic
        self.nFlexId = nFlexId
        self.nOccur  = 0
        self.bBlocked  = False
        self.nDup    = 0    # duplicates in the same entry
        self.nMulti  = 0    # duplicates with other entries
        self.lMulti  = []   # list of similar flexions
        self.fFreq   = 0
        self.cFq     = ''
1185
1186
1187
1188
1189
1190
1191
1192

1193
1194
1195
1196
1197
1198

1199
1200
1201
1202
1203
1204
1205
1186
1187
1188
1189
1190
1191
1192

1193
1194
1195
1196
1197
1198

1199
1200
1201
1202
1203
1204
1205
1206







-
+





-
+







        self.metaph2 = t[0]  if not t[1]  else t[0]+"/"+t[1]

    @classmethod
    def header (cls, oStatsLex):
        sOccurs = ''
        for t in oStatsLex.lLex:
            sOccurs += t[1] + "\t"
        return "id\tFlexion\tLemme\tÉtiquettes\tMétagraphe (β)\tMetaphone2\tNotes\tSémantique\tÉtymologie\tSous-dictionnaire\t" + sOccurs + "Total occurrences\tDoublons\tMultiples\tFréquence\tIndice de fréquence\n"
        return "id\tfid\tFlexion\tLemme\tÉtiquettes\tMétagraphe (β)\tMetaphone2\tNotes\tSémantique\tÉtymologie\tSous-dictionnaire\t" + sOccurs + "Total occurrences\tDoublons\tMultiples\tFréquence\tIndice de fréquence\n"

    def __str__ (self, oStatsLex):
        sOccurs = ''
        for v in oStatsLex.dFlexions[self.sFlexion]:
            sOccurs += str(v) + "\t"
        return "{0.oEntry.iD}\t{0.sFlexion}\t{0.oEntry.sStem}\t{0.sMorph}\t{0.metagfx}\t{0.metaph2}\t{0.oEntry.lx}\t{0.oEntry.se}\t{0.oEntry.et}\t{0.oEntry.di}{2}\t{1}{0.nOccur}\t{0.nDup}\t{0.nMulti}\t{0.fFreq:.15f}\t{0.cFq}\n".format(self, sOccurs, "/"+self.cDic if self.cDic != "*" else "")
        return "{0.oEntry.iD}\t{0.nFlexId}\t{0.sFlexion}\t{0.oEntry.sStem}\t{0.sMorph}\t{0.metagfx}\t{0.metaph2}\t{0.oEntry.lx}\t{0.oEntry.se}\t{0.oEntry.et}\t{0.oEntry.di}{2}\t{1}{0.nOccur}\t{0.nDup}\t{0.nMulti}\t{0.fFreq:.15f}\t{0.cFq}\n".format(self, sOccurs, "/"+self.cDic if self.cDic != "*" else "")

    @classmethod
    def simpleHeader (cls):
        return "# :POS ;LEX ~SEM =FQ /DIC\n"

    def getGrammarCheckerRepr (self):
        return "{0.sFlexion}\t{0.oEntry.lemma}\t{1}\n".format(self, self._getSimpleTags())