Grammalecte: Diff

Differences From Artifact [4630ffd3ad]:

File gc_lang/fr/dictionnaire/genfrdic.py — part of check-in [825f7ecc91] at 2018-08-20 07:43:25 on branch trunk — [fr] ne plus copier les dictionnaires Hunspell dans Grammalecte (user: olr, size: 70348) [annotate] [blame] [check-ins using]

To Artifact [1dab8fa65c]:

File gc_lang/fr/dictionnaire/genfrdic.py — part of check-in [2000f120a1] at 2018-08-28 08:16:05 on branch rg — [fr][bug] gendicfr: oubli du signe <:> avant le tag Ov (user: olr, size: 70526) [annotate] [blame] [check-ins using] [more...]

︙
294 295 296 297 298 299 300 ~~301~~ 302 303 304 305 ~~306 307~~ 308 309 310 311 312 313 314	294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314	- + - - + +	hDst.write(" > {0[1]:>8} : {0[0]}\n".format(elem)) def writeDictionary (self, spDst, dTplVars, nMode, bSimplified): "Écrire le fichier dictionnaire (.dic)" echo(' * Dictionnaire >> [ {}.dic ] ({})'.format(dTplVars['asciiName'], dTplVars['subDicts'])) nEntry = 0 for oEntry in self.lEntry: ~~if oEntry.di in dTplVars['subDicts']:~~ if oEntry.di in dTplVars['subDicts'] and " " not in oEntry.lemma: nEntry += 1 with open(spDst+'/'+dTplVars['asciiName']+'.dic', 'w', encoding='utf-8', newline="\n") as hDst: hDst.write(str(nEntry)+"\n") for oEntry in self.lEntry: ~~if oEntry.di in dTplVars['subDicts']: hDst.write(oEntry.get~~Entry~~Line(self, nMode, bSimplified))~~ if oEntry.di in dTplVars['subDicts'] and " " not in oEntry.lemma: hDst.write(oEntry.getHunspellLine(self, nMode, bSimplified)) def writeAffixes (self, spDst, dTplVars, nMode, bSimplified): "Écrire le fichier des affixes (.aff)" echo(' * Dictionnaire >> [ {}.aff ]'.format(dTplVars['asciiName'])) info = "# This Source Code Form is subject to the terms of the Mozilla Public\n" + \ "# License, v. 2.0. If a copy of the MPL was not distributed with this\n" + \ "# file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n" + \
︙
732 733 734 735 736 737 738 ~~739~~ 740 741 742 743 744 745 746 747 748 749 750 ~~751~~ 752 753 754 755 756 757 758	732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758	- + - +	self.iD = '0' # autres self.comment = '' self.err = '' self.nFlexions = 0 self.lFlexions = [] ~~self.s~~Radical~~ = ''~~ self.sStem = '' self.nOccur = 0 self.nAKO = -1 # Average known occurrences self.fFreq = 0 self.oldFq = '' sLine = sLine.rstrip(" \n") # commentaire if '#' in sLine: sLine, comment = sLine.split('#', 1) self.comment = comment.strip() # éléments de la ligne ~~elems = sLine.split()~~ elems = sLine.split("\t") nElems = len(elems) # lemme et drapeaux firstElems = elems[0].split('/') self.lemma = firstElems[0] self.flags = firstElems[1] if len(firstElems) > 1 else '' # morph for i in range(1, nElems):
︙
814 815 816 817 818 819 820 ~~821~~ 822 ~~823~~ 824 ~~825~~ 826 827 828 829 830 ~~831~~ 832 833 834 835 836 837 838	814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838	- + - + - + - +	if re.search(r"\s$", self.lemma): sErr += 'espace en fin de lemme' if re.match(r"v[0123]", self.po) and not re.match(r"[eas_][ix_][tx_][nx_][pqreuvx_][mx_][ex_z][ax_z]\b", self.po[2:]): sErr += 'verbe inconnu: ' + self.po if (re.match(r"S[.]", self.flags) and re.search("[sxz]$", self.lemma)) or (re.match(r"X[.]", self.flags) and not re.search("[ul]$", self.lemma)): sErr += 'drapeau inutile' if self.iz == '' and re.match(r"[SXAI](?!=)", self.flags) and self.po: ~~sErr += '[is]'~~ sErr += '[is] vide' if re.match(r"pl\|sg\|inv", self.iz): ~~sErr += '[is]'~~ sErr += '[is] incomplet' if re.match(r"[FW]", self.flags) and re.search(r"epi\|mas\|fem\|inv\|sg\|pl", self.iz): ~~sErr += '[is]'~~ sErr += '[is] incohérent' if re.match(r"[FW]", self.flags) and re.search(r"[^eë]$", self.lemma): sErr += "fin de lemme inapproprié" if re.match(r".\", self.flags) and re.match(r"[bcdfgjklmnpqrstvwxz]", self.lemma): sErr += 'drapeau pour lemme commençant par une voyelle' if re.search(r"pl\|sg\|inv", self.iz) and re.match(r"[SXAIFW](?!=)", self.flags): ~~sErr += '[is]'~~ sErr += '[is] incohérent' if re.search(r"nom\|adj", self.po) and re.match(r"(?i)[aâàäáeéèêëiîïíìoôöóòuûüúù]", self.lemma) and re.match("[SFWXAI][.]", self.flags) \ and "pel" not in self.lx: sErr += 'le drapeau derait finir avec ' if not self.flags and self.iz.endswith(("mas", "fem", "epi")): sErr += '[is] incomplet' if self.flags.startswith(("a", "b", "c", "d")) and not self.lemma.endswith("er"): sErr += "drapeau pour verbe du 1ᵉʳ groupe sur un lemme non conforme"
︙
863 864 865 866 867 868 869 ~~870 871~~ 872 873 874 875 876 877 878	863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878	- - + +	def keyTriNat (self): return (self.lemma.translate(CHARMAP), self.flags, self.po) def keyTriNum (self): return (self.lemma, self.flags, self.po) ~~def get~~Entry~~Line (self, oDict, nMode, bSimplified=False): sLine = self.lemma~~ def getHunspellLine (self, oDict, nMode, bSimplified=False): sLine = self.lemma.replace("’", "'") if self.flags: sLine += '/' sLine += self.flags if not oDict.bShortenTags or bSimplified else oDict.dAF[self.flags] if bSimplified: return sLine.replace("()", "") + "\n" if nMode > 0: sMorph = self.getMorph(nMode)
︙
922 923 924 925 926 927 928 ~~929 930 931 932 933 934 935 936~~ 937 938 939 940 941 942 943	922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946	+ + + - - - - - - - - + + + + + + + +	if not sMorph.endswith((" mas", " fem", " epi")): self.lFlexions.append( Flexion(self, sFlex, sMorph, sDic) ) self.nFlexions += 1 else: #echo(sFlex + " " + sMorph + ", ") pass # Drapeaux dont le lemme féminin doit être remplacé par le masculin dans la gestion des formes fléchies if self.st: self.sStem = self.st else: if self.flags.startswith(("F.", "F", "W.", "W")): # recherche de la forme masculine for t in lTuples: sMorph = self.clean(t[1]) if sMorph.endswith('mas'~~) or sMorph.endswith(~~'mas sg'~~) or sMorph.endswith(~~'mas inv'): self.s~~Radical~~ = t[0] else: self.s~~Radical~~ = self.lemma if self.flags.startswith(("F.", "F", "W.", "W")): # recherche de la forme masculine for t in lTuples: sMorph = self.clean(t[1]) if sMorph.endswith(('mas', 'mas sg', 'mas inv')): self.sStem = t[0] else: self.sStem = self.lemma # Tag duplicates d = {} for oFlex in self.lFlexions: d[oFlex.sFlexion] = d.get(oFlex.sFlexion, 0) + 1 for oFlex in self.lFlexions: oFlex.nDup = d[oFlex.sFlexion]
︙
1188 1189 1190 1191 1192 1193 1194 ~~1195~~ 1196 1197 1198 1199 1200 1201 1202	1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205	- +	sOccurs += t[1] + "\t" return "id\tFlexion\tLemme\tÉtiquettes\tMétagraphe (β)\tMetaphone2\tNotes\tSémantique\tÉtymologie\tSous-dictionnaire\t" + sOccurs + "Total occurrences\tDoublons\tMultiples\tFréquence\tIndice de fréquence\n" def __str__ (self, oStatsLex): sOccurs = '' for v in oStatsLex.dFlexions[self.sFlexion]: sOccurs += str(v) + "\t" return "{0.oEntry.iD}\t{0.sFlexion}\t{0.oEntry.s~~Radical~~}\t{0.sMorph}\t{0.metagfx}\t{0.metaph2}\t{0.oEntry.lx}\t{0.oEntry.se}\t{0.oEntry.et}\t{0.oEntry.di}{2}\t{1}{0.nOccur}\t{0.nDup}\t{0.nMulti}\t{0.fFreq:.15f}\t{0.cFq}\n".format(self, sOccurs, "/"+self.cDic if self.cDic != "" else "") return "{0.oEntry.iD}\t{0.sFlexion}\t{0.oEntry.sStem}\t{0.sMorph}\t{0.metagfx}\t{0.metaph2}\t{0.oEntry.lx}\t{0.oEntry.se}\t{0.oEntry.et}\t{0.oEntry.di}{2}\t{1}{0.nOccur}\t{0.nDup}\t{0.nMulti}\t{0.fFreq:.15f}\t{0.cFq}\n".format(self, sOccurs, "/"+self.cDic if self.cDic != "" else "") @classmethod def simpleHeader (cls): return "# :POS ;LEX ~SEM =FQ /DIC\n" def getGrammarCheckerRepr (self): return "{0.sFlexion}\t{0.oEntry.lemma}\t{1}\n".format(self, self._getSimpleTags())
︙
1212 1213 1214 1215 1216 1217 1218 ~~1219~~ 1220 1221 1222 1223 1224 1225 1226	1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229	- +	"ipre": ":Ip", "iimp": ":Iq", "ipsi": ":Is", "ifut": ":If", "spre": ":Sp", "simp": ":Sq", "cond": ":K", "impe": ":E", "1sg": ":1s", "1isg": ":1ś", "1jsg": ":1ŝ", "2sg": ":2s", "3sg": ":3s", "1pl": ":1p", "2pl": ":2p", "3pl": ":3p", "3pl!": ":3p!", "prepv": ":Rv", "prep": ":R", "loc.prep": ":Ŕ", "detpos": ":Dp", "detdem": ":Dd", "detind": ":Di", "detneg": ":Dn", "detex": ":De", "det": ":D", "advint": ":U", "prodem": ":Od", "proind": ":Oi", "proint": ":Ot", "proneg": ":On", "prorel": ":Or", "proadv": ":Ow", ~~"properobj": ":Oo", "propersuj": ":Os", "1pe": ":O1", "2pe": ":O2", "3pe": ":O3",~~ "properobj": ":Oo", "propersuj": ":Os", "1pe": ":O1", "2pe": ":O2", "3pe": ":O3", "preverb": ":Ov", "cjco": ":Cc", "cjsub": ":Cs", "cj": ":C", "loc.cj": ":Ĉ", "loc.cjsub": ":Ĉs", "prn": ":M1", "patr": ":M2", "loc.patr": ":Ḿ2", "npr": ":MP", "nompr": ":NM", "pfx": ":Zp", "sfx": ":Zs", "div": ":H", "err": ":#", # LEX "symb": ";S"
︙
1254 1255 1256 1257 1258 1259 1260 ~~1261~~ 1262 1263 ~~1264~~ 1265 1266 1267 1268 1269 1270 1271	1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274	- + - +	s += "/" + self.oEntry.di return s def keyTriNat (self): return (self.sFlexion.translate(CHARMAP), self.sMorph) def keyFreq (self): ~~return (100-self.fFreq, self.oEntry.s~~Radical~~, self.sFlexion)~~ return (100-self.fFreq, self.oEntry.sStem, self.sFlexion) def keyOcc (self): ~~return (self.nOccur, self.oEntry.s~~Radical~~, self.sFlexion)~~ return (self.nOccur, self.oEntry.sStem, self.sFlexion) def keyIdx (self): return self.oEntry.iD def keyFlexion (self): return self.sFlexion
︙
1497 1498 1499 1500 1501 1502 1503 ~~1504~~ 1505 1506 1507 1508 1509 1510 1511	1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513	-	hDst.write(str(t)+"\n") for e in self.dFlexions.items(): hDst.write("{} - {}\n".format(e[0], e[1])) def main (): xParser = argparse.ArgumentParser() xParser.add_argument("-v", "--verdic", help="set dictionary version, i.e. 5.4", type=str, default="X.Y.z") xParser.add_argument("-m", "--mode", help="0: no tags, 1: Hunspell tags (default), 2: All tags", type=int, choices=[0, 1, 2], default=1) xParser.add_argument("-u", "--uncompress", help="do not use Hunspell compression", action="store_true") xParser.add_argument("-s", "--simplify", help="no virtual lemmas", action="store_true") xParser.add_argument("-sv", "--spellvariants", help="generate spell variants", action="store_true") xParser.add_argument("-gl", "--grammalecte", help="copy generated files to Grammalecte folders", action="store_true")
︙
1555 1556 1557 1558 1559 1560 1561 ~~1562~~ 1563 1564 1565 1566 1567 1568 1569	1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571	- +	oFrenchDict.calculateStats(oStatsLex, spfStats) ### écriture des paquets echo("Création des paquets...") spLexiconDestGL = "../../../lexicons" if xArgs.grammalecte else "" spLibreOfficeExtDestGL = "../oxt/Dictionnaires/dictionaries" if xArgs.grammalecte else "" ~~spMozillaExtDestGL = "" # ~~les dictionnaires pour~~ Hunspell ~~ne s~~on~~t plu~~s util~~isés pour l’instant da~~ns ~~Firefox / Thunderbird~~~~ spMozillaExtDestGL = "" if xArgs.grammalecte else "" # no more Hunspell dictionaries in Mozilla extensions for now spDataDestGL = "../data" if xArgs.grammalecte else "" if not xArgs.uncompress: oFrenchDict.defineAbreviatedTags(xArgs.mode, spfStats) oFrenchDict.createFiles(spBuild, [dMODERNE, dTOUTESVAR, dCLASSIQUE, dREFORME1990], xArgs.mode, xArgs.simplify) oFrenchDict.createLexiconPackages(spBuild, xArgs.verdic, oStatsLex, spLexiconDestGL) oFrenchDict.createFileIfqForDB(spBuild)
︙

Grammalecte Diff

Differences From Artifact [4630ffd3ad]:

To Artifact [1dab8fa65c]: