Grammalecte: Diff

Differences From Artifact [4630ffd3ad]:

File gc_lang/fr/dictionnaire/genfrdic.py — part of check-in [825f7ecc91] at 2018-08-20 07:43:25 on branch trunk — [fr] ne plus copier les dictionnaires Hunspell dans Grammalecte (user: olr, size: 70348) [annotate] [blame] [check-ins using]

To Artifact [1dab8fa65c]:

File gc_lang/fr/dictionnaire/genfrdic.py — part of check-in [2000f120a1] at 2018-08-28 08:16:05 on branch rg — [fr][bug] gendicfr: oubli du signe <:> avant le tag Ov (user: olr, size: 70526) [annotate] [blame] [check-ins using] [more...]

︙			︙
294 295 296 297 298 299 300 ~~301~~ 302 303 304 305 ~~306 307~~ 308 309 310 311 312 313 314	hDst.write(" > {0[1]:>8} : {0[0]}\n".format(elem)) def writeDictionary (self, spDst, dTplVars, nMode, bSimplified): "Écrire le fichier dictionnaire (.dic)" echo(' * Dictionnaire >> [ {}.dic ] ({})'.format(dTplVars['asciiName'], dTplVars['subDicts'])) nEntry = 0 for oEntry in self.lEntry: ~~if oEntry.di in dTplVars['subDicts']:~~ nEntry += 1 with open(spDst+'/'+dTplVars['asciiName']+'.dic', 'w', encoding='utf-8', newline="\n") as hDst: hDst.write(str(nEntry)+"\n") for oEntry in self.lEntry: ~~if oEntry.di in dTplVars['subDicts']: hDst.write(oEntry.get~~Entry~~Line(self, nMode, bSimplified))~~ def writeAffixes (self, spDst, dTplVars, nMode, bSimplified): "Écrire le fichier des affixes (.aff)" echo(' * Dictionnaire >> [ {}.aff ]'.format(dTplVars['asciiName'])) info = "# This Source Code Form is subject to the terms of the Mozilla Public\n" + \ "# License, v. 2.0. If a copy of the MPL was not distributed with this\n" + \ "# file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n" + \	\| \| \|	294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314	hDst.write(" > {0[1]:>8} : {0[0]}\n".format(elem)) def writeDictionary (self, spDst, dTplVars, nMode, bSimplified): "Écrire le fichier dictionnaire (.dic)" echo(' * Dictionnaire >> [ {}.dic ] ({})'.format(dTplVars['asciiName'], dTplVars['subDicts'])) nEntry = 0 for oEntry in self.lEntry: if oEntry.di in dTplVars['subDicts'] and " " not in oEntry.lemma: nEntry += 1 with open(spDst+'/'+dTplVars['asciiName']+'.dic', 'w', encoding='utf-8', newline="\n") as hDst: hDst.write(str(nEntry)+"\n") for oEntry in self.lEntry: if oEntry.di in dTplVars['subDicts'] and " " not in oEntry.lemma: hDst.write(oEntry.getHunspellLine(self, nMode, bSimplified)) def writeAffixes (self, spDst, dTplVars, nMode, bSimplified): "Écrire le fichier des affixes (.aff)" echo(' * Dictionnaire >> [ {}.aff ]'.format(dTplVars['asciiName'])) info = "# This Source Code Form is subject to the terms of the Mozilla Public\n" + \ "# License, v. 2.0. If a copy of the MPL was not distributed with this\n" + \ "# file, You can obtain one at http://mozilla.org/MPL/2.0/.\n\n" + \
︙			︙
732 733 734 735 736 737 738 ~~739~~ 740 741 742 743 744 745 746 747 748 749 750 ~~751~~ 752 753 754 755 756 757 758	self.iD = '0' # autres self.comment = '' self.err = '' self.nFlexions = 0 self.lFlexions = [] ~~self.s~~Radical~~ = ''~~ self.nOccur = 0 self.nAKO = -1 # Average known occurrences self.fFreq = 0 self.oldFq = '' sLine = sLine.rstrip(" \n") # commentaire if '#' in sLine: sLine, comment = sLine.split('#', 1) self.comment = comment.strip() # éléments de la ligne ~~elems = sLine.split()~~ nElems = len(elems) # lemme et drapeaux firstElems = elems[0].split('/') self.lemma = firstElems[0] self.flags = firstElems[1] if len(firstElems) > 1 else '' # morph for i in range(1, nElems):	\| \|	732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758	self.iD = '0' # autres self.comment = '' self.err = '' self.nFlexions = 0 self.lFlexions = [] self.sStem = '' self.nOccur = 0 self.nAKO = -1 # Average known occurrences self.fFreq = 0 self.oldFq = '' sLine = sLine.rstrip(" \n") # commentaire if '#' in sLine: sLine, comment = sLine.split('#', 1) self.comment = comment.strip() # éléments de la ligne elems = sLine.split("\t") nElems = len(elems) # lemme et drapeaux firstElems = elems[0].split('/') self.lemma = firstElems[0] self.flags = firstElems[1] if len(firstElems) > 1 else '' # morph for i in range(1, nElems):
︙			︙
814 815 816 817 818 819 820 ~~821~~ 822 ~~823~~ 824 ~~825~~ 826 827 828 829 830 ~~831~~ 832 833 834 835 836 837 838	if re.search(r"\s$", self.lemma): sErr += 'espace en fin de lemme' if re.match(r"v[0123]", self.po) and not re.match(r"[eas_][ix_][tx_][nx_][pqreuvx_][mx_][ex_z][ax_z]\b", self.po[2:]): sErr += 'verbe inconnu: ' + self.po if (re.match(r"S[.]", self.flags) and re.search("[sxz]$", self.lemma)) or (re.match(r"X[.]", self.flags) and not re.search("[ul]$", self.lemma)): sErr += 'drapeau inutile' if self.iz == '' and re.match(r"[SXAI](?!=)", self.flags) and self.po: ~~sErr += '[is]'~~ if re.match(r"pl\|sg\|inv", self.iz): ~~sErr += '[is]'~~ if re.match(r"[FW]", self.flags) and re.search(r"epi\|mas\|fem\|inv\|sg\|pl", self.iz): ~~sErr += '[is]'~~ if re.match(r"[FW]", self.flags) and re.search(r"[^eë]$", self.lemma): sErr += "fin de lemme inapproprié" if re.match(r".\", self.flags) and re.match(r"[bcdfgjklmnpqrstvwxz]", self.lemma): sErr += 'drapeau pour lemme commençant par une voyelle' if re.search(r"pl\|sg\|inv", self.iz) and re.match(r"[SXAIFW](?!=)", self.flags): ~~sErr += '[is]'~~ if re.search(r"nom\|adj", self.po) and re.match(r"(?i)[aâàäáeéèêëiîïíìoôöóòuûüúù]", self.lemma) and re.match("[SFWXAI][.]", self.flags) \ and "pel" not in self.lx: sErr += 'le drapeau derait finir avec ' if not self.flags and self.iz.endswith(("mas", "fem", "epi")): sErr += '[is] incomplet' if self.flags.startswith(("a", "b", "c", "d")) and not self.lemma.endswith("er"): sErr += "drapeau pour verbe du 1ᵉʳ groupe sur un lemme non conforme"	\| \| \| \|	814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838	if re.search(r"\s$", self.lemma): sErr += 'espace en fin de lemme' if re.match(r"v[0123]", self.po) and not re.match(r"[eas_][ix_][tx_][nx_][pqreuvx_][mx_][ex_z][ax_z]\b", self.po[2:]): sErr += 'verbe inconnu: ' + self.po if (re.match(r"S[.]", self.flags) and re.search("[sxz]$", self.lemma)) or (re.match(r"X[.]", self.flags) and not re.search("[ul]$", self.lemma)): sErr += 'drapeau inutile' if self.iz == '' and re.match(r"[SXAI](?!=)", self.flags) and self.po: sErr += '[is] vide' if re.match(r"pl\|sg\|inv", self.iz): sErr += '[is] incomplet' if re.match(r"[FW]", self.flags) and re.search(r"epi\|mas\|fem\|inv\|sg\|pl", self.iz): sErr += '[is] incohérent' if re.match(r"[FW]", self.flags) and re.search(r"[^eë]$", self.lemma): sErr += "fin de lemme inapproprié" if re.match(r".\", self.flags) and re.match(r"[bcdfgjklmnpqrstvwxz]", self.lemma): sErr += 'drapeau pour lemme commençant par une voyelle' if re.search(r"pl\|sg\|inv", self.iz) and re.match(r"[SXAIFW](?!=)", self.flags): sErr += '[is] incohérent' if re.search(r"nom\|adj", self.po) and re.match(r"(?i)[aâàäáeéèêëiîïíìoôöóòuûüúù]", self.lemma) and re.match("[SFWXAI][.]", self.flags) \ and "pel" not in self.lx: sErr += 'le drapeau derait finir avec ' if not self.flags and self.iz.endswith(("mas", "fem", "epi")): sErr += '[is] incomplet' if self.flags.startswith(("a", "b", "c", "d")) and not self.lemma.endswith("er"): sErr += "drapeau pour verbe du 1ᵉʳ groupe sur un lemme non conforme"
︙			︙
863 864 865 866 867 868 869 ~~870 871~~ 872 873 874 875 876 877 878	def keyTriNat (self): return (self.lemma.translate(CHARMAP), self.flags, self.po) def keyTriNum (self): return (self.lemma, self.flags, self.po) ~~def get~~Entry~~Line (self, oDict, nMode, bSimplified=False): sLine = self.lemma~~ if self.flags: sLine += '/' sLine += self.flags if not oDict.bShortenTags or bSimplified else oDict.dAF[self.flags] if bSimplified: return sLine.replace("()", "") + "\n" if nMode > 0: sMorph = self.getMorph(nMode)	\| \|	863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878	def keyTriNat (self): return (self.lemma.translate(CHARMAP), self.flags, self.po) def keyTriNum (self): return (self.lemma, self.flags, self.po) def getHunspellLine (self, oDict, nMode, bSimplified=False): sLine = self.lemma.replace("’", "'") if self.flags: sLine += '/' sLine += self.flags if not oDict.bShortenTags or bSimplified else oDict.dAF[self.flags] if bSimplified: return sLine.replace("()", "") + "\n" if nMode > 0: sMorph = self.getMorph(nMode)
︙			︙
922 923 924 925 926 927 928 ~~929 930 931 932 933 934 935 936~~ 937 938 939 940 941 942 943	if not sMorph.endswith((" mas", " fem", " epi")): self.lFlexions.append( Flexion(self, sFlex, sMorph, sDic) ) self.nFlexions += 1 else: #echo(sFlex + " " + sMorph + ", ") pass # Drapeaux dont le lemme féminin doit être remplacé par le masculin dans la gestion des formes fléchies if self.flags.startswith(("F.", "F", "W.", "W")): # recherche de la forme masculine for t in lTuples: sMorph = self.clean(t[1]) if sMorph.endswith('mas'~~) or sMorph.endswith(~~'mas sg'~~) or sMorph.endswith(~~'mas inv'): self.s~~Radical~~ = t[0] else: self.s~~Radical~~ = self.lemma # Tag duplicates d = {} for oFlex in self.lFlexions: d[oFlex.sFlexion] = d.get(oFlex.sFlexion, 0) + 1 for oFlex in self.lFlexions: oFlex.nDup = d[oFlex.sFlexion]	> > > \| \| \| \| \| \| \| \|	922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946	if not sMorph.endswith((" mas", " fem", " epi")): self.lFlexions.append( Flexion(self, sFlex, sMorph, sDic) ) self.nFlexions += 1 else: #echo(sFlex + " " + sMorph + ", ") pass # Drapeaux dont le lemme féminin doit être remplacé par le masculin dans la gestion des formes fléchies if self.st: self.sStem = self.st else: if self.flags.startswith(("F.", "F", "W.", "W")): # recherche de la forme masculine for t in lTuples: sMorph = self.clean(t[1]) if sMorph.endswith(('mas', 'mas sg', 'mas inv')): self.sStem = t[0] else: self.sStem = self.lemma # Tag duplicates d = {} for oFlex in self.lFlexions: d[oFlex.sFlexion] = d.get(oFlex.sFlexion, 0) + 1 for oFlex in self.lFlexions: oFlex.nDup = d[oFlex.sFlexion]
︙			︙
1188 1189 1190 1191 1192 1193 1194 ~~1195~~ 1196 1197 1198 1199 1200 1201 1202	sOccurs += t[1] + "\t" return "id\tFlexion\tLemme\tÉtiquettes\tMétagraphe (β)\tMetaphone2\tNotes\tSémantique\tÉtymologie\tSous-dictionnaire\t" + sOccurs + "Total occurrences\tDoublons\tMultiples\tFréquence\tIndice de fréquence\n" def __str__ (self, oStatsLex): sOccurs = '' for v in oStatsLex.dFlexions[self.sFlexion]: sOccurs += str(v) + "\t" return "{0.oEntry.iD}\t{0.sFlexion}\t{0.oEntry.s~~Radical~~}\t{0.sMorph}\t{0.metagfx}\t{0.metaph2}\t{0.oEntry.lx}\t{0.oEntry.se}\t{0.oEntry.et}\t{0.oEntry.di}{2}\t{1}{0.nOccur}\t{0.nDup}\t{0.nMulti}\t{0.fFreq:.15f}\t{0.cFq}\n".format(self, sOccurs, "/"+self.cDic if self.cDic != "*" else "") @classmethod def simpleHeader (cls): return "# :POS ;LEX ~SEM =FQ /DIC\n" def getGrammarCheckerRepr (self): return "{0.sFlexion}\t{0.oEntry.lemma}\t{1}\n".format(self, self._getSimpleTags())	\|	1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205	sOccurs += t[1] + "\t" return "id\tFlexion\tLemme\tÉtiquettes\tMétagraphe (β)\tMetaphone2\tNotes\tSémantique\tÉtymologie\tSous-dictionnaire\t" + sOccurs + "Total occurrences\tDoublons\tMultiples\tFréquence\tIndice de fréquence\n" def __str__ (self, oStatsLex): sOccurs = '' for v in oStatsLex.dFlexions[self.sFlexion]: sOccurs += str(v) + "\t" return "{0.oEntry.iD}\t{0.sFlexion}\t{0.oEntry.sStem}\t{0.sMorph}\t{0.metagfx}\t{0.metaph2}\t{0.oEntry.lx}\t{0.oEntry.se}\t{0.oEntry.et}\t{0.oEntry.di}{2}\t{1}{0.nOccur}\t{0.nDup}\t{0.nMulti}\t{0.fFreq:.15f}\t{0.cFq}\n".format(self, sOccurs, "/"+self.cDic if self.cDic != "*" else "") @classmethod def simpleHeader (cls): return "# :POS ;LEX ~SEM =FQ /DIC\n" def getGrammarCheckerRepr (self): return "{0.sFlexion}\t{0.oEntry.lemma}\t{1}\n".format(self, self._getSimpleTags())
︙			︙
1212 1213 1214 1215 1216 1217 1218 ~~1219~~ 1220 1221 1222 1223 1224 1225 1226	"ipre": ":Ip", "iimp": ":Iq", "ipsi": ":Is", "ifut": ":If", "spre": ":Sp", "simp": ":Sq", "cond": ":K", "impe": ":E", "1sg": ":1s", "1isg": ":1ś", "1jsg": ":1ŝ", "2sg": ":2s", "3sg": ":3s", "1pl": ":1p", "2pl": ":2p", "3pl": ":3p", "3pl!": ":3p!", "prepv": ":Rv", "prep": ":R", "loc.prep": ":Ŕ", "detpos": ":Dp", "detdem": ":Dd", "detind": ":Di", "detneg": ":Dn", "detex": ":De", "det": ":D", "advint": ":U", "prodem": ":Od", "proind": ":Oi", "proint": ":Ot", "proneg": ":On", "prorel": ":Or", "proadv": ":Ow", ~~"properobj": ":Oo", "propersuj": ":Os", "1pe": ":O1", "2pe": ":O2", "3pe": ":O3",~~ "cjco": ":Cc", "cjsub": ":Cs", "cj": ":C", "loc.cj": ":Ĉ", "loc.cjsub": ":Ĉs", "prn": ":M1", "patr": ":M2", "loc.patr": ":Ḿ2", "npr": ":MP", "nompr": ":NM", "pfx": ":Zp", "sfx": ":Zs", "div": ":H", "err": ":#", # LEX "symb": ";S"	\|	1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229	"ipre": ":Ip", "iimp": ":Iq", "ipsi": ":Is", "ifut": ":If", "spre": ":Sp", "simp": ":Sq", "cond": ":K", "impe": ":E", "1sg": ":1s", "1isg": ":1ś", "1jsg": ":1ŝ", "2sg": ":2s", "3sg": ":3s", "1pl": ":1p", "2pl": ":2p", "3pl": ":3p", "3pl!": ":3p!", "prepv": ":Rv", "prep": ":R", "loc.prep": ":Ŕ", "detpos": ":Dp", "detdem": ":Dd", "detind": ":Di", "detneg": ":Dn", "detex": ":De", "det": ":D", "advint": ":U", "prodem": ":Od", "proind": ":Oi", "proint": ":Ot", "proneg": ":On", "prorel": ":Or", "proadv": ":Ow", "properobj": ":Oo", "propersuj": ":Os", "1pe": ":O1", "2pe": ":O2", "3pe": ":O3", "preverb": ":Ov", "cjco": ":Cc", "cjsub": ":Cs", "cj": ":C", "loc.cj": ":Ĉ", "loc.cjsub": ":Ĉs", "prn": ":M1", "patr": ":M2", "loc.patr": ":Ḿ2", "npr": ":MP", "nompr": ":NM", "pfx": ":Zp", "sfx": ":Zs", "div": ":H", "err": ":#", # LEX "symb": ";S"
︙			︙
1254 1255 1256 1257 1258 1259 1260 ~~1261~~ 1262 1263 ~~1264~~ 1265 1266 1267 1268 1269 1270 1271	s += "/" + self.oEntry.di return s def keyTriNat (self): return (self.sFlexion.translate(CHARMAP), self.sMorph) def keyFreq (self): ~~return (100-self.fFreq, self.oEntry.s~~Radical~~, self.sFlexion)~~ def keyOcc (self): ~~return (self.nOccur, self.oEntry.s~~Radical~~, self.sFlexion)~~ def keyIdx (self): return self.oEntry.iD def keyFlexion (self): return self.sFlexion	\| \|	1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274	s += "/" + self.oEntry.di return s def keyTriNat (self): return (self.sFlexion.translate(CHARMAP), self.sMorph) def keyFreq (self): return (100-self.fFreq, self.oEntry.sStem, self.sFlexion) def keyOcc (self): return (self.nOccur, self.oEntry.sStem, self.sFlexion) def keyIdx (self): return self.oEntry.iD def keyFlexion (self): return self.sFlexion
︙			︙
1497 1498 1499 1500 1501 1502 1503 ~~1504~~ 1505 1506 1507 1508 1509 1510 1511	hDst.write(str(t)+"\n") for e in self.dFlexions.items(): hDst.write("{} - {}\n".format(e[0], e[1])) def main (): xParser = argparse.ArgumentParser() xParser.add_argument("-v", "--verdic", help="set dictionary version, i.e. 5.4", type=str, default="X.Y.z") xParser.add_argument("-m", "--mode", help="0: no tags, 1: Hunspell tags (default), 2: All tags", type=int, choices=[0, 1, 2], default=1) xParser.add_argument("-u", "--uncompress", help="do not use Hunspell compression", action="store_true") xParser.add_argument("-s", "--simplify", help="no virtual lemmas", action="store_true") xParser.add_argument("-sv", "--spellvariants", help="generate spell variants", action="store_true") xParser.add_argument("-gl", "--grammalecte", help="copy generated files to Grammalecte folders", action="store_true")	<	1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513	hDst.write(str(t)+"\n") for e in self.dFlexions.items(): hDst.write("{} - {}\n".format(e[0], e[1])) def main (): xParser = argparse.ArgumentParser() xParser.add_argument("-v", "--verdic", help="set dictionary version, i.e. 5.4", type=str, default="X.Y.z") xParser.add_argument("-m", "--mode", help="0: no tags, 1: Hunspell tags (default), 2: All tags", type=int, choices=[0, 1, 2], default=1) xParser.add_argument("-u", "--uncompress", help="do not use Hunspell compression", action="store_true") xParser.add_argument("-s", "--simplify", help="no virtual lemmas", action="store_true") xParser.add_argument("-sv", "--spellvariants", help="generate spell variants", action="store_true") xParser.add_argument("-gl", "--grammalecte", help="copy generated files to Grammalecte folders", action="store_true")
︙			︙
1555 1556 1557 1558 1559 1560 1561 ~~1562~~ 1563 1564 1565 1566 1567 1568 1569	oFrenchDict.calculateStats(oStatsLex, spfStats) ### écriture des paquets echo("Création des paquets...") spLexiconDestGL = "../../../lexicons" if xArgs.grammalecte else "" spLibreOfficeExtDestGL = "../oxt/Dictionnaires/dictionaries" if xArgs.grammalecte else "" ~~spMozillaExtDestGL = "" # ~~les dictionnaires pour~~ Hunspell ~~ne s~~on~~t plu~~s util~~isés pour l’instant da~~ns ~~Firefox / Thunderbird~~~~ spDataDestGL = "../data" if xArgs.grammalecte else "" if not xArgs.uncompress: oFrenchDict.defineAbreviatedTags(xArgs.mode, spfStats) oFrenchDict.createFiles(spBuild, [dMODERNE, dTOUTESVAR, dCLASSIQUE, dREFORME1990], xArgs.mode, xArgs.simplify) oFrenchDict.createLexiconPackages(spBuild, xArgs.verdic, oStatsLex, spLexiconDestGL) oFrenchDict.createFileIfqForDB(spBuild)	\|	1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571	oFrenchDict.calculateStats(oStatsLex, spfStats) ### écriture des paquets echo("Création des paquets...") spLexiconDestGL = "../../../lexicons" if xArgs.grammalecte else "" spLibreOfficeExtDestGL = "../oxt/Dictionnaires/dictionaries" if xArgs.grammalecte else "" spMozillaExtDestGL = "" if xArgs.grammalecte else "" # no more Hunspell dictionaries in Mozilla extensions for now spDataDestGL = "../data" if xArgs.grammalecte else "" if not xArgs.uncompress: oFrenchDict.defineAbreviatedTags(xArgs.mode, spfStats) oFrenchDict.createFiles(spBuild, [dMODERNE, dTOUTESVAR, dCLASSIQUE, dREFORME1990], xArgs.mode, xArgs.simplify) oFrenchDict.createLexiconPackages(spBuild, xArgs.verdic, oStatsLex, spLexiconDestGL) oFrenchDict.createFileIfqForDB(spBuild)
︙			︙

Grammalecte Diff

Differences From Artifact [4630ffd3ad]:

To Artifact [1dab8fa65c]: