Overview

Comment:      [fr][build] merge genfrdic
Downloads:    Tarball | ZIP archive | SQL archive
Timelines:    family | ancestors | descendants | both | trunk | fr
Files:        files | file ages | folders
SHA3-256:     3a75d57243bd954d2f685c70fc77c503
User & Date:  olr on 2017-06-08 17:52:52
Other Links:  manifest | tags
Context

2017-06-08

21:17  [fr] pt: taux d’absorption (check-in: c7f6fd414a, user: olr, tags: trunk, fr)
19:38  merge trunk (check-in: ab9feb3d66, user: olr, tags: fr_killtricks)
17:52  [fr][build] merge genfrdic (this check-in: 3a75d57243, user: olr, tags: trunk, fr)
17:51  [fr][bug] calcul des occurrences des flexions existant dans plusieurs lemmes (Closed-Leaf check-in: 435b1fde99, user: olr, tags: fr, genfrdic)
05:03  [fr] correction bug regex sur taux de qqch (check-in: 80a7b8c83d, user: olr, tags: trunk, fr)
Changes
Modified gc_lang/fr/build_data.py from [040b9153d1] to [9294fbef92].
︙
Old lines 24-30, new lines 24-38. The one modified line is the genfrdic.py invocation, which now passes the new -gl flag:

        def __exit__ (self, etype, value, traceback):
            os.chdir(self.savedPath)


    def makeDictionaries (sp, sVersion):
        with cd(sp+"/dictionnaire"):
            os.system("genfrdic.py -s -gl -v "+sVersion)


    def makeConj (sp, bJS=False):
        print("> Conjugaisons ", end="")
        print("(Python et JavaScript)" if bJS else "(Python seulement)")
        dVerb = {}
        lVtyp = []; dVtyp = {}; nVtyp = 0
︙
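This is the whole of the build_data.py change: makeDictionaries simply forwards the new -gl option (defined in genfrdic.py below) so that the dictionary build also copies its output into the Grammalecte source tree. As a side note, the call shells out through os.system with string concatenation; a minimal sketch of a stricter alternative, assuming genfrdic.py sits in sp + "/dictionnaire" as above (an editor's illustration, not part of the check-in):

    import subprocess
    import sys

    def makeDictionaries (sp, sVersion):
        # Same effect as os.system("genfrdic.py -s -gl -v "+sVersion) above,
        # but with an argument list (no quoting issues) and a hard failure
        # on a non-zero exit code.
        subprocess.run([sys.executable, "genfrdic.py", "-s", "-gl", "-v", sVersion],
                       cwd=sp + "/dictionnaire", check=True)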
Modified gc_lang/fr/dictionnaire/genfrdic.py from [38f9af18d9] to [5036afecd5].
︙
Old lines 547-553, new lines 547-561. The modified line is the createLibreOfficeExtension signature, which gains the optional spDestGL parameter:

            dVars['version'] = self.sVersion
            # Dictionaries files (.dic) (.aff)
            self.writeAffixes(spDic, dVars, nMode, bSimplified)
            self.writeDictionary(spDic, dVars, nMode, bSimplified)
            copyTemplate('orthographe', spDic, 'README_dict_fr.txt', dVars)
            createZipFiles(spDic, spDst, sDicName + '.zip')

        def createLibreOfficeExtension (self, spBuild, dTplVars, lDictVars, spDestGL=""):
            # LibreOffice extension
            echo(" * Dictionnaire >> extension pour LibreOffice")
            dTplVars['version'] = self.sVersion
            sExtensionName = EXT_PREFIX_OOO + self.sVersion
            spExt = spBuild + '/' + sExtensionName
            dir_util.mkpath(spExt+'/META-INF')
            dir_util.mkpath(spExt+'/ui')
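The same optional spDestGL parameter, defaulting to the empty string, is threaded through every packaging method touched by this check-in. Since "" is falsy, the test "if spDestGL:" doubles as the on/off switch for the copy-to-Grammalecte step. The pattern in isolation (names hypothetical):

    def createSomething (spBuild, spDestGL=""):
        ...  # build artifacts under spBuild
        # "" is falsy: the copy step runs only when a destination was supplied
        if spDestGL:
            print("copying build artifacts to", spDestGL)

    createSomething("_build")              # build only
    createSomething("_build", "../data")   # build, then copy to Grammalecte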
︙
Old lines 586-592, new lines 586-667. The added lines (marked as insertions in the original diff) are the "if spDestGL:" copy blocks and the new spDestGL parameters:

            file_util.copy_file('césures/frhyph.tex', spExt+'/dictionaries')
            file_util.copy_file('césures/hyph-fr.tex', spExt+'/dictionaries')
            file_util.copy_file('césures/README_hyph_fr-3.0.txt', spExt+'/dictionaries')
            file_util.copy_file('césures/README_hyph_fr-2.9.txt', spExt+'/dictionaries')
            # zip
            createZipFiles(spExt, spBuild, sExtensionName + '.oxt')
            # copy to Grammalecte Project
            if spDestGL:
                echo(" extension copiée dans Grammalecte...")
                dir_util.copy_tree(spExt+'/dictionaries', spDestGL)

        def createMozillaExtensions (self, spBuild, dTplVars, lDictVars, spDestGL=""):
            # Mozilla extension 1
            echo(" * Dictionnaire >> extension pour Mozilla")
            dTplVars['version'] = self.sVersion
            sExtensionName = EXT_PREFIX_MOZ + self.sVersion
            spExt = spBuild + '/' + sExtensionName
            dir_util.mkpath(spExt+'/dictionaries')
            copyTemplate('_templates/moz', spExt, 'install.rdf', dTplVars)
            spDict = spBuild + '/' + PREFIX_DICT_PATH + self.sVersion
            file_util.copy_file(spDict+'/fr-classique.dic', spExt+'/dictionaries/fr-classic.dic')
            file_util.copy_file(spDict+'/fr-classique.aff', spExt+'/dictionaries/fr-classic.aff')
            copyTemplate('orthographe', spExt, 'README_dict_fr.txt', dTplVars)
            createZipFiles(spExt, spBuild, sExtensionName + '.xpi')
            # Grammalecte
            if spDestGL:
                echo(" * Dictionnaire >> copie des dicos dans Grammalecte")
                for dVars in lDictVars:
                    file_util.copy_file(spDict+'/'+dVars['asciiName']+'.dic', spDestGL+'/'+dVars['mozAsciiName']+"/"+dVars['mozAsciiName']+'.dic')
                    file_util.copy_file(spDict+'/'+dVars['asciiName']+'.aff', spDestGL+'/'+dVars['mozAsciiName']+"/"+dVars['mozAsciiName']+'.aff')

        def createFileIfqForDB (self, spBuild):
            echo(" * Dictionnaire >> indices de fréquence pour la DB...")
            with open(spBuild+'/dictIdxIfq-'+self.sVersion+'.diff.txt', 'w', encoding='utf-8', newline="\n") as hDiff, \
                 open(spBuild+'/dictIdxIfq-'+self.sVersion+'.notes.txt', 'w', encoding='utf-8', newline="\n") as hNotes:
                for oEntry in self.lEntry:
                    if oEntry.fq != oEntry.oldFq:
                        hDiff.write("{0.iD}\t{0.fq}\n".format(oEntry))
                        hNotes.write("{0.lemma}/{0.flags}\t{0.oldFq} > {0.fq}\n".format(oEntry))

        def createLexiconPackages (self, spBuild, version, oStatsLex, spDestGL=""):
            sLexName = LEX_PREFIX + version
            spLex = spBuild + '/' + sLexName
            dir_util.mkpath(spLex)
            # write Dicollecte lexicon
            self.sortLexiconByFreq()
            self.writeLexicon(spLex + '/' + sLexName + '.txt', version, oStatsLex)
            self.writeGrammarCheckerLexicon(spBuild + '/' + sLexName + '.lex', version)
            copyTemplate('lexique', spLex, 'README_lexique.txt', {'version': version})
            # zip
            createZipFiles(spLex, spBuild, sLexName + '.zip')
            # copy GC lexicon to Grammalecte
            if spDestGL:
                file_util.copy_file(spBuild + '/' + sLexName + '.lex', spDestGL + '/French.lex')
                file_util.copy_file('lexique/French.tagset.txt', spDestGL)

        def createDictConj (self, spBuild, spDestGL=""):
            echo(" * Dictionnaire >> fichier de conjugaison...")
            with open(spBuild+'/dictConj.txt', 'w', encoding='utf-8', newline="\n") as hDst:
                for oEntry in self.lEntry:
                    if oEntry.po.startswith("v"):
                        hDst.write(oEntry.getConjugation())
            if spDestGL:
                echo(" Fichier de conjugaison copié dans Grammalecte...")
                file_util.copy_file(spBuild+'/dictConj.txt', spDestGL)

        def createDictDecl (self, spBuild, spDestGL=""):
            echo(" * Dictionnaire >> fichier de déclinaison...")
            with open(spBuild+'/dictDecl.txt', 'w', encoding='utf-8', newline="\n") as hDst:
                for oEntry in self.lEntry:
                    if re.match("[SXFWIA]", oEntry.flags) and (oEntry.po.startswith("nom") or oEntry.po.startswith("adj")):
                        hDst.write(oEntry.getDeclination())
            if spDestGL:
                echo(" Fichier de déclinaison copié dans Grammalecte...")
                file_util.copy_file(spBuild+'/dictDecl.txt', spDestGL)

        def generateSpellVariants (self, nReq, spBuild):
            if nReq < 1: nReq = 1
            if nReq > 2: nReq = 2
            echo(" * Lexique >> variantes par suppression... n = " + str(nReq))
            with open(spBuild+'/dictSpellVariants-'+str(nReq)+'.txt', 'w', encoding='utf-8', newline="\n") as hDst:
                for oFlex in frozenset(self.lFlexions):
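All the copy operations above go through distutils (dir_util.mkpath, file_util.copy_file, dir_util.copy_tree), which was current in 2017 but has since been deprecated and removed in Python 3.12. For anyone adapting this code today, the standard library offers drop-in equivalents (an editor's sketch, not part of the check-in):

    import os
    import shutil

    def mkpath (sp):
        os.makedirs(sp, exist_ok=True)                       # dir_util.mkpath

    def copy_file (spSrc, spDst):
        shutil.copy(spSrc, spDst)                            # file_util.copy_file

    def copy_tree (spSrc, spDst):
        # dirs_exist_ok (Python 3.8+) mirrors distutils' merge-into-existing behaviour
        shutil.copytree(spSrc, spDst, dirs_exist_ok=True)    # dir_util.copy_tree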
︙
Old lines 805-811, new lines 809-822 (the original diff also deletes lines here whose content the extraction did not preserve):

            if self.err:
                echo("\n## Erreur dans le dictionnaire : {}".format(self.err))
                echo(" dans : " + self.lemma)

        def __str__ (self):
            return "{0.lemma}/{0.flags} {1}".format(self, self.getMorph(2))

        def check (self):
            sErr = ''
            if self.lemma == '':
                sErr += 'lemme vide'
            if not re.match(r"[a-zA-ZéÉôÔàâÂîÎïèÈêÊÜœŒæÆçÇ0-9µåÅΩ&αβγδεζηθικλμνξοπρστυφχψωΔℓΩ_]", self.lemma):
                sErr += 'premier caractère inconnu: ' + self.lemma[0]
            if re.search(r"\s$", self.lemma):
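Note the asymmetry in check(): the first-character test uses re.match, which is implicitly anchored at the start of the string, while the trailing-whitespace test must use re.search, because a pattern tried only at position 0 would never see the end of the lemma. In miniature:

    import re

    assert re.match(r"[a-z]", "abc ")       # tries position 0 only: 'a' matches
    assert not re.match(r"\s$", "abc ")     # position 0 is 'a', not whitespace: no match
    assert re.search(r"\s$", "abc ")        # scans the string: finds the trailing space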
︙
Old lines 1075-1081, new lines 1076-1090. The modified line is the flexion filter, which now also skips blocked flexions (not oFlex.bBlocked):

            # moyenne des formes fléchies sans équivalent ou -1
            self.nAKO = math.ceil(nOccur / nFlex) if nFlex > 0 else -1

        def solveOccurMultipleFlexions (self, hDst, oStatsLex):
            sBlank = " "
            if self.nAKO >= 0:
                for oFlex in self.lFlexions:
                    if oFlex.nMulti > 0 and not oFlex.bBlocked:
                        # on trie les entrées avec AKO et sans AKO
                        lEntWithAKO = []
                        lEntNoAKO = []
                        for oEntry in oFlex.lMulti:
                            if oEntry.nAKO >= 0:
                                lEntWithAKO.append(oEntry)
                            else:
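Per the French comment, nAKO is the entry's average occurrence count over its inflected forms without homograph equivalents, with -1 as a sentinel meaning no usable average. With illustrative numbers (not from the corpus):

    import math

    nOccur, nFlex = 1000, 3   # e.g. 1000 occurrences over 3 unambiguous forms
    nAKO = math.ceil(nOccur / nFlex) if nFlex > 0 else -1
    print(nAKO)               # 334; with nFlex == 0 the sentinel -1 is stored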
︙
Old lines 1099-1105, new lines 1100-1178. Per the diff markers, the changes centre on the new blocking mechanism: recomputed occurrence counts are now written back with setOccurAndBlock, and the Flexion class gains the bBlocked flag and that method:

                        if nDiff > 0:
                            # on peut passer à les formes fléchies à AKO
                            hDst.write(" * {0.sFlexion}\n".format(oFlex))
                            hDst.write(" moyenne connue\n")
                            for oFlexD in self.lFlexions:
                                if oFlex.sFlexion == oFlexD.sFlexion:
                                    hDst.write(sBlank + "{2:<30} {0.sMorph:<30} {0.nOccur:>10} >> {1:>10}\n".format(oFlexD, self.nAKO, self.getShortDescr()))
                                    oFlexD.setOccurAndBlock(self.nAKO)
                            for oEntry in lEntWithAKO:
                                hDst.write(" moyenne connue\n")
                                for oFlexM in oEntry.lFlexions:
                                    if oFlex.sFlexion == oFlexM.sFlexion:
                                        hDst.write(sBlank + "{2:<30} {0.sMorph:<30} {0.nOccur:>10} >> {1:>10}\n".format(oFlexM, oEntry.nAKO, oEntry.getShortDescr()))
                                        oFlexM.setOccurAndBlock(oEntry.nAKO)
                            # on répercute nDiff sur les flexions sans AKO
                            for oEntry in lEntNoAKO:
                                hDst.write(" sans moyenne connue\n")
                                for oFlexM in oEntry.lFlexions:
                                    if oFlex.sFlexion == oFlexM.sFlexion:
                                        nNewOccur = oFlexM.nOccur + math.ceil((nDiff / len(lEntNoAKO)) / oFlexM.nDup)
                                        hDst.write(sBlank + "{2:<30} {0.sMorph:<30} {0.nOccur:>10} +> {1:>10}\n".format(oFlexM, nNewOccur, oEntry.getShortDescr()))
                                        oFlexM.setOccurAndBlock(nNewOccur)
                    else:
                        # Toutes les entrées sont avec AKO : on pondère
                        nFlexOccur = oStatsLex.getFlexionOccur(oFlex.sFlexion)
                        nTotAKO = self.nAKO
                        for oEnt in oFlex.lMulti:
                            nTotAKO += oEnt.nAKO
                        hDst.write(" = {0.sFlexion}\n".format(oFlex))
                        hDst.write(" moyennes connues\n")
                        for oFlexD in self.lFlexions:
                            if oFlex.sFlexion == oFlexD.sFlexion:
                                nNewOccur = math.ceil((nFlexOccur * (self.nAKO / nTotAKO)) / oFlexD.nDup) if nTotAKO else 0
                                hDst.write(sBlank + "{2:<30} {0.sMorph:<30} {0.nOccur:>10} %> {1:>10}\n".format(oFlexD, nNewOccur, self.getShortDescr()))
                                oFlexD.setOccurAndBlock(nNewOccur)
                        for oEntry in oFlex.lMulti:
                            for oFlexM in oEntry.lFlexions:
                                if oFlex.sFlexion == oFlexM.sFlexion:
                                    nNewOccur = math.ceil((nFlexOccur * (oEntry.nAKO / nTotAKO)) / oFlexM.nDup) if nTotAKO else 0
                                    hDst.write(sBlank + "{2:<30} {0.sMorph:<30} {0.nOccur:>10} %> {1:>10}\n".format(oFlexM, nNewOccur, oEntry.getShortDescr()))
                                    oFlexM.setOccurAndBlock(nNewOccur)

        def calcFreq (self, nTot):
            self.fFreq = (self.nOccur * 100) / nTot
            self.oldFq = self.fq
            self.fq = getIfq(self.fFreq)


    class Flexion:
        def __init__ (self, oEntry, sFlex='', sMorph='', cDic=''):
            self.oEntry = oEntry
            self.sFlexion = sFlex
            self.sMorph = sMorph
            self.cDic = cDic
            self.nOccur = 0
            self.bBlocked = False
            self.nDup = 0       # duplicates in the same entry
            self.nMulti = 0     # duplicates with other entries
            self.lMulti = []    # list of similar flexions
            self.fFreq = 0
            self.cFq = ''
            self.metagfx = ''   # métagraphe
            self.metaph2 = ''   # métaphone 2

        def setOccur (self, n):
            self.nOccur = n

        def setOccurAndBlock (self, n):
            self.nOccur = n
            self.bBlocked = True

        def calcOccur (self):
            self.nOccur = math.ceil((self.nOccur / (self.nMulti+1)) / self.nDup)

        def calcFreq (self, nTot):
            self.fFreq = (self.nOccur * 100) / nTot
            self.cFq = getIfq(self.fFreq)
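For orientation, calcOccur is where a flexion's raw corpus count gets split evenly: first across the nMulti + 1 entries that share the spelling, then across the entry's own nDup internal duplicates (evidently counted so that nDup is at least 1 by the time this runs, or the division would fail). With illustrative numbers:

    import math

    nOccur, nMulti, nDup = 900, 2, 1   # hypothetical raw count, 2 homograph entries
    print(math.ceil((nOccur / (nMulti + 1)) / nDup))   # 300 occurrences per entry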
︙
Old lines 1190-1196, new lines 1194-1207 (the original diff also deletes lines here whose content the extraction did not preserve):

        def __str__ (self, oStatsLex):
            sOccurs = ''
            for v in oStatsLex.dFlexions[self.sFlexion]:
                sOccurs += str(v) + "\t"
            return "{0.oEntry.iD}\t{0.sFlexion}\t{0.oEntry.sRadical}\t{0.sMorph}\t{0.metagfx}\t{0.metaph2}\t{0.oEntry.lx}\t{0.oEntry.se}\t{0.oEntry.et}\t{0.oEntry.di}{2}\t{1}{0.nOccur}\t{0.nDup}\t{0.nMulti}\t{0.fFreq:.15f}\t{0.cFq}\n".format(self, sOccurs, "/"+self.cDic if self.cDic != "*" else "")

        @classmethod
        def simpleHeader (cls):
            return "# :POS ;LEX ~SEM =FQ /DIC\n"

        def getGrammarCheckerRepr (self):
            return "{0.sFlexion}\t{0.oEntry.lemma}\t{1}\n".format(self, self._getSimpleTags())
︙
Old lines 1504-1517, new lines 1505-1519. The added line defines the new -gl/--grammalecte option:

        xParser = argparse.ArgumentParser()
        xParser.add_argument("-v", "--verdic", help="set dictionary version, i.e. 5.4", type=str, default="X.Y.z")
        xParser.add_argument("-m", "--mode", help="0: no tags, 1: Hunspell tags (default), 2: All tags", type=int, choices=[0, 1, 2], default=1)
        xParser.add_argument("-u", "--uncompress", help="do not use Hunspell compression", action="store_true")
        xParser.add_argument("-s", "--simplify", help="no virtual lemmas", action="store_true")
        xParser.add_argument("-sv", "--spellvariants", help="generate spell variants", action="store_true")
        xParser.add_argument("-gl", "--grammalecte", help="copy generated files to Grammalecte folders", action="store_true")
        xArgs = xParser.parse_args()

        if xArgs.simplify:
            xArgs.mode = 0
            xArgs.uncompress = True

        echo("Python: " + sys.version)
︙
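The new flag is consumed in main(), shown in the final hunk below. With it, a build that also refreshes the Grammalecte folders would be launched roughly as follows (version number illustrative, mirroring the build_data.py invocation above):

    genfrdic.py -s -gl -v 6.0.4

Without -gl, the four destination variables defined below stay empty and every copy step is skipped.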
Old lines 1551-1560, new lines 1553-1579. The six added lines derive the Grammalecte destination paths from the new flag, and the modified calls pass them through:

        oStatsLex.addLexFromFile('lexique/corpus_data/stats_frwikisource.txt', 'S', 'Wikisource')
        oStatsLex.addLexFromFile('lexique/corpus_data/stats_litterature.txt', 'L', 'Littérature')
        oStatsLex.write(spBuild+'/test_lex.txt')
        oFrenchDict.calculateStats(oStatsLex, spfStats)

        ### écriture des paquets
        echo("Création des paquets...")

        spLexiconDestGL = "../../../lexicons" if xArgs.grammalecte else ""
        spLibreOfficeExtDestGL = "../oxt/Dictionnaires/dictionaries" if xArgs.grammalecte else ""
        spMozillaExtDestGL = "../xpi/data/dictionaries" if xArgs.grammalecte else ""
        spDataDestGL = "../data" if xArgs.grammalecte else ""

        if not xArgs.uncompress:
            oFrenchDict.defineAbreviatedTags(xArgs.mode, spfStats)
        oFrenchDict.createFiles(spBuild, [dMODERNE, dTOUTESVAR, dCLASSIQUE, dREFORME1990], xArgs.mode, xArgs.simplify)
        oFrenchDict.createLexiconPackages(spBuild, xArgs.verdic, oStatsLex, spLexiconDestGL)
        oFrenchDict.createFileIfqForDB(spBuild)
        oFrenchDict.createLibreOfficeExtension(spBuild, dMOZEXT, [dMODERNE, dTOUTESVAR, dCLASSIQUE, dREFORME1990], spLibreOfficeExtDestGL)
        oFrenchDict.createMozillaExtensions(spBuild, dMOZEXT, [dMODERNE, dTOUTESVAR, dCLASSIQUE, dREFORME1990], spMozillaExtDestGL)
        oFrenchDict.createDictConj(spBuild, spDataDestGL)
        oFrenchDict.createDictDecl(spBuild, spDataDestGL)


    if __name__ == '__main__':
        main()
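Note that the destination paths are relative (../../../lexicons, ../oxt/Dictionnaires/dictionaries, ../xpi/data/dictionaries, ../data), so they only resolve correctly when genfrdic.py runs from gc_lang/fr/dictionnaire, which is exactly what build_data.py arranges with its cd context manager before calling it. Only __exit__ is visible in this check-in; a minimal sketch of a manager consistent with that method (the __init__ and __enter__ parts are the editor's assumption):

    import os

    class cd:
        # change the working directory for the duration of a with-block
        def __init__ (self, newPath):
            self.newPath = newPath
        def __enter__ (self):
            self.savedPath = os.getcwd()   # remember the original directory
            os.chdir(self.newPath)
        def __exit__ (self, etype, value, traceback):
            os.chdir(self.savedPath)       # restore it, even if the block raised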