Overview
Comment: | [build][fr] include lemmas of words that are also verbal forms |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | fr | build |
Files: | files | file ages | folders |
SHA3-256: |
69affb5433f7262264ef8fb1b6747063 |
User & Date: | olr on 2021-02-15 13:46:17 |
Other Links: | manifest | tags |
Context
2021-02-15
| ||
16:32 | [core][fr] fix text formatter check-in: f069a117e4 user: olr tags: trunk, fr, core | |
13:46 | [build][fr] include lemmas of words that are also verbal forms check-in: 69affb5433 user: olr tags: trunk, fr, build | |
13:32 | [fr] +1 test check-in: 160407dd67 user: olr tags: trunk, fr | |
Changes
Modified gc_lang/fr/build_data.py from [94a1ff7b31] to [5e658c4d18].
︙ | ︙ | |||
88 89 90 91 92 93 94 | # read lexicon nStop = 0 for n, sLine in enumerate(readFile(sp+"/data/dictConj.txt")): nTab = sLine.count("\t") if nTab == 1: # new entry | | | | | | | | | < | > | > | | | | | 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 | # read lexicon nStop = 0 for n, sLine in enumerate(readFile(sp+"/data/dictConj.txt")): nTab = sLine.count("\t") if nTab == 1: # new entry sInfi, sVinfo = sLine.split("\t") dConj = { ":P": { ":P": "" }, ":Q": { ":m:s": "", ":f:s": "", ":m:p": "", ":f:p": "" }, ":Ip": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "", ":1ś": "" }, ":Iq": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "" }, ":Is": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "" }, ":If": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "" }, ":K": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "" }, ":Sp": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "", ":1ś": "" }, ":Sq": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "", ":1ś": "" }, ":E": { ":2s": "", ":1p": "", ":2p": "" } } if sVinfo not in lVinfo: dVinfo[sVinfo] = nVinfo lVinfo.append(sVinfo) nVinfo += 1 # looking for names derivating from verb for sMorph in oDict.getMorph(sInfi): if ":N" in sMorph: dVerbNames[sInfi] = { sInfi } break elif nTab == 2: # flexion _, sTag, sFlex = sLine.split("\t") if sTag.count(" ") == 0: if sTag == "ppre": dConj[":P"][":P"] = defineSuffixCode(sInfi, sFlex) else: try: mode, g = sTag.split(maxsplit=1) mode = dTrad[mode] g = dTrad[g] if dConj[mode][g] == "": dConj[mode][g] = defineSuffixCode(sInfi, sFlex) else: # comment gérer les autres graphies ? 
pass except: echo(sInfi, " - ", sTag, " - non géré: ", mode, " / ", g) # looking for names derivating from verb for sMorph in oDict.getMorph(sFlex): if ":N" in sMorph: if sInfi not in dVerbNames: dVerbNames[sInfi] = set() dVerbNames[sInfi].add(sFlex) sLemma = sMorph[1:sMorph.find("/")] if sFlex != sLemma: dVerbNames[sInfi].add(sLemma) elif sLine == "$": # we store the dictionary of rules for this lemma if dConj[":Ip"][":1ś"] == "2è": dConj[":Ip"][":1ś"] = "2é" elif sInfi == "pouvoir": dConj[":Ip"][":1ś"] = "6uis" lConjTags = [] for sTense in [":P", ":Q", ":Ip", ":Iq", ":Is", ":If", ":K", ":Sp", ":Sq", ":E"]: bFound = False for i, d in enumerate(dPatternList[sTense]): if dConj[sTense] == d: bFound = True lConjTags.append(i) break if not bFound: lConjTags.append(len(dPatternList[sTense])) dPatternList[sTense].append(dConj[sTense]) tConjTags = tuple(lConjTags) if tConjTags not in lTags: dTags[tConjTags] = nTags lTags.append(tConjTags) nTags += 1 dVerb[sInfi] = (dVinfo[sVinfo], dTags[tConjTags]) else: print("# Error - unknown line", n) for sInfi, aNames in dVerbNames.items(): dVerbNames[sInfi] = tuple(aNames) # convert set to tuple ## write file for Python sCode = "## generated data (do not edit)\n\n" + \ "# Informations about verbs\n" + \ "lVtyp = " + str(lVinfo) + "\n\n" + \ "# indexes of tenses in _dPatternConj\n" + \ "lTags = " + str(lTags) + "\n\n" + \ |
︙ | ︙ | |||
339 340 341 342 343 344 345 | ## write file for JavaScript sCode = "{\n" + \ ' "dWord": ' + json.dumps(dWord, ensure_ascii=False) + ",\n" + \ ' "lSet": ' + json.dumps(lSet, ensure_ascii=False) + ",\n" + \ ' "dMorph": ' + json.dumps(dMorph, ensure_ascii=False) + "\n}" open(sp+"/modules-js/phonet_data.json", "w", encoding="utf-8", newline="\n").write(sCode) | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 | ## write file for JavaScript sCode = "{\n" + \ ' "dWord": ' + json.dumps(dWord, ensure_ascii=False) + ",\n" + \ ' "lSet": ' + json.dumps(lSet, ensure_ascii=False) + ",\n" + \ ' "dMorph": ' + json.dumps(dMorph, ensure_ascii=False) + "\n}" open(sp+"/modules-js/phonet_data.json", "w", encoding="utf-8", newline="\n").write(sCode) def before (spLaunch, dVars, bJS=False): print("========== Build Hunspell dictionaries ==========") makeDictionaries(spLaunch, dVars['oxt_version']) def after (spLaunch, dVars, bJS=False): print("========== Build French data ==========") makeMfsp(spLaunch, bJS) makeConj(spLaunch, bJS) makePhonetTable(spLaunch, bJS) |
Modified gc_lang/fr/modules/conj_data.py from [17f0d4c069] to [8bdfafb3cd].
cannot compute difference between binary files
Modified gc_lang/fr/modules/phonet_data.py from [7fb8239e70] to [16ca1dac13].
cannot compute difference between binary files
Modified gc_lang/fr/rules.grx from [b7b290a56f] to [2b9fcab6e4].
︙ | ︙ | |||
8291 8292 8293 8294 8295 8296 8297 | __conf_de_vconj__ [de|d’] @:[123][sp]¬:[GNA] <<- /conf/ not \2.istitle() and not \2.isupper() and not value(\2, "|jure|") and not tag(\2, "eg1mot") -2>> =suggSimil(\2, ":[NA]", True)+"|"+suggVerbInfi(\2) && Incohérence avec « \1 » : “\2” est une forme verbale conjuguée. | | | 8291 8292 8293 8294 8295 8296 8297 8298 8299 8300 8301 8302 8303 8304 8305 | __conf_de_vconj__ [de|d’] @:[123][sp]¬:[GNA] <<- /conf/ not \2.istitle() and not \2.isupper() and not value(\2, "|jure|") and not tag(\2, "eg1mot") -2>> =suggSimil(\2, ":[NA]", True)+"|"+suggVerbInfi(\2) && Incohérence avec « \1 » : “\2” est une forme verbale conjuguée. TEST: il s’agit de {{mette}} en évidence. ->> mettre|mets|mise|mises|miss|misses|mission|missions TEST: sa façon de {{nettoyez}} était inefficace. ->> nettoyer|nettoyant [de|d’] [l’|leur] @:[123][sp]¬:[GNAQ] <<- /conf/ not \3.istitle() and not \3.isupper() -3>> =suggSimil(\3, ":[NA].*:[si]", True) && Incohérence avec « \1 \2 » : “\3” est une forme verbale conjuguée. TEST: de l’{{arrivait}} ->> arrivée|arrivant |
︙ | ︙ |