Overview
Comment: | [build][core][fr] suggestion engine: register common names derivated from verbs |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | fr | core | build |
Files: | files | file ages | folders |
SHA3-256: | 19dcb29939a151beed33943537d1759b |
User & Date: | olr on 2019-06-16 09:31:57 |
Original Comment: | [build][core][fr] suggestion engine: register rames derivated from verbs |
Other Links: | manifest | tags |
Context
2019-06-16
| ||
16:04 | [fr] faux positifs et ajustements check-in: 9da3251271 user: olr tags: trunk, fr | |
09:31 | [build][core][fr] suggestion engine: register common names derivated from verbs check-in: 19dcb29939 user: olr tags: trunk, fr, core, build | |
2019-06-15
| ||
20:14 | [build] prevent false warning check-in: bd8a228792 user: olr tags: trunk, build | |
Changes
Modified gc_lang/fr/build_data.py from [8989f0e8f3] to [c6ec423025].
︙ | ︙ | |||
11 12 13 14 15 16 17 18 19 20 21 22 23 24 | import traceback import graphspell.ibdawg as ibdawg from graphspell.echo import echo from graphspell.str_transform import defineSuffixCode import graphspell.tokenizer as tkz class cd: """Context manager for changing the current working directory""" def __init__ (self, newPath): self.newPath = os.path.expanduser(newPath) def __enter__ (self): | > > > | 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 | import traceback import graphspell.ibdawg as ibdawg from graphspell.echo import echo from graphspell.str_transform import defineSuffixCode import graphspell.tokenizer as tkz oDict = None class cd: """Context manager for changing the current working directory""" def __init__ (self, newPath): self.newPath = os.path.expanduser(newPath) def __enter__ (self): |
︙ | ︙ | |||
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 | if sLine == "__END__": break if sLine and not sLine.startswith("#"): yield sLine else: raise OSError("# Error. File not found or not loadable: " + spf) def makeDictionaries (sp, sVersion): with cd(sp+"/dictionnaire"): os.system("genfrdic.py -s -gl -v "+sVersion) def makeConj (sp, bJS=False): print("> Conjugaisons ", end="") print("(Python et JavaScript)" if bJS else "(Python seulement)") dVerb = {} | > > > > > > > > > | > > | > > | | | | | | | > > | | | | | | | > > > > > | > > > > > > > > | | | | | | | < < < < < < < < < < < < | < < < < < < < < < | | | > > | > | 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 | if sLine == "__END__": break if sLine and not sLine.startswith("#"): yield sLine else: raise OSError("# Error. 
File not found or not loadable: " + spf) def loadDictionary (): global oDict if not oDict: try: oDict = ibdawg.IBDAWG("fr-allvars.bdic") except: traceback.print_exc() def makeDictionaries (sp, sVersion): with cd(sp+"/dictionnaire"): os.system("genfrdic.py -s -gl -v "+sVersion) def makeConj (sp, bJS=False): print("> Conjugaisons ", end="") print("(Python et JavaScript)" if bJS else "(Python seulement)") dVerb = {} lVinfo = []; dVinfo = {}; nVinfo = 0 lTags = []; dTags = {}; nTags = 0 dVerbNames = {} dPatternList = { ":PQ": [], ":Ip": [], ":Iq": [], ":Is": [], ":If": [], ":K": [], ":Sp": [], ":Sq": [], ":E": [] } dTrad = { "infi": ":Y", "ppre": ":PQ", "ppas": ":PQ", "ipre": ":Ip", "iimp": ":Iq", "ipsi": ":Is", "ifut": ":If", "spre": ":Sp", "simp": ":Sq", "cond": ":K", "impe": ":E", "1sg": ":1s", "2sg": ":2s", "3sg": ":3s", "1pl": ":1p", "2pl": ":2p", "3pl": ":3p", "1isg": ":1ś", "mas sg": ":Q1", "mas pl": ":Q2", "mas inv": ":Q1", "fem sg": ":Q3", "fem pl": ":Q4", "epi inv": ":Q1" } loadDictionary() # read lexicon nStop = 0 for n, sLine in enumerate(readFile(sp+"/data/dictConj.txt")): nTab = sLine.count("\t") if nTab == 1: # new entry sLemma, sVinfo = sLine.split("\t") dConj = { ":PQ": { ":P": "", ":Q1": "", ":Q2": "", ":Q3": "", ":Q4": ""}, ":Ip": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "", ":1ś": "" }, ":Iq": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "" }, ":Is": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "" }, ":If": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "" }, ":K": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "" }, ":Sp": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "", ":1ś": "" }, ":Sq": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "", ":1ś": "" }, ":E": { ":2s": "", ":1p": "", ":2p": "" } } if sVinfo not in lVinfo: dVinfo[sVinfo] = nVinfo lVinfo.append(sVinfo) nVinfo += 1 # looking for names derivating from verb 
for sMorph in oDict.getMorph(sLemma): if ":N" in sMorph: dVerbNames[sLemma] = { sLemma } break elif nTab == 2: # flexion _, sTag, sFlex = sLine.split("\t") if sTag.count(" ") == 0: if sTag == "ppre": dConj[":PQ"][":P"] = defineSuffixCode(sLemma, sFlex) else: try: mode, g = sTag.split(maxsplit=1) mode = dTrad[mode] g = dTrad[g] if dConj[mode][g] == "": dConj[mode][g] = defineSuffixCode(sLemma, sFlex) else: # comment gérer les autres graphies ? pass except: echo(sLemma, " - ", sTag, " - non géré: ", mode, " / ", g) # looking for names derivating from verb for sMorph in oDict.getMorph(sFlex): if ":N" in sMorph: if sLemma not in dVerbNames: dVerbNames[sLemma] = { sFlex } else: dVerbNames[sLemma].add(sFlex) break elif sLine == "$": # we store the dictionary of rules for this lemma if dConj[":Ip"][":1ś"] == "2è": dConj[":Ip"][":1ś"] = "2é" elif sLemma == "pouvoir": dConj[":Ip"][":1ś"] = "6uis" lConjTags = [] for sTense in [":PQ", ":Ip", ":Iq", ":Is", ":If", ":K", ":Sp", ":Sq", ":E"]: bFound = False for i, d in enumerate(dPatternList[sTense]): if dConj[sTense] == d: bFound = True lConjTags.append(i) break if not bFound: lConjTags.append(len(dPatternList[sTense])) dPatternList[sTense].append(dConj[sTense]) tConjTags = tuple(lConjTags) if tConjTags not in lTags: dTags[tConjTags] = nTags lTags.append(tConjTags) nTags += 1 dVerb[sLemma] = (dVinfo[sVinfo], dTags[tConjTags]) else: print("# Error - unknown line", n) for sLemma, aNames in dVerbNames.items(): dVerbNames[sLemma] = tuple(aNames) # convert set to tuple ## write file for Python sCode = "## generated data (do not edit)\n\n" + \ "# Informations about verbs\n" + \ "lVtyp = " + str(lVinfo) + "\n\n" + \ "# indexes of tenses in _dPatternConj\n" + \ "lTags = " + str(lTags) + "\n\n" + \ "# lists of affix codes to generate inflected forms\n" + \ "dPatternConj = " + str(dPatternList) + "\n\n" + \ "# dictionary of verbs : (index of Vtyp, index of Tags)\n" + \ "dVerb = " + str(dVerb) + "\n\n" + \ "# names as derivatives from 
verbs\n" + \ "dVerbNames = " + str(dVerbNames) + "\n" open(sp+"/modules/conj_data.py", "w", encoding="utf-8", newline="\n").write(sCode) if bJS: ## write file for JavaScript with open(sp+"/modules-js/conj_data.json", "w", encoding="utf-8", newline="\n") as hDst: hDst.write("{\n") hDst.write(' "lVtyp": ' + json.dumps(lVinfo, ensure_ascii=False) + ",\n") hDst.write(' "lTags": ' + json.dumps(lTags, ensure_ascii=False) + ",\n") hDst.write(' "dPatternConj": ' + json.dumps(dPatternList, ensure_ascii=False) + ",\n") hDst.write(' "dVerb": ' + json.dumps(dVerb, ensure_ascii=False) + "\n") hDst.write(' "dVerbNames": ' + json.dumps(dVerbNames, ensure_ascii=False) + "\n") hDst.write("}\n") def makeMfsp (sp, bJS=False): print("> Pluriel/singulier/masculin/féminin ", end="") print("(Python et JavaScript)" if bJS else "(Python seulement)") aPlurS = set() |
︙ | ︙ | |||
264 265 266 267 268 269 270 | ' "dMasForm": ' + json.dumps(dMasForm, ensure_ascii=False) + "\n}" open(sp+"/modules-js/mfsp_data.json", "w", encoding="utf-8", newline="\n").write(sCode) def makePhonetTable (sp, bJS=False): print("> Correspondances phonétiques ", end="") print("(Python et JavaScript)" if bJS else "(Python seulement)") | | | < < < < | 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 | ' "dMasForm": ' + json.dumps(dMasForm, ensure_ascii=False) + "\n}" open(sp+"/modules-js/mfsp_data.json", "w", encoding="utf-8", newline="\n").write(sCode) def makePhonetTable (sp, bJS=False): print("> Correspondances phonétiques ", end="") print("(Python et JavaScript)" if bJS else "(Python seulement)") import gc_lang.fr.modules.conj as conj loadDictionary() # set of homophonic words lSet = [] for sLine in readFile(sp+"/data/phonet_simil.txt"): lWord = sLine.split() aMore = set() for sWord in lWord: |
︙ | ︙ |
Modified gc_lang/fr/modules-js/conj.js from [68b70111ba] to [27133631b8].
︙ | ︙ | |||
17 18 19 20 21 22 23 24 25 26 27 | } var conj = { _lVtyp: [], _lTags: [], _dPatternConj: {}, _dVerb: {}, bInit: false, init: function (sJSONData) { try { | > | | | | | > | 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 | } var conj = { _lVtyp: [], _lTags: [], _dPatternConj: {}, _dVerb: {}, _dVerbNames: {}, bInit: false, init: function (sJSONData) { try { let oData = JSON.parse(sJSONData); this._lVtyp = oData.lVtyp; this._lTags = oData.lTags; this._dPatternConj = oData.dPatternConj; this._dVerb = oData.dVerb; this._dVerbNames = oData.dVerbNames; this.bInit = true; } catch (e) { console.error(e); } }, |
︙ | ︙ | |||
117 118 119 120 121 122 123 | aSugg.add("eut"); } else { aSugg.add("étais"); aSugg.add("était"); } aSugg.delete(""); } else { | > > > > | | | | | | | | | > | 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 | aSugg.add("eut"); } else { aSugg.add("étais"); aSugg.add("était"); } aSugg.delete(""); } else { if (this._dVerbNames.hasOwnProperty(sInfi)) { // there are names derivated from the verb aSugg.update(this._dVerbNames[sInfi]); } else { // we suggest past participles aSugg.add(this._getConjWithTags(sInfi, tTags, ":PQ", ":Q1")); aSugg.add(this._getConjWithTags(sInfi, tTags, ":PQ", ":Q2")); aSugg.add(this._getConjWithTags(sInfi, tTags, ":PQ", ":Q3")); aSugg.add(this._getConjWithTags(sInfi, tTags, ":PQ", ":Q4")); aSugg.delete(""); // if there is only one past participle (epi inv), unreliable. if (aSugg.size === 1) { aSugg.clear(); } } } } return aSugg; }, _getTags: function (sVerb) { |
︙ | ︙ |
Modified gc_lang/fr/modules-js/conj_data.json from [adb2bc7075] to [e1b7a4dd30].
cannot compute difference between binary files
Modified gc_lang/fr/modules/conj.py from [ae150ced95] to [7b2f58ec61].
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 | """ Grammalecte - Conjugueur """ # License: GPL 3 import re import traceback from .conj_data import lVtyp as _lVtyp from .conj_data import lTags as _lTags from .conj_data import dPatternConj as _dPatternConj from .conj_data import dVerb as _dVerb _zStartVoy = re.compile("^[aeéiouœê]") _zNeedTeuph = re.compile("[tdc]$") #_zNEEDACCENTWITHJE = re.compile("[^i]e$") _dProSuj = { ":1s": "je", ":1ś": "je", ":2s": "tu", ":3s": "il", ":1p": "nous", ":2p": "vous", ":3p": "ils" } | > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 | """ Grammalecte - Conjugueur """ # License: GPL 3 import re import traceback from .conj_data import lVtyp as _lVtyp from .conj_data import lTags as _lTags from .conj_data import dPatternConj as _dPatternConj from .conj_data import dVerb as _dVerb from .conj_data import dVerbNames as _dVerbNames _zStartVoy = re.compile("^[aeéiouœê]") _zNeedTeuph = re.compile("[tdc]$") #_zNEEDACCENTWITHJE = re.compile("[^i]e$") _dProSuj = { ":1s": "je", ":1ś": "je", ":2s": "tu", ":3s": "il", ":1p": "nous", ":2p": "vous", ":3p": "ils" } |
︙ | ︙ | |||
88 89 90 91 92 93 94 | aSugg.add("eus") aSugg.add("eut") else: aSugg.add("étais") aSugg.add("était") aSugg.discard("") else: | > > > > | | | | | | | | | | 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 | aSugg.add("eus") aSugg.add("eut") else: aSugg.add("étais") aSugg.add("était") aSugg.discard("") else: if sInfi in _dVerbNames: # there are names derivated from the verb aSugg.update(_dVerbNames[sInfi]) else: # we suggest past participles aSugg.add(_getConjWithTags(sInfi, tTags, ":PQ", ":Q1")) aSugg.add(_getConjWithTags(sInfi, tTags, ":PQ", ":Q2")) aSugg.add(_getConjWithTags(sInfi, tTags, ":PQ", ":Q3")) aSugg.add(_getConjWithTags(sInfi, tTags, ":PQ", ":Q4")) aSugg.discard("") # if there is only one past participle (epi inv), unreliable. if len(aSugg) == 1: aSugg.clear() return aSugg def getConjSimilInfiV1 (sInfi): "returns verbal forms phonetically similar to infinitive form (for verb in group 1)" if sInfi not in _dVerb: return set() |
︙ | ︙ |
Modified gc_lang/fr/modules/conj_data.py from [bcb17f9b98] to [f9502006bf].
cannot compute difference between binary files