Index: graphspell-js/lexgraph_fr.js ================================================================== --- graphspell-js/lexgraph_fr.js +++ graphspell-js/lexgraph_fr.js @@ -367,11 +367,11 @@ _zPartDemForm: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)-(là|ci)$", "i"), _aPartDemExceptList: new Set(["celui", "celle", "ceux", "celles", "de", "jusque", "par", "marie-couche-toi"]), _zInterroVerb: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)(-(?:t-(?:ie?l|elle|on)|je|tu|ie?ls?|elles?|on|[nv]ous))$", "i"), _zImperatifVerb: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)(-(?:l(?:es?|a)-(?:moi|toi|lui|[nv]ous|leur)|y|en|[mts]['’ʼ‘‛´`′‵՚ꞌꞋ](?:y|en)|les?|la|[mt]oi|leur|lui))$", "i"), - _zTag: new RegExp("[:;/][a-zA-Z0-9ÑÂĴĈŔÔṼŴ!][^:;/]*", "g"), + _zTag: new RegExp("[:;/][a-zA-Z0-9É@*!][^:;/]*", "g"), split: function (sWord) { // returns an arry of strings (prefix, trimed_word, suffix) let sPrefix = ""; let sSuffix = ""; @@ -403,27 +403,42 @@ readableMorph: function (sMorph) { if (!sMorph) { return " mot inconnu"; } let sRes = ""; - sMorph = sMorph.replace(/:V([0-3][ea_])[itpqnmr_eaxz]+/, ":V$1"); + let sVType = ""; + if (sMorph.includes(":V")) { + sMorph = sMorph.replace(/:V([0-3][ea_])[itpqnmr_eaxz]+/, ":V$1"); + } + if (sMorph.includes(":Q")) { + let nVerbTag = sMorph.indexOf(":V") + sVType = sMorph.slice(nVerbTag, nVerbTag+4); + sMorph = sMorph.replace(/:V[0123]./, "").replace(/:1[ŝś]/, ""); + } let m; while ((m = this._zTag.exec(sMorph)) !== null) { - if (this.dTag.has(m[0])) { - sRes += this.dTag.get(m[0])[0]; + sRes += this._readableTag(m[0]); + } + if ((sRes.startsWith(" verbe") && !sRes.includes("infinitif")) || sRes.startsWith(" participe")) { + if (sVType) { + sRes += " [" + sMorph.slice(1, sMorph.indexOf("/")) + " : " + this._readableTag(sVType).gl_trimRight(",") + "]"; } else { - sRes += " [" + m[0] + "]?"; + sRes += " [" + sMorph.slice(1, sMorph.indexOf("/")) + "]"; } } - if (sRes.startsWith(" verbe") && !sRes.includes("infinitif")) { - sRes += " [" + sMorph.slice(1, sMorph.indexOf("/")) + "]"; - } if (!sRes) { return " [" + sMorph + "]: étiquettes inconnues"; } return sRes.gl_trimRight(","); }, + + _readableTag: function (sTag) { + if (this.dTag.has(sTag)) { + return this.dTag.get(sTag)[0]; + } + return " [" + sTag + "]?"; + }, setLabelsOnToken (oToken) { // Token: .sType, .sValue, .nStart, .nEnd, .lMorph let m = null; try { Index: graphspell/lexgraph_fr.py ================================================================== --- graphspell/lexgraph_fr.py +++ graphspell/lexgraph_fr.py @@ -371,11 +371,11 @@ } _zElidedPrefix = re.compile("(?i)^([ldmtsnjcç]|lorsqu|presqu|jusqu|puisqu|quoiqu|quelqu|qu)[’'‘`ʼ]([\\w-]+)") _zCompoundWord = re.compile("(?i)(\\w+)(-(?:(?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts]’(?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous|ce))$") -_zTag = re.compile("[:;/][\\w*][^:;/]*") +_zTag = re.compile("[:;/][\\w@*!][^:;/]*") def split (sWord): "split word in 3 parts: prefix, root, suffix" sPrefix = "" sSuffix = "" @@ -403,22 +403,34 @@ def readableMorph (sMorph): "returns string: readable tags" if not sMorph: return "mot inconnu" sRes = "" - sMorph = re.sub("(?<=V[0123][ea_])[itpqnmr_eaxz]+", "", sMorph) + sVType = "" + if ":V" in sMorph: + sMorph = re.sub("(?<=V[0123][ea_])[itpqnmr_eaxz]+", "", sMorph) + if ":Q" in sMorph: + nVerbTag = sMorph.find(":V") + sVType = sMorph[nVerbTag:nVerbTag+4] + sMorph = sMorph[4:].replace(":1ŝ", "").replace(":1ś", "") for m in _zTag.finditer(sMorph): - if m.group(0) in _dTAGS: - sRes += _dTAGS[m.group(0)][0] + sRes += _readableTag(m.group(0)) + if sRes.startswith((" verbe", " participe")) and not sRes.endswith("infinitif"): + if sVType: + sRes += " [" + sMorph[1:sMorph.find("/")] + " : " + _readableTag(sVType).rstrip(",") + "]" else: - sRes += " [" + m.group(0) + "]?" - if sRes.startswith(" verbe") and not sRes.endswith("infinitif"): - sRes += " [" + sMorph[1:sMorph.find("/")] +"]" + sRes += " [" + sMorph[1:sMorph.find("/")] + "]" if not sRes: return " [" + sMorph + "]: étiquettes inconnues" return sRes.rstrip(",") +def _readableTag (sTag): + "returns string: readable tag" + if sTag in _dTAGS: + return _dTAGS[sTag][0] + return " [" + sTag + "]?" + _zPartDemForm = re.compile("([\\w]+)-(là|ci)$") _zInterroVerb = re.compile("([\\w]+)(-(?:t-(?:ie?l|elle|on)|je|tu|ie?ls?|elles?|on|[nv]ous))$") _zImperatifVerb = re.compile("([\\w]+)(-(?:l(?:es?|a)-(?:moi|toi|lui|[nv]ous|leur)|y|en|[mts]['’ʼ‘‛´`′‵՚ꞌꞋ](?:y|en)|les?|la|[mt]oi|leur|lui))$")