Overview
Comment: | [graphspell] lexicographer: better readability for past participle |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | graphspell |
Files: | files | file ages | folders |
SHA3-256: |
9377402874b68007e8d5f2c6c7738774 |
User & Date: | olr on 2020-11-19 18:40:57 |
Other Links: | manifest | tags |
Context
2020-11-19
| ||
23:48 | [fr] ajustements check-in: 9b10c609d3 user: olr tags: trunk, fr | |
18:40 | [graphspell] lexicographer: better readability for past participle check-in: 9377402874 user: olr tags: trunk, graphspell | |
18:38 | [fr] ajustements check-in: 38b9862aab user: olr tags: trunk, fr | |
Changes
Modified graphspell-js/lexgraph_fr.js from [d137f983fd] to [56d17d9958].
︙ | ︙ | |||
365 366 367 368 369 370 371 | ['‰', "signe pour mille"], ]), _zPartDemForm: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)-(là|ci)$", "i"), _aPartDemExceptList: new Set(["celui", "celle", "ceux", "celles", "de", "jusque", "par", "marie-couche-toi"]), _zInterroVerb: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)(-(?:t-(?:ie?l|elle|on)|je|tu|ie?ls?|elles?|on|[nv]ous))$", "i"), _zImperatifVerb: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)(-(?:l(?:es?|a)-(?:moi|toi|lui|[nv]ous|leur)|y|en|[mts]['’ʼ‘‛´`′‵՚ꞌꞋ](?:y|en)|les?|la|[mt]oi|leur|lui))$", "i"), | | | 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 | ['‰', "signe pour mille"], ]), _zPartDemForm: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)-(là|ci)$", "i"), _aPartDemExceptList: new Set(["celui", "celle", "ceux", "celles", "de", "jusque", "par", "marie-couche-toi"]), _zInterroVerb: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)(-(?:t-(?:ie?l|elle|on)|je|tu|ie?ls?|elles?|on|[nv]ous))$", "i"), _zImperatifVerb: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)(-(?:l(?:es?|a)-(?:moi|toi|lui|[nv]ous|leur)|y|en|[mts]['’ʼ‘‛´`′‵՚ꞌꞋ](?:y|en)|les?|la|[mt]oi|leur|lui))$", "i"), _zTag: new RegExp("[:;/][a-zA-Z0-9É@*!][^:;/]*", "g"), split: function (sWord) { // returns an arry of strings (prefix, trimed_word, suffix) let sPrefix = ""; let sSuffix = ""; // préfixe élidé let m = /^([ldmtsnjcç]|lorsqu|presqu|jusqu|puisqu|quoiqu|quelqu|qu)['’ʼ‘‛´`′‵՚ꞌꞋ]([a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st-]+)/i.exec(sWord); |
︙ | ︙ | |||
401 402 403 404 405 406 407 | }, readableMorph: function (sMorph) { if (!sMorph) { return " mot inconnu"; } let sRes = ""; | > > | > > > > > > < | < < | < | > > > | | > > > > > > > > | 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 | }, readableMorph: function (sMorph) { if (!sMorph) { return " mot inconnu"; } let sRes = ""; let sVType = ""; if (sMorph.includes(":V")) { sMorph = sMorph.replace(/:V([0-3][ea_])[itpqnmr_eaxz]+/, ":V$1"); } if (sMorph.includes(":Q")) { let nVerbTag = sMorph.indexOf(":V") sVType = sMorph.slice(nVerbTag, nVerbTag+4); sMorph = sMorph.replace(/:V[0123]./, "").replace(/:1[ŝś]/, ""); } let m; while ((m = this._zTag.exec(sMorph)) !== null) { sRes += this._readableTag(m[0]); } if ((sRes.startsWith(" verbe") && !sRes.includes("infinitif")) || sRes.startsWith(" participe")) { if (sVType) { sRes += " [" + sMorph.slice(1, sMorph.indexOf("/")) + " : " + this._readableTag(sVType).gl_trimRight(",") + "]"; } else { sRes += " [" + sMorph.slice(1, sMorph.indexOf("/")) + "]"; } } if (!sRes) { return " [" + sMorph + "]: étiquettes inconnues"; } return sRes.gl_trimRight(","); }, _readableTag: function (sTag) { if (this.dTag.has(sTag)) { return this.dTag.get(sTag)[0]; } return " [" + sTag + "]?"; }, setLabelsOnToken (oToken) { // Token: .sType, .sValue, .nStart, .nEnd, .lMorph let m = null; try { switch (oToken.sType) { case 'PUNC': |
︙ | ︙ |
Modified graphspell/lexgraph_fr.py from [24bc88a28a] to [851691ac43].
︙ | ︙ | |||
369 370 371 372 373 374 375 | '%': "signe de pourcentage", '‰': "signe pour mille" } _zElidedPrefix = re.compile("(?i)^([ldmtsnjcç]|lorsqu|presqu|jusqu|puisqu|quoiqu|quelqu|qu)[’'‘`ʼ]([\\w-]+)") _zCompoundWord = re.compile("(?i)(\\w+)(-(?:(?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts]’(?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous|ce))$") | | | 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 | '%': "signe de pourcentage", '‰': "signe pour mille" } _zElidedPrefix = re.compile("(?i)^([ldmtsnjcç]|lorsqu|presqu|jusqu|puisqu|quoiqu|quelqu|qu)[’'‘`ʼ]([\\w-]+)") _zCompoundWord = re.compile("(?i)(\\w+)(-(?:(?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts]’(?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous|ce))$") _zTag = re.compile("[:;/][\\w@*!][^:;/]*") def split (sWord): "split word in 3 parts: prefix, root, suffix" sPrefix = "" sSuffix = "" # préfixe élidé m = _zElidedPrefix.match(sWord) |
︙ | ︙ | |||
401 402 403 404 405 406 407 | def readableMorph (sMorph): "returns string: readable tags" if not sMorph: return "mot inconnu" sRes = "" | > > | > > > > | > > | < < | > > > > > > | 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 | def readableMorph (sMorph): "returns string: readable tags" if not sMorph: return "mot inconnu" sRes = "" sVType = "" if ":V" in sMorph: sMorph = re.sub("(?<=V[0123][ea_])[itpqnmr_eaxz]+", "", sMorph) if ":Q" in sMorph: nVerbTag = sMorph.find(":V") sVType = sMorph[nVerbTag:nVerbTag+4] sMorph = sMorph[4:].replace(":1ŝ", "").replace(":1ś", "") for m in _zTag.finditer(sMorph): sRes += _readableTag(m.group(0)) if sRes.startswith((" verbe", " participe")) and not sRes.endswith("infinitif"): if sVType: sRes += " [" + sMorph[1:sMorph.find("/")] + " : " + _readableTag(sVType).rstrip(",") + "]" else: sRes += " [" + sMorph[1:sMorph.find("/")] + "]" if not sRes: return " [" + sMorph + "]: étiquettes inconnues" return sRes.rstrip(",") def _readableTag (sTag): "returns string: readable tag" if sTag in _dTAGS: return _dTAGS[sTag][0] return " [" + sTag + "]?" _zPartDemForm = re.compile("([\\w]+)-(là|ci)$") _zInterroVerb = re.compile("([\\w]+)(-(?:t-(?:ie?l|elle|on)|je|tu|ie?ls?|elles?|on|[nv]ous))$") _zImperatifVerb = re.compile("([\\w]+)(-(?:l(?:es?|a)-(?:moi|toi|lui|[nv]ous|leur)|y|en|[mts]['’ʼ‘‛´`′‵՚ꞌꞋ](?:y|en)|les?|la|[mt]oi|leur|lui))$") def setLabelsOnToken (dToken): # Token: .sType, .sValue, .nStart, .nEnd, .lMorph |
︙ | ︙ |