Overview
Comment: | [build][core][fr][misc] phonet token, new syntax |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | fr | core | build | misc |
Files: | files | file ages | folders |
SHA3-256: | 98cbf77aeff1786b8832a40f9b8e0096 |
User & Date: | olr on 2020-11-14 11:08:39 |
Other Links: | manifest | tags |
Context
2020-11-14
| ||
13:46 | [core][fr] phonet: better code for isSimilAs() check-in: 5f68edd979 user: olr tags: trunk, fr, core | |
11:08 | [build][core][fr][misc] phonet token, new syntax check-in: 98cbf77aef user: olr tags: trunk, fr, core, build, misc | |
2020-11-13
| ||
19:28 | [fr] affixes: màj check-in: 45248d2762 user: olr tags: trunk, fr | |
Changes
Modified darg.py from [9b17f8d6af] to [f98928fa4d].
︙ | |||
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 | 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 | + + + + + | def getNodeAsDict (self): "returns the node as a dictionary structure" dNode = {} dReValue = {} # regex for token values dReMorph = {} # regex for morph dMorph = {} # simple search in morph dLemma = {} dPhonet = {} dMeta = {} dTag = {} dRule = {} for sArc, oNode in self.dArcs.items(): if sArc.startswith("@") and len(sArc) > 1: dReMorph[sArc[1:]] = oNode.__hash__() elif sArc.startswith("$") and len(sArc) > 1: dMorph[sArc[1:]] = oNode.__hash__() elif sArc.startswith("~") and len(sArc) > 1: dReValue[sArc[1:]] = oNode.__hash__() elif sArc.startswith(">") and len(sArc) > 1: dLemma[sArc[1:]] = oNode.__hash__() elif sArc.startswith("%") and len(sArc) > 1: dPhonet[sArc[1:]] = oNode.__hash__() elif sArc.startswith("*") and len(sArc) > 1: dMeta[sArc[1:]] = oNode.__hash__() elif sArc.startswith("/") and len(sArc) > 1: dTag[sArc[1:]] = oNode.__hash__() elif sArc.startswith("##"): dRule[sArc[1:]] = oNode.__hash__() else: dNode[sArc] = oNode.__hash__() if dReValue: dNode["<re_value>"] = dReValue if dReMorph: dNode["<re_morph>"] = dReMorph if dMorph: dNode["<morph>"] = dMorph if dLemma: dNode["<lemmas>"] = dLemma if dPhonet: dNode["<phonet>"] = dPhonet if dTag: dNode["<tags>"] = dTag if dMeta: dNode["<meta>"] = dMeta if dRule: dNode["<rules>"] = dRule #if self.bFinal: # dNode["<final>"] = 1 return dNode |
Modified gc_core/js/lang_core/gc_engine.js from [2769c1001e] to [2d8b7e8dc3].
︙ | |||
474 475 476 477 478 479 480 481 482 483 484 485 486 487 | 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 | + + + + + + + + + + + + + + + + + + + + + + + + + + | if (bDebug) { console.log(" MATCH: >" + sLemma); } yield { "iToken1": iToken1, "iNode": oNode["<lemmas>"][sLemma] }; bTokenFound = true; } } } // phonetic similarity if (oNode.hasOwnProperty("<phonet>")) { for (let sPhonet in oNode["<phonet>"]) { if (sPhonet.endsWith("!")) { let sPhon = sPhonet.slice(0,-1); if (oToken["sValue"] == sPhon) { continue; } if (oToken["sValue"].slice(0,1).gl_isUpperCase()) { if (oToken["sValue"].toLowerCase() == sPhon) { continue; } if (oToken["sValue"].gl_isUpperCase() && oToken["sValue"].gl_toCapitalize() == sPhon) { continue; } } } if (phonet.isSimilAs(oToken["sValue"], sPhonet.gl_trimRight("!"))) { if (bDebug) { console.log(" MATCH: %" + sPhonet); } yield { "iToken1": iToken1, "iNode": oNode["<phonet>"][sPhonet] }; bTokenFound = true; } } } // morph arcs if (oNode.hasOwnProperty("<morph>")) { let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : gc_engine.oSpellChecker.getMorph(oToken["sValue"]); if (lMorph.length > 0) { for (let sSearch in oNode["<morph>"]) { if (!sSearch.includes("¬")) { |
︙ |
Modified gc_core/py/lang_core/gc_engine.py from [957964b063] to [f9382d41bb].
︙ | |||
11 12 13 14 15 16 17 18 19 20 21 22 23 24 | 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 | + | from ..graphspell.spellchecker import SpellChecker from ..graphspell.echo import echo from .. import text from . import gc_functions from . import gc_options from . import phonet try: # LibreOffice / OpenOffice from com.sun.star.linguistic2 import SingleProofreadingError from com.sun.star.text.TextMarkupType import PROOFREADING from com.sun.star.beans import PropertyValue #import lightproof_handler_${implname} as opt |
︙ | |||
457 458 459 460 461 462 463 464 465 466 467 468 469 470 | 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 | + + + + + + + + + + + + + + + + + | if "<lemmas>" in dNode: for sLemma in _oSpellChecker.getLemma(dToken["sValue"]): if sLemma in dNode["<lemmas>"]: if bDebug: echo(" MATCH: >" + sLemma) yield { "iToken1": iToken1, "iNode": dNode["<lemmas>"][sLemma] } bTokenFound = True # phonetic similarity if "<phonet>" in dNode: for sPhonet in dNode["<phonet>"]: if sPhonet.endswith("!"): sPhon = sPhonet[0:-1] if dToken["sValue"] == sPhon: continue if dToken["sValue"][0:1].isupper(): if dToken["sValue"].lower() == sPhon: continue if dToken["sValue"].isupper() and dToken["sValue"].capitalize() == sPhon: continue if phonet.isSimilAs(dToken["sValue"], sPhonet.rstrip("!")): if bDebug: echo(" MATCH: %" + sPhonet) yield { "iToken1": iToken1, "iNode": dNode["<phonet>"][sPhonet] } bTokenFound = True # morph arcs if "<morph>" in dNode: lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"])) if lMorph: for sSearch in dNode["<morph>"]: if "¬" not in sSearch: # no anti-pattern |
︙ |
Modified gc_lang/fr/config.ini from [9e7bd75963] to [4aa81ead77].
1 2 3 4 5 6 7 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 | - + | [args] lang = fr lang_name = French locales = fr_FR fr_BE fr_CA fr_CH fr_LU fr_BF fr_BJ fr_CD fr_CI fr_CM fr_MA fr_ML fr_MU fr_NE fr_RE fr_SN fr_TG country_default = FR name = Grammalecte implname = grammalecte # always use 3 numbers for version: x.y.z |
︙ |
Modified gc_lang/fr/modules-js/phonet.js from [0562e85836] to [62385b8be1].
︙ | |||
25 26 27 28 29 30 31 | 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 | - + | } catch (e) { console.error(e); } }, hasSimil: function (sWord, sPattern=null) { |
︙ | |||
48 49 50 51 52 53 54 | 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 | - + - + + + + + + + + + + + + + + + + + + + + + + + + | return true; } } return false; }, getSimil: function (sWord) { |
︙ |
Modified gc_lang/fr/modules/phonet.py from [df9f884192] to [a7ff873dfd].
︙ | |||
8 9 10 11 12 13 14 | 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 | - + - + - + + + + + + + + + + + + + + + + + + | from .phonet_data import dWord as _dWord from .phonet_data import lSet as _lSet from .phonet_data import dMorph as _dMorph def hasSimil (sWord, sPattern=None): |
Modified gc_lang/fr/rules.grx from [0adc5875f6] to [a9ce3440c8].
︙ | |||
12255 12256 12257 12258 12259 12260 12261 12262 12263 12264 12265 12266 12267 12268 | 12255 12256 12257 12258 12259 12260 12261 12262 12263 12264 12265 12266 12267 12268 12269 12270 12271 12272 12273 12274 12275 12276 12277 12278 12279 12280 12281 12282 12283 12284 | + + + + + + + + + + + + + + + + | __conf_tandis_que__ tendis [que|qu’] <<- /conf/ not value(<1, "|je|tu|il|elle|iel|on|ne|n’|le|la|les|l’|me|m’|te|t’|se|s’|") ->> tandis \2 && Confusion probable. Écrivez “tandis que” s’il s’agit bien de la locution conjonctive exprimant concomitance ou opposition.|https://fr.wiktionary.org/wiki/tandis_que TEST: mais {{tendis que}} le policier examinait nos papiers ->> tandis que # tard / tare __conf_tard_tare__ il >être ?$:W¿ %tard! [se|s’] >faire %tard! [me|m’|te|t’|se|s’] >lever ?$:W¿ %tard! [quelque+s|un] temps plus %tard! <<- /conf/ --1>> tard && Confusion. Pour dire que le temps a passé, écrivez “tard”.|https://fr.wiktionary.org/wiki/tard TEST: il est trop {{tare}} ->> tard TEST: quelque temps plus {{tares}} ->> tard TEST: s’fait {{tare}} ->> tard TEST: quelque temps plus tard TEST: QUELQUE TEMPS PLUS TARD TEST: Quelque Temps Plus Tard # taule / tôle __conf_taule_tôle1__ [>taule] [de|d’|en] [>acier|>alu|>aluminium|>bardage|>cuivre|>étanchéité|>fer|>festonnage|inox|>laiton|>métal|>trapèze|>zinc|>éverite|>fibrociment|>fibro-ciment|>plastique|>polycarbonate|PVC] <<- /conf/ -1>> =\1.replace("au", "ô").replace("AU", "Ô") && Confusion. La taule est la forme argotique pour évoquer la prison, le bordel ou toute forme d’habitation. TEST: une {{taule}} en acier |
︙ |
Modified misc/grammalecte.sublime-color-scheme from [e0092a90a2] to [c24fa9f267].
1 2 3 4 5 6 7 8 9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 | - + | { "name": "Grammalecte Color Scheme", "globals": { "background": "hsl(210, 20%, 15%)", "foreground": "hsl(210, 20%, 95%)", "caret": "hsl(210, 20%, 80%)", "block_caret": "red", |
︙ | |||
64 65 66 67 68 69 70 71 72 73 74 75 76 77 | 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 | + | { "name": "Entity Valid", "scope": "entity.valid", "foreground": "hsl(150, 100%, 80%)", "background": "hsl(150, 100%, 20%)", "font_style": "bold", }, { "name": "Entity Invalid", "scope": "entity.invalid", "foreground": "hsl(0, 100%, 80%)", "background": "hsl(0, 100%, 20%)", "font_style": "bold", }, { "name": "Token meta", "scope": "string.meta", "foreground": "hsl(270, 100%, 90%)", "background": "hsl(270, 100%, 40%)", }, { "name": "Token token", "scope": "string.token", "foreground": "hsl(240, 50%, 90%)", "background": "hsl(240, 50%, 40%)", }, { "name": "Token Jumptoken", "scope": "string.jumptoken", "foreground": "hsl(0, 50%, 90%)", "background": "hsl(10, 50%, 40%)", }, { "name": "Token lemma", "scope": "string.lemma", "foreground": "hsl(210, 100%, 80%)", "background": "hsl(210, 100%, 15%)", }, { "name": "Token phonet", "scope": "string.phonet", "foreground": "hsl(90, 100%, 80%)", "background": "hsl(90, 100%, 10%)", }, { "name": "Token tag", "scope": "string.tag", "foreground": "hsl(30, 100%, 90%)", "background": "hsl(30, 100%, 20%)", }, { "name": "Token regex", "scope": "string.regex", "foreground": "hsl(60, 100%, 80%)", "background": "hsl(60, 100%, 10%)", }, { "name": "Token morph regex", "scope": "string.morph.regex", "foreground": "hsl(150, 80%, 90%)", "background": "hsl(150, 80%, 10%)", }, { "name": "Token morph negregex", "scope": "string.morph.negregex","foreground": "hsl(0, 80%, 90%)", "background": "hsl(0, 80%, 10%)", }, { "name": "Keyword Python", "scope": "keyword.python", "foreground": "#A0A0A0", }, |
︙ |
Modified misc/grammalecte.sublime-syntax from [c9d3e55815] to [90c3fa5c9d].
︙ | |||
56 57 58 59 60 61 62 | 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 | - + | # other. - match: '\b(?:if|else|and|or|not|in)\b' scope: keyword.python - match: '\b(?:True|False|None)\b' scope: constant.language |
︙ | |||
149 150 151 152 153 154 155 156 157 158 159 160 161 162 | 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 | + + + + + | 1: entity.tag.group # Tokens - match: '(>)[\w-]+' scope: string.lemma captures: 1: entity.valid - match: '(%)[\w-]+' scope: string.phonet captures: 1: entity.valid - match: '(~)(?!(?:\d+(?::\d+|)|)>>)[^\s¬]*' scope: string.regex captures: 1: entity.valid - match: '(@)([^@\s¬]*)' |
︙ |