Index: darg.py ================================================================== --- darg.py +++ darg.py @@ -218,10 +218,11 @@ dNode = {} dReValue = {} # regex for token values dReMorph = {} # regex for morph dMorph = {} # simple search in morph dLemma = {} + dPhonet = {} dMeta = {} dTag = {} dRule = {} for sArc, oNode in self.dArcs.items(): if sArc.startswith("@") and len(sArc) > 1: @@ -230,10 +231,12 @@ dMorph[sArc[1:]] = oNode.__hash__() elif sArc.startswith("~") and len(sArc) > 1: dReValue[sArc[1:]] = oNode.__hash__() elif sArc.startswith(">") and len(sArc) > 1: dLemma[sArc[1:]] = oNode.__hash__() + elif sArc.startswith("%") and len(sArc) > 1: + dPhonet[sArc[1:]] = oNode.__hash__() elif sArc.startswith("*") and len(sArc) > 1: dMeta[sArc[1:]] = oNode.__hash__() elif sArc.startswith("/") and len(sArc) > 1: dTag[sArc[1:]] = oNode.__hash__() elif sArc.startswith("##"): @@ -246,14 +249,16 @@ dNode[""] = dReMorph if dMorph: dNode[""] = dMorph if dLemma: dNode[""] = dLemma + if dPhonet: + dNode[""] = dPhonet if dTag: dNode[""] = dTag if dMeta: dNode[""] = dMeta if dRule: dNode[""] = dRule #if self.bFinal: # dNode[""] = 1 return dNode Index: gc_core/js/lang_core/gc_engine.js ================================================================== --- gc_core/js/lang_core/gc_engine.js +++ gc_core/js/lang_core/gc_engine.js @@ -476,10 +476,36 @@ } yield { "iToken1": iToken1, "iNode": oNode[""][sLemma] }; bTokenFound = true; } } + } + // phonetic similarity + if (oNode.hasOwnProperty("")) { + for (let sPhonet in oNode[""]) { + if (sPhonet.endsWith("!")) { + let sPhon = sPhonet.slice(0,-1); + if (oToken["sValue"] == sPhon) { + continue; + } + if (oToken["sValue"].slice(0,1).gl_isUpperCase()) { + if (oToken["sValue"].toLowerCase() == sPhon) { + continue; + } + if (oToken["sValue"].gl_isUpperCase() && oToken["sValue"].gl_toCapitalize() == sPhon) { + continue; + } + } + } + if (phonet.isSimilAs(oToken["sValue"], sPhonet.gl_trimRight("!"))) { + if (bDebug) { + console.log(" MATCH: %" + sPhonet); + } + yield { "iToken1": iToken1, "iNode": oNode[""][sPhonet] }; + bTokenFound = true; + } + } } // morph arcs if (oNode.hasOwnProperty("")) { let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : gc_engine.oSpellChecker.getMorph(oToken["sValue"]); if (lMorph.length > 0) { Index: gc_core/py/lang_core/gc_engine.py ================================================================== --- gc_core/py/lang_core/gc_engine.py +++ gc_core/py/lang_core/gc_engine.py @@ -13,10 +13,11 @@ from .. import text from . import gc_functions from . import gc_options +from . import phonet try: # LibreOffice / OpenOffice from com.sun.star.linguistic2 import SingleProofreadingError from com.sun.star.text.TextMarkupType import PROOFREADING @@ -459,10 +460,27 @@ if sLemma in dNode[""]: if bDebug: echo(" MATCH: >" + sLemma) yield { "iToken1": iToken1, "iNode": dNode[""][sLemma] } bTokenFound = True + # phonetic similarity + if "" in dNode: + for sPhonet in dNode[""]: + if sPhonet.endswith("!"): + sPhon = sPhonet[0:-1] + if dToken["sValue"] == sPhon: + continue + if dToken["sValue"][0:1].isupper(): + if dToken["sValue"].lower() == sPhon: + continue + if dToken["sValue"].isupper() and dToken["sValue"].capitalize() == sPhon: + continue + if phonet.isSimilAs(dToken["sValue"], sPhonet.rstrip("!")): + if bDebug: + echo(" MATCH: %" + sPhonet) + yield { "iToken1": iToken1, "iNode": dNode[""][sPhonet] } + bTokenFound = True # morph arcs if "" in dNode: lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"])) if lMorph: for sSearch in dNode[""]: Index: gc_lang/fr/config.ini ================================================================== --- gc_lang/fr/config.ini +++ gc_lang/fr/config.ini @@ -4,11 +4,11 @@ locales = fr_FR fr_BE fr_CA fr_CH fr_LU fr_BF fr_BJ fr_CD fr_CI fr_CM fr_MA fr_ML fr_MU fr_NE fr_RE fr_SN fr_TG country_default = FR name = Grammalecte implname = grammalecte # always use 3 numbers for version: x.y.z -version = 1.12.2 +version = 2.0.0 author = Olivier R. provider = Grammalecte.net link = https://grammalecte.net description = Correcteur grammatical, orthographique et typographique pour le français. extras = README_fr.txt Index: gc_lang/fr/modules-js/phonet.js ================================================================== --- gc_lang/fr/modules-js/phonet.js +++ gc_lang/fr/modules-js/phonet.js @@ -27,11 +27,11 @@ console.error(e); } }, hasSimil: function (sWord, sPattern=null) { - // return True if there is list of words phonetically similar to sWord + // return True if there is list of words phonetically similar to if (!sWord) { return false; } if (this._dWord.has(sWord)) { if (sPattern) { @@ -50,11 +50,11 @@ } return false; }, getSimil: function (sWord) { - // return list of words phonetically similar to sWord + // return list of words phonetically similar to if (!sWord) { return []; } if (this._dWord.has(sWord)) { return this._lSet[this._dWord.get(sWord)]; @@ -67,11 +67,11 @@ } return []; }, selectSimil: function (sWord, sPattern) { - // return a set of words phonetically similar to sWord and whom POS is matching sPattern + // return a set of words phonetically similar to and whom POS is matching if (!sPattern) { return new Set(this.getSimil(sWord)); } let aSelect = new Set(); for (let sSimil of this.getSimil(sWord)) { @@ -80,10 +80,33 @@ aSelect.add(sSimil); } } } return aSelect; + }, + + isSimilAs: function (sWord, sSimil) { + // return True if phonetically similar to ( tested with several casing) + if (!sWord) { + return false; + } + let lSimils = this.getSimil(sSimil); + if (lSimils.length == 0) { + return false; + } + if (lSimils.includes(sWord)) { + return true; + } + if (sWord.slice(0,1).gl_isUpperCase()) { + if (lSimils.includes(sWord.toLowerCase())) { + return true; + } + if (sWord.gl_isUpperCase() && lSimils.includes(sWord.gl_toCapitalize())) { + return true; + } + } + return false; } }; // Initialization Index: gc_lang/fr/modules/phonet.py ================================================================== --- gc_lang/fr/modules/phonet.py +++ gc_lang/fr/modules/phonet.py @@ -10,11 +10,11 @@ from .phonet_data import lSet as _lSet from .phonet_data import dMorph as _dMorph def hasSimil (sWord, sPattern=None): - "return True if there is list of words phonetically similar to sWord" + "return True if there is list of words phonetically similar to " if not sWord: return False if sWord in _dWord: if sPattern: return any(re.search(sPattern, sMorph) for sSimil in getSimil(sWord) for sMorph in _dMorph.get(sSimil, [])) @@ -27,11 +27,11 @@ return True return False def getSimil (sWord): - "return list of words phonetically similar to sWord" + "return list of words phonetically similar to " if not sWord: return [] if sWord in _dWord: return _lSet[_dWord[sWord]] if sWord[0:1].isupper(): @@ -40,14 +40,31 @@ return _lSet[_dWord[sWord]] return [] def selectSimil (sWord, sPattern): - "return a set of words phonetically similar to sWord and whom POS is matching sPattern" + "return a set of words phonetically similar to and whom POS is matching " if not sPattern: return set(getSimil(sWord)) aSelect = set() for sSimil in getSimil(sWord): for sMorph in _dMorph.get(sSimil, []): if re.search(sPattern, sMorph): aSelect.add(sSimil) return aSelect + + +def isSimilAs (sWord, sSimil): + "return True if phonetically similar to ( tested with several casing)" + if not sWord: + return False + lSimils = getSimil(sSimil) + if not lSimils: + return False + if sWord in lSimils: + return True + if sWord[0:1].isupper(): + if sWord.lower() in lSimils: + return True + if sWord.isupper() and sWord.capitalize() in lSimils: + return True + return False Index: gc_lang/fr/rules.grx ================================================================== --- gc_lang/fr/rules.grx +++ gc_lang/fr/rules.grx @@ -12257,10 +12257,26 @@ <<- /conf/ not value(<1, "|je|tu|il|elle|iel|on|ne|n’|le|la|les|l’|me|m’|te|t’|se|s’|") ->> tandis \2 && Confusion probable. Écrivez “tandis que” s’il s’agit bien de la locution conjonctive exprimant concomitance ou opposition.|https://fr.wiktionary.org/wiki/tandis_que TEST: mais {{tendis que}} le policier examinait nos papiers ->> tandis que + +# tard / tare +__conf_tard_tare__ + il >être ?$:W¿ %tard! + [se|s’] >faire %tard! + [me|m’|te|t’|se|s’] >lever ?$:W¿ %tard! + [quelque+s|un] temps plus %tard! + <<- /conf/ --1>> tard && Confusion. Pour dire que le temps a passé, écrivez “tard”.|https://fr.wiktionary.org/wiki/tard + +TEST: il est trop {{tare}} ->> tard +TEST: quelque temps plus {{tares}} ->> tard +TEST: s’fait {{tare}} ->> tard +TEST: quelque temps plus tard +TEST: QUELQUE TEMPS PLUS TARD +TEST: Quelque Temps Plus Tard + # taule / tôle __conf_taule_tôle1__ [>taule] [de|d’|en] [>acier|>alu|>aluminium|>bardage|>cuivre|>étanchéité|>fer|>festonnage|inox|>laiton|>métal|>trapèze|>zinc|>éverite|>fibrociment|>fibro-ciment|>plastique|>polycarbonate|PVC] <<- /conf/ -1>> =\1.replace("au", "ô").replace("AU", "Ô") && Confusion. La taule est la forme argotique pour évoquer la prison, le bordel ou toute forme d’habitation. Index: misc/grammalecte.sublime-color-scheme ================================================================== --- misc/grammalecte.sublime-color-scheme +++ misc/grammalecte.sublime-color-scheme @@ -5,11 +5,11 @@ "background": "hsl(210, 20%, 15%)", "foreground": "hsl(210, 20%, 95%)", "caret": "hsl(210, 20%, 80%)", "block_caret": "red", - "line_highlight": "hsl(210, 60%, 25%)", + "line_highlight": "hsl(210, 60%, 30%)", "bracket_options": "underline bold", "selection": "hsl(210, 50%, 20%)", "selection_border": "hsl(210, 80%, 40%)", "selection_border_width": "1", @@ -66,10 +66,11 @@ { "name": "Entity Invalid", "scope": "entity.invalid", "foreground": "hsl(0, 100%, 80%)", "background": "hsl(0, 100%, 20%)", "font_style": "bold", }, { "name": "Token meta", "scope": "string.meta", "foreground": "hsl(270, 100%, 90%)", "background": "hsl(270, 100%, 40%)", }, { "name": "Token token", "scope": "string.token", "foreground": "hsl(240, 50%, 90%)", "background": "hsl(240, 50%, 40%)", }, { "name": "Token Jumptoken", "scope": "string.jumptoken", "foreground": "hsl(0, 50%, 90%)", "background": "hsl(10, 50%, 40%)", }, { "name": "Token lemma", "scope": "string.lemma", "foreground": "hsl(210, 100%, 80%)", "background": "hsl(210, 100%, 15%)", }, + { "name": "Token phonet", "scope": "string.phonet", "foreground": "hsl(90, 100%, 80%)", "background": "hsl(90, 100%, 10%)", }, { "name": "Token tag", "scope": "string.tag", "foreground": "hsl(30, 100%, 90%)", "background": "hsl(30, 100%, 20%)", }, { "name": "Token regex", "scope": "string.regex", "foreground": "hsl(60, 100%, 80%)", "background": "hsl(60, 100%, 10%)", }, { "name": "Token morph regex", "scope": "string.morph.regex", "foreground": "hsl(150, 80%, 90%)", "background": "hsl(150, 80%, 10%)", }, { "name": "Token morph negregex", "scope": "string.morph.negregex","foreground": "hsl(0, 80%, 90%)", "background": "hsl(0, 80%, 10%)", }, Index: misc/grammalecte.sublime-syntax ================================================================== --- misc/grammalecte.sublime-syntax +++ misc/grammalecte.sublime-syntax @@ -58,11 +58,11 @@ scope: keyword.python - match: '\b(?:True|False|None)\b' scope: constant.language - - match: '\b(?:spell|morph|morphVC|stem|tag|value|space_after|textarea0?\w*|before0?\w*|after0?\w*|word|option|define|define_from|select|exclude|analyse\w*|tag_\w+|apposition|is[A-Z]\w+|rewriteSubject|checkD\w+|getD\w+|has[A-Z]\w+|sugg[A-Z]\w+|switch[A-Z]\w+|ceOrCet|formatN\w+|mbUnit)\b' + - match: '\b(?:spell|morph|morphVC|stem|tag|value|space_after|textarea0?\w*|before0?\w*|after0?\w*|word|option|define|define_from|select|exclude|analyse\w*|tag_\w+|apposition|is[A-Z]\w+|checkAgreement|rewrite|checkD\w+|getD\w+|has[A-Z]\w+|sugg[A-Z]\w+|switch[A-Z]\w+|ceOrCet|formatN\w+|mbUnit)\b' scope: entity.name.function - match: '\b(?:replace|endswith|startswith|search|upper|lower|capitalize|strip|rstrip|is(?:alpha|upper|lower|digit|title))\b' scope: support.function @@ -151,10 +151,15 @@ # Tokens - match: '(>)[\w-]+' scope: string.lemma captures: 1: entity.valid + + - match: '(%)[\w-]+' + scope: string.phonet + captures: + 1: entity.valid - match: '(~)(?!(?:\d+(?::\d+|)|)>>)[^\s¬]*' scope: string.regex captures: 1: entity.valid