Index: gc_core/js/helpers.js ================================================================== --- gc_core/js/helpers.js +++ gc_core/js/helpers.js @@ -30,10 +30,19 @@ } else { console.error(sMsg); } } +function inspect (o) { + let sMsg = "__inspect__: " + typeof o; + for (let sParam in o) { + sMsg += "\n" + sParam + ": " + o.sParam; + } + sMsg += "\n" + JSON.stringify(o) + "\n__end__"; + echo(sMsg); +} + // load ressources in workers (suggested by Mozilla extensions reviewers) // for more options have a look here: https://gist.github.com/Noitidart/ec1e6b9a593ec7e3efed // if not in workers, use sdk/data.load() instead function loadFile (spf) { @@ -78,11 +87,12 @@ obj[k] = v; } return obj; } +exports.setLogOutput = setLogOutput; exports.echo = echo; exports.logerror = logerror; +exports.inspect = inspect; exports.objectToMap = objectToMap; exports.mapToObject = mapToObject; -exports.setLogOutput = setLogOutput; exports.loadFile = loadFile; Index: gc_core/js/tokenizer.js ================================================================== --- gc_core/js/tokenizer.js +++ gc_core/js/tokenizer.js @@ -3,17 +3,16 @@ "use strict"; const helpers = require("resource://grammalecte/helpers.js"); - const aPatterns = { // All regexps must start with ^. "default": [ [/^[   \t]+/, 'SPACE'], - [/^[,.;:!?…«»“”"()/·]+/, 'SEPARATOR'], + [/^[,.;:!?…«»“”‘’"(){}\[\]/·–—]+/, 'SEPARATOR'], [/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_]+[@.])[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.\/?&!%=+*"'@$#-]+/, 'LINK'], [/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+/, 'TAG'], [/^<[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+.*?>|<\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+ *>/, 'HTML'], [/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+\]/, 'PSEUDOHTML'], [/^&\w+;(?:\w+;|)/, 'HTMLENTITY'], @@ -22,19 +21,19 @@ [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+)*/, 'WORD'] ], "fr": [ [/^[   \t]+/, 'SPACE'], - [/^[,.;:!?…«»“”"()/·]+/, 'SEPARATOR'], + [/^[,.;:!?…«»“”‘’"(){}\[\]/·–—]+/, 'SEPARATOR'], [/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_]+[@.])[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.\/?&!%=+*"'@$#-]+/, 'LINK'], [/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+/, 'TAG'], [/^<[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+.*?>|<\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+ *>/, 'HTML'], [/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+\]/, 'PSEUDOHTML'], [/^&\w+;(?:\w+;|)/, 'HTMLENTITY'], [/^(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`]/i, 'ELPFX'], [/^\d\d?[hm]\d\d\b/, 'HOUR'], - [/^\d+(?:er|nd|e|de|ième|ème|eme)\b/, 'ORDINAL'], + [/^\d+(?:er|nd|e|de|ième|ème|eme)s?\b/, 'ORDINAL'], [/^-?\d+(?:[.,]\d+|)/, 'NUM'], [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+)*/, 'WORD'] ] } @@ -44,11 +43,11 @@ constructor (sLang) { this.sLang = sLang; if (!aPatterns.hasOwnProperty(sLang)) { this.sLang = "default"; } - this.aRules = aPatterns[sLang]; + this.aRules = aPatterns[this.sLang]; }; * genTokens (sText) { let m; let i = 0; @@ -67,9 +66,19 @@ } } i += nCut; sText = sText.slice(nCut); } + }; + + getSpellingErrors (sText, oDict) { + let aSpellErr = []; + for (let oToken of this.genTokens(sText)) { + if (oToken.sType === 'WORD' && !oDict.isValidToken(oToken.sValue)) { + aSpellErr.push(oToken); + } + } + return aSpellErr; } } exports.Tokenizer = Tokenizer; Index: gc_lang/fr/config.ini ================================================================== --- gc_lang/fr/config.ini +++ gc_lang/fr/config.ini @@ -3,11 +3,11 @@ lang_name = French locales = fr_FR fr_BE fr_CA fr_CH fr_LU fr_MC fr_BF fr_CI fr_SN fr_ML fr_NE fr_TG fr_BJ country_default = FR name = Grammalecte implname = grammalecte -version = 0.5.17.2 +version = 0.5.18 author = Olivier R. provider = Dicollecte link = http://grammalecte.net description = Correcteur grammatical pour le français. extras = README_fr.txt Index: gc_lang/fr/modules-js/lexicographe.js ================================================================== --- gc_lang/fr/modules-js/lexicographe.js +++ gc_lang/fr/modules-js/lexicographe.js @@ -2,13 +2,15 @@ // License: MPL 2 "use strict"; ${string} +${map} const helpers = require("resource://grammalecte/helpers.js"); +const tkz = require("resource://grammalecte/tokenizer.js"); const _dTAGS = new Map ([ [':G', "[mot grammatical]"], [':N', " nom,"], @@ -155,10 +157,42 @@ ["m'en", " (me) pronom personnel objet + (en) pronom adverbial"], ["t'en", " (te) pronom personnel objet + (en) pronom adverbial"], ["s'en", " (se) pronom personnel objet + (en) pronom adverbial"] ]); +const _dSeparator = new Map ([ + ['.', "point"], + ['·', "point médian"], + ['…', "points de suspension"], + [':', "deux-points"], + [';', "point-virgule"], + [',', "virgule"], + ['?', "point d’interrogation"], + ['!', "point d’exclamation"], + ['(', "parenthèse ouvrante"], + [')', "parenthèse fermante"], + ['[', "crochet ouvrante"], + [']', "crochet fermante"], + ['{', "accolade ouvrante"], + ['}', "accolade fermante"], + ['-', "tiret"], + ['—', "tiret cadratin"], + ['–', "tiret demi-cadratin"], + ['«', "guillemet ouvrant (chevrons)"], + ['»', "guillemet fermant (chevrons)"], + ['“', "guillemet ouvrant double"], + ['”', "guillemet fermant double"], + ['‘', "guillemet ouvrant"], + ['’', "guillemet fermant"], + ['/', "signe de la division"], + ['+', "signe de l’addition"], + ['*', "signe de la multiplication"], + ['=', "signe de l’égalité"], + ['<', "inférieur à"], + ['>', "supérieur à"], +]); + class Lexicographe { constructor (oDict) { this.oDict = oDict; @@ -165,24 +199,74 @@ this._zElidedPrefix = new RegExp ("^([dljmtsncç]|quoiqu|lorsqu|jusqu|puisqu|qu)['’](.+)", "i"); this._zCompoundWord = new RegExp ("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)-((?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts][’'](?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous)$", "i"); this._zTag = new RegExp ("[:;/][a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ*][^:;/]*", "g"); }; - analyzeText (sText) { + getInfoForToken (oToken) { + // Token: .sType, .sValue, .nStart, .nEnd + // return a list [type, token_string, values] + let m = null; + try { + helpers.echo(oToken); + switch (oToken.sType) { + case 'SEPARATOR': + return { sType: oToken.sType, sValue: oToken.sValue, aLabel: [_dSeparator._get(oToken.sValue, "caractère indéterminé")] }; + break; + case 'NUM': + return { sType: oToken.sType, sValue: oToken.sValue, aLabel: ["nombre"] }; + break; + case 'LINK': + return { sType: oToken.sType, sValue: oToken.sValue.slice(0,40)+"…", aLabel: ["hyperlien"] }; + break; + case 'ELPFX': + let sTemp = oToken.sValue.replace("’", "").replace("'", "").replace("`", "").toLowerCase(); + return { sType: oToken.sType, sValue: oToken.sValue, aLabel: [_dPFX._get(sTemp, "préfixe élidé inconnu")] }; + break; + case 'WORD': + if (oToken.sValue._count("-") > 4) { + return { sType: "COMPLEX", sValue: oToken.sValue, aLabel: ["élément complexe indéterminé"] }; + } + else if (this.oDict.isValidToken(oToken.sValue)) { + let lMorph = this.oDict.getMorph(oToken.sValue); + let aElem = [ for (s of lMorph) if (s.includes(":")) this._formatTags(s) ]; + return { sType: oToken.sType, sValue: oToken.sValue, aLabel: aElem}; + } + else if (m = this._zCompoundWord.exec(oToken.sValue)) { + // mots composés + let lMorph = this.oDict.getMorph(m[1]); + let aElem = [ for (s of lMorph) if (s.includes(":")) this._formatTags(s) ]; + aElem.push("-" + m[2] + ": " + this._formatSuffix(m[2].toLowerCase())); + return { sType: oToken.sType, sValue: oToken.sValue, aLabel: aElem }; + } + else { + return { sType: "UNKNOWN", sValue: oToken.sValue, aLabel: ["inconnu du dictionnaire"] }; + } + break; + } + } + catch (e) { + helpers.logerror(e); + } + return null; + }; + + getHTMLForText (sText) { + // deprecated sText = sText.replace(/[.,.?!:;…\/()\[\]“”«»"„{}–—#+*<>%=\n]/g, " ").replace(/\s+/g, " "); let iStart = 0; let iEnd = 0; let sHtml = '
\n'; while ((iEnd = sText.indexOf(" ", iStart)) !== -1) { - sHtml += this.analyzeWord(sText.slice(iStart, iEnd)); + sHtml += this.getHTMLForToken(sText.slice(iStart, iEnd)); iStart = iEnd + 1; } - sHtml += this.analyzeWord(sText.slice(iStart)); + sHtml += this.getHTMLForToken(sText.slice(iStart)); return sHtml + '
\n'; - } + }; - analyzeWord (sWord) { + getHTMLForToken (sWord) { + // deprecated try { if (!sWord) { return ""; } if (sWord._count("-") > 4) { @@ -205,13 +289,13 @@ sWord = m2[1]; } // Morphologies let lMorph = this.oDict.getMorph(sWord); if (lMorph.length === 1) { - sHtml += "

" + sWord + " : " + this.formatTags(lMorph[0]) + "

\n"; + sHtml += "

" + sWord + " : " + this._formatTags(lMorph[0]) + "

\n"; } else if (lMorph.length > 1) { - sHtml += "

" + sWord + "

\n"; + sHtml += "

" + sWord + "

\n"; } else { sHtml += '

' + sWord + " : absent du dictionnaire

\n"; } // suffixe d’un mot composé if (m2) { @@ -225,11 +309,11 @@ helpers.logerror(e); return "#erreur"; } }; - formatTags (sTags) { + _formatTags (sTags) { let sRes = ""; sTags = sTags.replace(/V([0-3][ea]?)[itpqnmr_eaxz]+/, "V$1"); let m; while ((m = this._zTag.exec(sTags)) !== null) { sRes += _dTAGS.get(m[0]); Index: gc_lang/fr/xpi/data/lxg_panel.css ================================================================== --- gc_lang/fr/xpi/data/lxg_panel.css +++ gc_lang/fr/xpi/data/lxg_panel.css @@ -40,28 +40,43 @@ color: hsla(0, 0%, 96%, 1); border-radius: 5px; text-align: center; font-size: 20px; } +#wordlist .token { + margin: 8px; +} #wordlist ul { margin: 0 0 5px 40px; } #wordlist b { - background-color: hsla(150, 50%, 50%, 1); + background-color: hsla(150, 10%, 50%, 1); color: hsla(0, 0%, 96%, 1); padding: 2px 5px; border-radius: 2px; text-decoration: none; } -#wordlist b.unknown { +#wordlist b.WORD { + background-color: hsla(150, 50%, 50%, 1); +} +#wordlist b.ELPFX { + background-color: hsla(150, 30%, 50%, 1); +} +#wordlist b.UNKNOWN { background-color: hsla(0, 50%, 50%, 1); } -#wordlist b.nb { +#wordlist b.NUM { + background-color: hsla(180, 50%, 50%, 1); +} +#wordlist b.COMPLEX { + background-color: hsla(60, 50%, 50%, 1); +} +#wordlist b.SEPARATOR { background-color: hsla(210, 50%, 50%, 1); } -#wordlist b.mbok { - background-color: hsla(60, 50%, 50%, 1); +#wordlist b.LINK { + background-color: hsla(270, 50%, 50%, 1); } #wordlist s { color: hsla(0, 0%, 60%, 1); text-decoration: none; } Index: gc_lang/fr/xpi/data/lxg_panel.js ================================================================== --- gc_lang/fr/xpi/data/lxg_panel.js +++ gc_lang/fr/xpi/data/lxg_panel.js @@ -25,22 +25,19 @@ self.port.emit('openConjugueur'); }); */ self.port.on("addSeparator", function (sText) { - if (document.getElementById("wordlist").innerHTML !== "") { - let xElem = document.createElement("p"); - xElem.className = "separator"; - xElem.innerHTML = sText; - document.getElementById("wordlist").appendChild(xElem); - } + addSeparator(sText); +}); + +self.port.on("addParagraphElems", function (sJSON) { + addParagraphElems(sJSON); }); -self.port.on("addElem", function (sHtml) { - let xElem = document.createElement("div"); - xElem.innerHTML = sHtml; - document.getElementById("wordlist").appendChild(xElem); +self.port.on("addMessage", function (sClass, sText) { + addMessage(sClass, sText); }); self.port.on("clear", function (sHtml) { document.getElementById("wordlist").textContent = ""; }); @@ -65,10 +62,71 @@ } }, false ); + +/* + Actions +*/ + +function addSeparator (sText) { + if (document.getElementById("wordlist").textContent !== "") { + let xElem = document.createElement("p"); + xElem.className = "separator"; + xElem.textContent = sText; + document.getElementById("wordlist").appendChild(xElem); + } +} + +function addMessage (sClass, sText) { + let xNode = document.createElement("p"); + xNode.className = sClass; + xNode.textContent = sText; + document.getElementById("wordlist").appendChild(xNode); +} + +function addParagraphElems (sJSON) { + try { + let xNodeDiv = document.createElement("div"); + xNodeDiv.className = "paragraph"; + let lElem = JSON.parse(sJSON); + for (let oToken of lElem) { + xNodeDiv.appendChild(createTokenNode(oToken)); + } + document.getElementById("wordlist").appendChild(xNodeDiv); + } + catch (e) { + console.error("\n" + e.fileName + "\n" + e.name + "\nline: " + e.lineNumber + "\n" + e.message); + console.error(sJSON); + } +} + +function createTokenNode (oToken) { + let xTokenNode = document.createElement("div"); + xTokenNode.className = "token " + oToken.sType; + let xTokenValue = document.createElement("b"); + xTokenValue.className = oToken.sType; + xTokenValue.textContent = oToken.sValue; + xTokenNode.appendChild(xTokenValue); + let xSep = document.createElement("s"); + xSep.textContent = " : "; + xTokenNode.appendChild(xSep); + if (oToken.aLabel.length === 1) { + xTokenNode.appendChild(document.createTextNode(oToken.aLabel[0])); + } else { + let xTokenList = document.createElement("ul"); + for (let sLabel of oToken.aLabel) { + let xTokenLine = document.createElement("li"); + xTokenLine.textContent = sLabel; + xTokenList.appendChild(xTokenLine); + } + xTokenNode.appendChild(xTokenList); + } + return xTokenNode; +} + // display selection function displayClasses () { setHidden("ok", document.getElementById("ok").checked); Index: gc_lang/fr/xpi/gce_worker.js ================================================================== --- gc_lang/fr/xpi/gce_worker.js +++ gc_lang/fr/xpi/gce_worker.js @@ -84,28 +84,18 @@ return JSON.stringify(aGrammErr); } function parseAndSpellcheck (sText, sLang, bDebug, bContext) { let aGrammErr = gce.parse(sText, sLang, bDebug, bContext); - let aSpellErr = []; - for (let oToken of oTokenizer.genTokens(sText)) { - if (oToken.sType === 'WORD' && !oDict.isValidToken(oToken.sValue)) { - aSpellErr.push(oToken); - } - } + let aSpellErr = oTokenizer.getSpellingErrors(sText, oDict); return JSON.stringify({ aGrammErr: aGrammErr, aSpellErr: aSpellErr }); } function parseAndTag (sText, iParagraph, sLang, bDebug) { sText = text.addHtmlEntities(sText); - let aSpellErr = []; - for (let oToken of oTokenizer.genTokens(sText)) { - if (oToken.sType === 'WORD' && !oDict.isValidToken(oToken.sValue)) { - aSpellErr.push(oToken); - } - } let aGrammErr = gce.parse(sText, sLang, bDebug); + let aSpellErr = oTokenizer.getSpellingErrors(sText, oDict); let sHtml = text.tagParagraph(sText, iParagraph, aGrammErr, aSpellErr); return sHtml; } function parseAndGenerateParagraph (sText, iParagraph, sLang, bDebug) { @@ -155,8 +145,21 @@ } // Lexicographer -function analyzeWords (sText) { - return oLxg.analyzeText(sText); +function getListOfElements (sText) { + try { + let aElem = []; + let aRes = null; + for (let oToken of oTokenizer.genTokens(sText)) { + aRes = oLxg.getInfoForToken(oToken); + if (aRes) { + aElem = aElem.concat(aRes); + } + } + return JSON.stringify(aElem); + } + catch (e) { + helpers.logerror(e); + } } Index: gc_lang/fr/xpi/package.json ================================================================== --- gc_lang/fr/xpi/package.json +++ gc_lang/fr/xpi/package.json @@ -1,10 +1,10 @@ { "name": "grammalecte-fr", "title": "Grammalecte [fr]", "id": "French-GC@grammalecte.net", - "version": "0.5.17.2", + "version": "0.5.18", "description": "Correcteur grammatical pour le français", "homepage": "http://www.dicollecte.org/grammalecte", "main": "ui.js", "icon": "data/img/icon-48.png", "scripts": { Index: gc_lang/fr/xpi/ui.js ================================================================== --- gc_lang/fr/xpi/ui.js +++ gc_lang/fr/xpi/ui.js @@ -569,19 +569,19 @@ let nParagraph = 0; // non empty paragraphs let sRes = ""; try { for (let sParagraph of text.getParagraph(sText)) { if (sParagraph.trim() !== "") { - sRes = await xGCEWorker.post('analyzeWords', [sParagraph]) - xLxgPanel.port.emit("addElem", sRes); + sRes = await xGCEWorker.post('getListOfElements', [sParagraph]); + xLxgPanel.port.emit("addParagraphElems", sRes); nParagraph += 1; } } - xLxgPanel.port.emit("addElem", '

' + _("numberOfParagraphs") + " " + nParagraph + '

'); + xLxgPanel.port.emit("addMessage", 'message', _("numberOfParagraphs") + " " + nParagraph); } catch (e) { - xLxgPanel.port.emit("addElem", '

'+e.message+"

"); + xLxgPanel.port.emit("addMessage", 'bug', e.message); } xLxgPanel.port.emit("stopWaitIcon"); }