Index: gc_core/js/helpers.js
==================================================================
--- gc_core/js/helpers.js
+++ gc_core/js/helpers.js
@@ -30,10 +30,19 @@
} else {
console.error(sMsg);
}
}
+function inspect (o) {
+ let sMsg = "__inspect__: " + typeof o;
+ for (let sParam in o) {
+ sMsg += "\n" + sParam + ": " + o.sParam;
+ }
+ sMsg += "\n" + JSON.stringify(o) + "\n__end__";
+ echo(sMsg);
+}
+
// load ressources in workers (suggested by Mozilla extensions reviewers)
// for more options have a look here: https://gist.github.com/Noitidart/ec1e6b9a593ec7e3efed
// if not in workers, use sdk/data.load() instead
function loadFile (spf) {
@@ -78,11 +87,12 @@
obj[k] = v;
}
return obj;
}
+exports.setLogOutput = setLogOutput;
exports.echo = echo;
exports.logerror = logerror;
+exports.inspect = inspect;
exports.objectToMap = objectToMap;
exports.mapToObject = mapToObject;
-exports.setLogOutput = setLogOutput;
exports.loadFile = loadFile;
Index: gc_core/js/tokenizer.js
==================================================================
--- gc_core/js/tokenizer.js
+++ gc_core/js/tokenizer.js
@@ -3,17 +3,16 @@
"use strict";
const helpers = require("resource://grammalecte/helpers.js");
-
const aPatterns = {
// All regexps must start with ^.
"default":
[
[/^[ \t]+/, 'SPACE'],
- [/^[,.;:!?…«»“”"()/·]+/, 'SEPARATOR'],
+ [/^[,.;:!?…«»“”‘’"(){}\[\]/·–—]+/, 'SEPARATOR'],
[/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_]+[@.])[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.\/?&!%=+*"'@$#-]+/, 'LINK'],
[/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+/, 'TAG'],
[/^<[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+.*?>|<\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+ *>/, 'HTML'],
[/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+\]/, 'PSEUDOHTML'],
[/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
@@ -22,19 +21,19 @@
[/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+)*/, 'WORD']
],
"fr":
[
[/^[ \t]+/, 'SPACE'],
- [/^[,.;:!?…«»“”"()/·]+/, 'SEPARATOR'],
+ [/^[,.;:!?…«»“”‘’"(){}\[\]/·–—]+/, 'SEPARATOR'],
[/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_]+[@.])[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.\/?&!%=+*"'@$#-]+/, 'LINK'],
[/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+/, 'TAG'],
[/^<[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+.*?>|<\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+ *>/, 'HTML'],
[/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+\]/, 'PSEUDOHTML'],
[/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
[/^(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`]/i, 'ELPFX'],
[/^\d\d?[hm]\d\d\b/, 'HOUR'],
- [/^\d+(?:er|nd|e|de|ième|ème|eme)\b/, 'ORDINAL'],
+ [/^\d+(?:er|nd|e|de|ième|ème|eme)s?\b/, 'ORDINAL'],
[/^-?\d+(?:[.,]\d+|)/, 'NUM'],
[/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+)*/, 'WORD']
]
}
@@ -44,11 +43,11 @@
constructor (sLang) {
this.sLang = sLang;
if (!aPatterns.hasOwnProperty(sLang)) {
this.sLang = "default";
}
- this.aRules = aPatterns[sLang];
+ this.aRules = aPatterns[this.sLang];
};
* genTokens (sText) {
let m;
let i = 0;
@@ -55,11 +54,17 @@
while (sText) {
let nCut = 1;
for (let [zRegex, sType] of this.aRules) {
try {
if ((m = zRegex.exec(sText)) !== null) {
- yield { "sType": sType, "sValue": m[0], "nStart": i, "nEnd": i + m[0].length }
+ if (sType == 'SEPARATOR') {
+ for (let c of m[0]) {
+ yield { "sType": sType, "sValue": c, "nStart": i, "nEnd": i + m[0].length }
+ }
+ } else {
+ yield { "sType": sType, "sValue": m[0], "nStart": i, "nEnd": i + m[0].length }
+ }
nCut = m[0].length;
break;
}
}
catch (e) {
@@ -67,9 +72,19 @@
}
}
i += nCut;
sText = sText.slice(nCut);
}
+ };
+
+ getSpellingErrors (sText, oDict) {  // collect the WORD tokens of sText that oDict.isValidToken() rejects
+ let aSpellErr = [];
+ for (let oToken of this.genTokens(sText)) {
+ if (oToken.sType === 'WORD' && !oDict.isValidToken(oToken.sValue)) {  // tokens of any other type are never reported
+ aSpellErr.push(oToken);  // the whole token object is kept, not just its text
+ }
+ }
+ return aSpellErr;  // array of token objects; empty when no word is rejected
}
}
exports.Tokenizer = Tokenizer;
Index: gc_lang/fr/config.ini
==================================================================
--- gc_lang/fr/config.ini
+++ gc_lang/fr/config.ini
@@ -3,11 +3,11 @@
lang_name = French
locales = fr_FR fr_BE fr_CA fr_CH fr_LU fr_MC fr_BF fr_CI fr_SN fr_ML fr_NE fr_TG fr_BJ
country_default = FR
name = Grammalecte
implname = grammalecte
-version = 0.5.17.2
+version = 0.5.18
author = Olivier R.
provider = Dicollecte
link = http://grammalecte.net
description = Correcteur grammatical pour le français.
extras = README_fr.txt
Index: gc_lang/fr/modules-js/lexicographe.js
==================================================================
--- gc_lang/fr/modules-js/lexicographe.js
+++ gc_lang/fr/modules-js/lexicographe.js
@@ -2,13 +2,15 @@
// License: MPL 2
"use strict";
${string}
+${map}
const helpers = require("resource://grammalecte/helpers.js");
+const tkz = require("resource://grammalecte/tokenizer.js");
const _dTAGS = new Map ([
[':G', "[mot grammatical]"],
[':N', " nom,"],
@@ -155,10 +157,42 @@
["m'en", " (me) pronom personnel objet + (en) pronom adverbial"],
["t'en", " (te) pronom personnel objet + (en) pronom adverbial"],
["s'en", " (se) pronom personnel objet + (en) pronom adverbial"]
]);
+const _dSeparator = new Map ([
+ ['.', "point"],
+ ['·', "point médian"],
+ ['…', "points de suspension"],
+ [':', "deux-points"],
+ [';', "point-virgule"],
+ [',', "virgule"],
+ ['?', "point d’interrogation"],
+ ['!', "point d’exclamation"],
+ ['(', "parenthèse ouvrante"],
+ [')', "parenthèse fermante"],
+ ['[', "crochet ouvrante"],
+ [']', "crochet fermante"],
+ ['{', "accolade ouvrante"],
+ ['}', "accolade fermante"],
+ ['-', "tiret"],
+ ['—', "tiret cadratin"],
+ ['–', "tiret demi-cadratin"],
+ ['«', "guillemet ouvrant (chevrons)"],
+ ['»', "guillemet fermant (chevrons)"],
+ ['“', "guillemet ouvrant double"],
+ ['”', "guillemet fermant double"],
+ ['‘', "guillemet ouvrant"],
+ ['’', "guillemet fermant"],
+ ['/', "signe de la division"],
+ ['+', "signe de l’addition"],
+ ['*', "signe de la multiplication"],
+ ['=', "signe de l’égalité"],
+ ['<', "inférieur à"],
+ ['>', "supérieur à"],
+]);
+
class Lexicographe {
constructor (oDict) {
this.oDict = oDict;
@@ -165,71 +199,58 @@
this._zElidedPrefix = new RegExp ("^([dljmtsncç]|quoiqu|lorsqu|jusqu|puisqu|qu)['’](.+)", "i");
this._zCompoundWord = new RegExp ("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)-((?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts][’'](?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous)$", "i");
this._zTag = new RegExp ("[:;/][a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ*][^:;/]*", "g");
};
- analyzeText (sText) {
- sText = sText.replace(/[.,.?!:;…\/()\[\]“”«»"„{}–—#+*<>%=\n]/g, " ").replace(/\s+/g, " ");
- let iStart = 0;
- let iEnd = 0;
- let sHtml = '
\n';
- while ((iEnd = sText.indexOf(" ", iStart)) !== -1) {
- sHtml += this.analyzeWord(sText.slice(iStart, iEnd));
- iStart = iEnd + 1;
- }
- sHtml += this.analyzeWord(sText.slice(iStart));
- return sHtml + '
\n';
- }
-
- analyzeWord (sWord) {
+ getInfoForToken (oToken) {
+ // Token: .sType, .sValue, .nStart, .nEnd
+ // returns an object { sType, sValue, aLabel } describing the token, or null when the token type is not handled or an error was logged
+ let m = null;
try {
- if (!sWord) {
- return "";
- }
- if (sWord._count("-") > 4) {
- return '' + sWord + " : élément complexe indéterminé
\n";
- }
- if (sWord._isDigit()) {
- return '' + sWord + " : nombre
\n";
- }
-
- let sHtml = "";
- // préfixes élidés
- let m = this._zElidedPrefix.exec(sWord);
- if (m !== null) {
- sWord = m[2];
- sHtml += "" + m[1] + "’ : " + _dPFX.get(m[1].toLowerCase()) + "
\n";
- }
- // mots composés
- let m2 = this._zCompoundWord.exec(sWord);
- if (m2 !== null) {
- sWord = m2[1];
- }
- // Morphologies
- let lMorph = this.oDict.getMorph(sWord);
- if (lMorph.length === 1) {
- sHtml += "" + sWord + " : " + this.formatTags(lMorph[0]) + "
\n";
- } else if (lMorph.length > 1) {
- sHtml += "" + sWord + "
- " + [for (s of lMorph) if (s.includes(":")) this.formatTags(s)].join("
- ") + "
\n";
- } else {
- sHtml += '' + sWord + " : absent du dictionnaire
\n";
- }
- // suffixe d’un mot composé
- if (m2) {
- sHtml += "
-" + m2[2] + " : " + this._formatSuffix(m2[2].toLowerCase()) + "
\n";
- }
- // Verbes
- //let aVerb = new Set([ for (s of lMorph) if (s.includes(":V")) s.slice(1, s.indexOf(" ")) ]);
- return sHtml;
+ switch (oToken.sType) {
+ case 'SEPARATOR':
+ return { sType: oToken.sType, sValue: oToken.sValue, aLabel: [_dSeparator._get(oToken.sValue, "caractère indéterminé")] };
+ break;
+ case 'NUM':
+ return { sType: oToken.sType, sValue: oToken.sValue, aLabel: ["nombre"] };
+ break;
+ case 'LINK':
+ return { sType: oToken.sType, sValue: oToken.sValue.slice(0,40)+"…", aLabel: ["hyperlien"] };
+ break;
+ case 'ELPFX':
+ let sTemp = oToken.sValue.replace("’", "").replace("'", "").replace("`", "").toLowerCase();
+ return { sType: oToken.sType, sValue: oToken.sValue, aLabel: [_dPFX._get(sTemp, "préfixe élidé inconnu")] };
+ break;
+ case 'WORD':
+ if (oToken.sValue._count("-") > 4) {
+ return { sType: "COMPLEX", sValue: oToken.sValue, aLabel: ["élément complexe indéterminé"] };
+ }
+ else if (this.oDict.isValidToken(oToken.sValue)) {
+ let lMorph = this.oDict.getMorph(oToken.sValue);
+ let aElem = [ for (s of lMorph) if (s.includes(":")) this._formatTags(s) ];
+ return { sType: oToken.sType, sValue: oToken.sValue, aLabel: aElem};
+ }
+ else if (m = this._zCompoundWord.exec(oToken.sValue)) {
+ // mots composés
+ let lMorph = this.oDict.getMorph(m[1]);
+ let aElem = [ for (s of lMorph) if (s.includes(":")) this._formatTags(s) ];
+ aElem.push("-" + m[2] + ": " + this._formatSuffix(m[2].toLowerCase()));
+ return { sType: oToken.sType, sValue: oToken.sValue, aLabel: aElem };
+ }
+ else {
+ return { sType: "UNKNOWN", sValue: oToken.sValue, aLabel: ["inconnu du dictionnaire"] };
+ }
+ break;
+ }
}
catch (e) {
helpers.logerror(e);
- return "#erreur";
}
+ return null;
};
- formatTags (sTags) {
+ _formatTags (sTags) {
let sRes = "";
sTags = sTags.replace(/V([0-3][ea]?)[itpqnmr_eaxz]+/, "V$1");
let m;
while ((m = this._zTag.exec(sTags)) !== null) {
sRes += _dTAGS.get(m[0]);
Index: gc_lang/fr/xpi/data/lxg_panel.css
==================================================================
--- gc_lang/fr/xpi/data/lxg_panel.css
+++ gc_lang/fr/xpi/data/lxg_panel.css
@@ -40,28 +40,43 @@
color: hsla(0, 0%, 96%, 1);
border-radius: 5px;
text-align: center;
font-size: 20px;
}
+#wordlist .token {
+ margin: 8px;
+}
#wordlist ul {
margin: 0 0 5px 40px;
}
#wordlist b {
- background-color: hsla(150, 50%, 50%, 1);
+ background-color: hsla(150, 10%, 50%, 1);
color: hsla(0, 0%, 96%, 1);
padding: 2px 5px;
border-radius: 2px;
text-decoration: none;
}
-#wordlist b.unknown {
+#wordlist b.WORD {
+ background-color: hsla(150, 50%, 50%, 1);
+}
+#wordlist b.ELPFX {
+ background-color: hsla(150, 30%, 50%, 1);
+}
+#wordlist b.UNKNOWN {
background-color: hsla(0, 50%, 50%, 1);
}
-#wordlist b.nb {
+#wordlist b.NUM {
+ background-color: hsla(180, 50%, 50%, 1);
+}
+#wordlist b.COMPLEX {
+ background-color: hsla(60, 50%, 50%, 1);
+}
+#wordlist b.SEPARATOR {
background-color: hsla(210, 50%, 50%, 1);
}
-#wordlist b.mbok {
- background-color: hsla(60, 50%, 50%, 1);
+#wordlist b.LINK {
+ background-color: hsla(270, 50%, 50%, 1);
}
#wordlist s {
color: hsla(0, 0%, 60%, 1);
text-decoration: none;
}
Index: gc_lang/fr/xpi/data/lxg_panel.js
==================================================================
--- gc_lang/fr/xpi/data/lxg_panel.js
+++ gc_lang/fr/xpi/data/lxg_panel.js
@@ -25,22 +25,19 @@
self.port.emit('openConjugueur');
});
*/
self.port.on("addSeparator", function (sText) {
- if (document.getElementById("wordlist").innerHTML !== "") {
- let xElem = document.createElement("p");
- xElem.className = "separator";
- xElem.innerHTML = sText;
- document.getElementById("wordlist").appendChild(xElem);
- }
+ addSeparator(sText);
+});
+
+self.port.on("addParagraphElems", function (sJSON) {
+ addParagraphElems(sJSON);
});
-self.port.on("addElem", function (sHtml) {
- let xElem = document.createElement("div");
- xElem.innerHTML = sHtml;
- document.getElementById("wordlist").appendChild(xElem);
+self.port.on("addMessage", function (sClass, sText) {
+ addMessage(sClass, sText);
});
self.port.on("clear", function (sHtml) {
document.getElementById("wordlist").textContent = "";
});
@@ -65,10 +62,71 @@
}
},
false
);
+
+/*
+ Actions
+*/
+
+function addSeparator (sText) {  // append a <p class="separator"> showing sText to the #wordlist element
+ if (document.getElementById("wordlist").textContent !== "") {  // skipped while the list contains no text at all
+ let xElem = document.createElement("p");
+ xElem.className = "separator";
+ xElem.textContent = sText;  // assigned as plain text, never parsed as HTML
+ document.getElementById("wordlist").appendChild(xElem);
+ }
+}
+
+function addMessage (sClass, sText) {  // append a <p> with CSS class sClass and text sText to the #wordlist element
+ let xNode = document.createElement("p");
+ xNode.className = sClass;
+ xNode.textContent = sText;  // assigned as plain text, never parsed as HTML
+ document.getElementById("wordlist").appendChild(xNode);
+}
+
+function addParagraphElems (sJSON) {  // sJSON: JSON text of an array of token descriptions; they are rendered inside one <div class="paragraph">
+ try {
+ let xNodeDiv = document.createElement("div");
+ xNodeDiv.className = "paragraph";
+ let lElem = JSON.parse(sJSON);  // throws on invalid JSON; handled by the catch below
+ for (let oToken of lElem) {
+ xNodeDiv.appendChild(createTokenNode(oToken));
+ }
+ document.getElementById("wordlist").appendChild(xNodeDiv);  // only reached when every token was converted without error
+ }
+ catch (e) {
+ console.error("\n" + e.fileName + "\n" + e.name + "\nline: " + e.lineNumber + "\n" + e.message);  // NOTE(review): fileName and lineNumber are non-standard Error properties
+ console.error(sJSON);  // the payload that could not be displayed is logged too
+ }
+}
+
+function createTokenNode (oToken) {  // build and return the <div> for one token; reads oToken.sType, oToken.sValue and oToken.aLabel
+ let xTokenNode = document.createElement("div");
+ xTokenNode.className = "token " + oToken.sType;
+ let xTokenValue = document.createElement("b");
+ xTokenValue.className = oToken.sType;  // the token type is used directly as the CSS class name
+ xTokenValue.textContent = oToken.sValue;
+ xTokenNode.appendChild(xTokenValue);
+ let xSep = document.createElement("s");  // <s> only holds the " : " separator between the value and its label(s)
+ xSep.textContent = " : ";
+ xTokenNode.appendChild(xSep);
+ if (oToken.aLabel.length === 1) {  // exactly one label: added as a text node right after the separator
+ xTokenNode.appendChild(document.createTextNode(oToken.aLabel[0]));
+ } else {  // zero or several labels: one <li> per label inside a <ul>
+ let xTokenList = document.createElement("ul");
+ for (let sLabel of oToken.aLabel) {
+ let xTokenLine = document.createElement("li");
+ xTokenLine.textContent = sLabel;
+ xTokenList.appendChild(xTokenLine);
+ }
+ xTokenNode.appendChild(xTokenList);
+ }
+ return xTokenNode;
+}
+
// display selection
function displayClasses () {
setHidden("ok", document.getElementById("ok").checked);
Index: gc_lang/fr/xpi/gce_worker.js
==================================================================
--- gc_lang/fr/xpi/gce_worker.js
+++ gc_lang/fr/xpi/gce_worker.js
@@ -84,28 +84,18 @@
return JSON.stringify(aGrammErr);
}
function parseAndSpellcheck (sText, sLang, bDebug, bContext) {
let aGrammErr = gce.parse(sText, sLang, bDebug, bContext);
- let aSpellErr = [];
- for (let oToken of oTokenizer.genTokens(sText)) {
- if (oToken.sType === 'WORD' && !oDict.isValidToken(oToken.sValue)) {
- aSpellErr.push(oToken);
- }
- }
+ let aSpellErr = oTokenizer.getSpellingErrors(sText, oDict);
return JSON.stringify({ aGrammErr: aGrammErr, aSpellErr: aSpellErr });
}
function parseAndTag (sText, iParagraph, sLang, bDebug) {
sText = text.addHtmlEntities(sText);
- let aSpellErr = [];
- for (let oToken of oTokenizer.genTokens(sText)) {
- if (oToken.sType === 'WORD' && !oDict.isValidToken(oToken.sValue)) {
- aSpellErr.push(oToken);
- }
- }
let aGrammErr = gce.parse(sText, sLang, bDebug);
+ let aSpellErr = oTokenizer.getSpellingErrors(sText, oDict);
let sHtml = text.tagParagraph(sText, iParagraph, aGrammErr, aSpellErr);
return sHtml;
}
function parseAndGenerateParagraph (sText, iParagraph, sLang, bDebug) {
@@ -155,8 +145,21 @@
}
// Lexicographer
-function analyzeWords (sText) {
- return oLxg.analyzeText(sText);
+function getListOfElements (sText) {
+ try {
+ let aElem = [];
+ let aRes = null;
+ for (let oToken of oTokenizer.genTokens(sText)) {
+ aRes = oLxg.getInfoForToken(oToken);
+ if (aRes) {
+ aElem.push(aRes);
+ }
+ }
+ return JSON.stringify(aElem);
+ }
+ catch (e) {
+ helpers.logerror(e);
+ }
}
Index: gc_lang/fr/xpi/package.json
==================================================================
--- gc_lang/fr/xpi/package.json
+++ gc_lang/fr/xpi/package.json
@@ -1,10 +1,10 @@
{
"name": "grammalecte-fr",
"title": "Grammalecte [fr]",
"id": "French-GC@grammalecte.net",
- "version": "0.5.17.2",
+ "version": "0.5.18",
"description": "Correcteur grammatical pour le français",
"homepage": "http://www.dicollecte.org/grammalecte",
"main": "ui.js",
"icon": "data/img/icon-48.png",
"scripts": {
Index: gc_lang/fr/xpi/ui.js
==================================================================
--- gc_lang/fr/xpi/ui.js
+++ gc_lang/fr/xpi/ui.js
@@ -569,19 +569,19 @@
let nParagraph = 0; // non empty paragraphs
let sRes = "";
try {
for (let sParagraph of text.getParagraph(sText)) {
if (sParagraph.trim() !== "") {
- sRes = await xGCEWorker.post('analyzeWords', [sParagraph])
- xLxgPanel.port.emit("addElem", sRes);
+ sRes = await xGCEWorker.post('getListOfElements', [sParagraph]);
+ xLxgPanel.port.emit("addParagraphElems", sRes);
nParagraph += 1;
}
}
- xLxgPanel.port.emit("addElem", '' + _("numberOfParagraphs") + " " + nParagraph + '
');
+ xLxgPanel.port.emit("addMessage", 'message', _("numberOfParagraphs") + " " + nParagraph);
}
catch (e) {
- xLxgPanel.port.emit("addElem", ''+e.message+"
");
+ xLxgPanel.port.emit("addMessage", 'bug', e.message);
}
xLxgPanel.port.emit("stopWaitIcon");
}