Index: gc_core/js/helpers.js ================================================================== --- gc_core/js/helpers.js +++ gc_core/js/helpers.js @@ -5,94 +5,94 @@ // In Firefox, there is no console.log in PromiseWorker, but there is worker.log. // In Thunderbird, you can’t access to console directly. So it’s required to pass a log function. let funcOutput = null; -function setLogOutput (func) { - funcOutput = func; -} - -function echo (obj) { - if (funcOutput !== null) { - funcOutput(obj); - } else { - console.log(obj); - } - return true; -} - -function logerror (e, bStack=false) { - let sMsg = "\n" + e.fileName + "\n" + e.name + "\nline: " + e.lineNumber + "\n" + e.message; - if (bStack) { - sMsg += "\n--- Stack ---\n" + e.stack; - } - if (funcOutput !== null) { - funcOutput(sMsg); - } else { - console.error(sMsg); - } -} - -function inspect (o) { - let sMsg = "__inspect__: " + typeof o; - for (let sParam in o) { - sMsg += "\n" + sParam + ": " + o.sParam; - } - sMsg += "\n" + JSON.stringify(o) + "\n__end__"; - echo(sMsg); -} - - -// load ressources in workers (suggested by Mozilla extensions reviewers) -// for more options have a look here: https://gist.github.com/Noitidart/ec1e6b9a593ec7e3efed -// if not in workers, use sdk/data.load() instead -function loadFile (spf) { - try { - let xRequest; - if (typeof XMLHttpRequest !== "undefined") { - xRequest = new XMLHttpRequest(); - } - else { - // JS bullshit again… necessary for Thunderbird - let { Cc, Ci } = require("chrome"); - xRequest = Cc["@mozilla.org/xmlextras/xmlhttprequest;1"].createInstance(); - xRequest.QueryInterface(Ci.nsIXMLHttpRequest); - } - xRequest.open('GET', spf, false); // 3rd arg is false for synchronous, sync is acceptable in workers - xRequest.send(); - return xRequest.responseText; - } - catch (e) { - logerror(e); - return null - } -} - - -// conversions -function objectToMap (obj) { - let m = new Map(); - for (let param in obj) { - //console.log(param + " " + obj[param]); - m.set(param, obj[param]); - } - return m; -} - -function mapToObject (m) { - let obj = {}; - for (let [k, v] of m) { - obj[k] = v; - } - return obj; -} +var helpers = { + + setLogOutput: function (func) { + funcOutput = func; + }, + + echo: function (obj) { + if (funcOutput !== null) { + funcOutput(obj); + } else { + console.log(obj); + } + return true; + }, + + logerror: function (e, bStack=false) { + let sMsg = "\n" + e.fileName + "\n" + e.name + "\nline: " + e.lineNumber + "\n" + e.message; + if (bStack) { + sMsg += "\n--- Stack ---\n" + e.stack; + } + if (funcOutput !== null) { + funcOutput(sMsg); + } else { + console.error(sMsg); + } + }, + + inspect: function (o) { + let sMsg = "__inspect__: " + typeof o; + for (let sParam in o) { + sMsg += "\n" + sParam + ": " + o.sParam; + } + sMsg += "\n" + JSON.stringify(o) + "\n__end__"; + this.echo(sMsg); + }, + + loadFile: function (spf) { + // load ressources in workers (suggested by Mozilla extensions reviewers) + // for more options have a look here: https://gist.github.com/Noitidart/ec1e6b9a593ec7e3efed + // if not in workers, use sdk/data.load() instead + try { + let xRequest; + if (typeof XMLHttpRequest !== "undefined") { + xRequest = new XMLHttpRequest(); + } else { + // JS bullshit again… necessary for Thunderbird + let { Cc, Ci } = require("chrome"); + xRequest = Cc["@mozilla.org/xmlextras/xmlhttprequest;1"].createInstance(); + xRequest.QueryInterface(Ci.nsIXMLHttpRequest); + } + xRequest.open('GET', spf, false); // 3rd arg is false for synchronous, sync is acceptable in workers + xRequest.send(); + return xRequest.responseText; + } + catch (e) { + this.logerror(e); + return null; + } + }, + + // conversions + objectToMap: function (obj) { + let m = new Map(); + for (let param in obj) { + //console.log(param + " " + obj[param]); + m.set(param, obj[param]); + } + return m; + }, + + mapToObject: function (m) { + let obj = {}; + for (let [k, v] of m) { + obj[k] = v; + } + return obj; + } +}; if (typeof(exports) !== 'undefined') { - exports.setLogOutput = setLogOutput; - exports.echo = echo; - exports.logerror = logerror; - exports.inspect = inspect; - exports.loadFile = loadFile; - exports.objectToMap = objectToMap; - exports.mapToObject = mapToObject; + exports.setLogOutput = helpers.setLogOutput; + exports.echo = helpers.echo; + exports.logerror = helpers.logerror; + exports.inspect = helpers.inspect; + exports.loadFile = helpers.loadFile; + exports.objectToMap = helpers.objectToMap; + exports.mapToObject = helpers.mapToObject; } Index: gc_core/js/ibdawg.js ================================================================== --- gc_core/js/ibdawg.js +++ gc_core/js/ibdawg.js @@ -1,11 +1,14 @@ //// IBDAWG "use strict"; -const st = require("resource://grammalecte/str_transform.js"); -const helpers = require("resource://grammalecte/helpers.js"); + +if (typeof(require) !== 'undefined') { + var str_transform = require("resource://grammalecte/str_transform.js"); + var helpers = require("resource://grammalecte/helpers.js"); +} // String // Don’t remove. Necessary in TB. ${string} @@ -13,16 +16,15 @@ class IBDAWG { // INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH - constructor (sDicName) { + constructor (sDicName, sPath="") { try { - const dict = JSON.parse(helpers.loadFile("resource://grammalecte/_dictionaries/"+sDicName)); + let sURL = (sPath !== "") ? sPath + "/" + sDicName : "resource://grammalecte/_dictionaries/"+sDicName; + const dict = JSON.parse(helpers.loadFile(sURL)); Object.assign(this, dict); - //const dict = require("resource://grammalecte/"+sLang+"/dictionary.js"); - //Object.assign(this, dict.dictionary); } catch (e) { throw Error("# Error. File not found or not loadable.\n" + e.message + "\n"); } /* @@ -39,15 +41,15 @@ this.dChar = helpers.objectToMap(this.dChar); //this.byDic = new Uint8Array(this.byDic); // not quicker, even slower if (this.cStemming == "S") { - this.funcStemming = st.getStemFromSuffixCode; + this.funcStemming = str_transform.getStemFromSuffixCode; } else if (this.cStemming == "A") { - this.funcStemming = st.getStemFromAffixCode; + this.funcStemming = str_transform.getStemFromAffixCode; } else { - this.funcStemming = st.noStemming; + this.funcStemming = str_transform.noStemming; } // Configuring DAWG functions according to nVersion switch (this.nVersion) { case 1: @@ -72,18 +74,18 @@ throw ValueError("# Error: unknown code: " + this.nVersion); } //console.log(this.getInfo()); this.bOptNumSigle = true; this.bOptNumAtLast = false; - }; + } getInfo () { return ` Language: ${this.sLang} Version: ${this.nVersion} Stemming: ${this.cStemming}FX\n` + ` Arcs values: ${this.nArcVal} = ${this.nChar} characters, ${this.nAff} affixes, ${this.nTag} tags\n` + ` Dictionary: ${this.nEntries} entries, ${this.nNode} nodes, ${this.nArc} arcs\n` + ` Address size: ${this.nBytesNodeAddress} bytes, Arc size: ${this.nBytesArc} bytes\n`; - }; + } isValidToken (sToken) { // checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked) if (this.isValid(sToken)) { return true; @@ -93,11 +95,11 @@ return true; } return sToken.split("-").every(sWord => this.isValid(sWord)); } return false; - }; + } isValid (sWord) { // checks if sWord is valid (different casing tested if the first letter is a capital) if (!sWord) { return null; @@ -123,11 +125,11 @@ } else { return !!this.lookup(sWord.toLowerCase()); } } return false; - }; + } _convBytesToInteger (aBytes) { // Byte order = Big Endian (bigger first) let nVal = 0; let nWeight = (aBytes.length - 1) * 8; @@ -134,11 +136,11 @@ for (let n of aBytes) { nVal += n << nWeight; nWeight = nWeight - 8; } return nVal; - }; + } lookup (sWord) { // returns true if sWord in dictionary (strict verification) let iAddr = 0; for (let c of sWord) { @@ -149,11 +151,11 @@ if (iAddr === null) { return false; } } return Boolean(this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask); - }; + } getMorph (sWord) { // retrieves morphologies list, different casing allowed let l = this.morph(sWord); if (sWord[0].gl_isUpperCase()) { @@ -161,15 +163,15 @@ if (sWord.gl_isUpperCase() && sWord.length > 1) { l = l.concat(this.morph(sWord.gl_toCapitalize())); } } return l; - }; + } // morph (sWord) { // is defined in constructor - // }; + // } // VERSION 1 _morph1 (sWord) { // returns morphologies of sWord let iAddr = 0; @@ -205,11 +207,11 @@ iAddr = iEndArcAddr + this.nBytesNodeAddress; } return l; } return []; - }; + } _stem1 (sWord) { // returns stems list of sWord let iAddr = 0; for (let c of sWord) { @@ -235,11 +237,11 @@ iAddr = iEndArcAddr + this.nBytesNodeAddress; } return l; } return []; - }; + } _lookupArcNode1 (nVal, iAddr) { // looks if nVal is an arc at the node at iAddr, if yes, returns address of next node else None while (true) { let iEndArcAddr = iAddr+this.nBytesArc; @@ -255,39 +257,39 @@ return null; } iAddr = iEndArcAddr + this.nBytesNodeAddress; } } - }; + } // VERSION 2 _morph2 (sWord) { // to do - }; + } _stem2 (sWord) { // to do - }; + } _lookupArcNode2 (nVal, iAddr) { // to do - }; + } // VERSION 3 _morph3 (sWord) { // to do - }; + } _stem3 (sWord) { // to do - }; + } _lookupArcNode3 (nVal, iAddr) { // to do - }; + } } if (typeof(exports) !== 'undefined') { exports.IBDAWG = IBDAWG; } Index: gc_core/js/jsex_map.js ================================================================== --- gc_core/js/jsex_map.js +++ gc_core/js/jsex_map.js @@ -6,42 +6,42 @@ let oNewMap = new Map(); for (let [key, val] of this.entries()) { oNewMap.set(key, val); } return oNewMap; - } + }; Map.prototype.gl_get = function (key, defaultValue) { let res = this.get(key); if (res !== undefined) { return res; } return defaultValue; - } + }; Map.prototype.gl_toString = function () { // Default .toString() gives nothing useful let sRes = "{ "; for (let [k, v] of this.entries()) { sRes += (typeof k === "string") ? '"' + k + '": ' : k.toString() + ": "; sRes += (typeof v === "string") ? '"' + v + '", ' : v.toString() + ", "; } - sRes = sRes.slice(0, -2) + " }" + sRes = sRes.slice(0, -2) + " }"; return sRes; - } + }; Map.prototype.gl_update = function (dDict) { for (let [k, v] of dDict.entries()) { this.set(k, v); } - } + }; Map.prototype.gl_updateOnlyExistingKeys = function (dDict) { for (let [k, v] of dDict.entries()) { if (this.has(k)){ this.set(k, v); } } - } + }; Map.prototype.grammalecte = true; } Index: gc_core/js/jsex_regex.js ================================================================== --- gc_core/js/jsex_regex.js +++ gc_core/js/jsex_regex.js @@ -40,23 +40,23 @@ // at the end of the pattern m.start.push(this.lastIndex - m[i].length); m.end.push(this.lastIndex); } else if (codePos === "w") { // word in the middle of the pattern - iPos = m[0].search("[ ’,()«»“”]"+m[i]+"[ ,’()«»“”]") + 1 + m.index + iPos = m[0].search("[ ’,()«»“”]"+m[i]+"[ ,’()«»“”]") + 1 + m.index; m.start.push(iPos); - m.end.push(iPos + m[i].length) + m.end.push(iPos + m[i].length); } else if (codePos === "*") { // anywhere iPos = m[0].indexOf(m[i]) + m.index; m.start.push(iPos); - m.end.push(iPos + m[i].length) + m.end.push(iPos + m[i].length); } else if (codePos === "**") { // anywhere after previous group iPos = m[0].indexOf(m[i], m.end[i-1]-m.index) + m.index; m.start.push(iPos); - m.end.push(iPos + m[i].length) + m.end.push(iPos + m[i].length); } else if (codePos.startsWith(">")) { // >x:_ // todo: look in substring x iPos = m[0].indexOf(m[i]) + m.index; m.start.push(iPos); @@ -81,9 +81,9 @@ } else { console.error(e); } } return m; - } + }; RegExp.prototype.grammalecte = true; } Index: gc_core/js/jsex_string.js ================================================================== --- gc_core/js/jsex_string.js +++ gc_core/js/jsex_string.js @@ -13,45 +13,45 @@ while ((iPos = this.indexOf(sSearch, iPos)) >= 0) { nOccur++; iPos += nStep; } return nOccur; - } + }; String.prototype.gl_isDigit = function () { return (this.search(/^[0-9⁰¹²³⁴⁵⁶⁷⁸⁹]+$/) !== -1); - } + }; String.prototype.gl_isLowerCase = function () { return (this.search(/^[a-zà-öø-ÿ0-9-]+$/) !== -1); - } + }; String.prototype.gl_isUpperCase = function () { return (this.search(/^[A-ZÀ-ÖØ-ߌ0-9-]+$/) !== -1); - } + }; String.prototype.gl_isTitle = function () { return (this.search(/^[A-ZÀ-ÖØ-ߌ][a-zà-öø-ÿ'’-]+$/) !== -1); - } + }; String.prototype.gl_toCapitalize = function () { return this.slice(0,1).toUpperCase() + this.slice(1).toLowerCase(); - } + }; String.prototype.gl_expand = function (oMatch) { let sNew = this; for (let i = 0; i < oMatch.length ; i++) { let z = new RegExp("\\\\"+parseInt(i), "g"); sNew = sNew.replace(z, oMatch[i]); } return sNew; - } + }; String.prototype.gl_trimRight = function (sChars) { let z = new RegExp("["+sChars+"]+$"); return this.replace(z, ""); - } + }; String.prototype.gl_trimLeft = function (sChars) { let z = new RegExp("^["+sChars+"]+"); return this.replace(z, ""); - } + }; String.prototype.gl_trim = function (sChars) { let z1 = new RegExp("^["+sChars+"]+"); let z2 = new RegExp("["+sChars+"]+$"); return this.replace(z1, "").replace(z2, ""); - } + }; String.prototype.grammalecte = true; } Index: gc_core/js/lang_core/gc_engine.js ================================================================== --- gc_core/js/lang_core/gc_engine.js +++ gc_core/js/lang_core/gc_engine.js @@ -1,11 +1,23 @@ // Grammar checker engine +"use strict"; + ${string} ${regex} ${map} + +if (typeof(require) !== 'undefined') { + var helpers = require("resource://grammalecte/helpers.js"); + var echo = require("resource://grammalecte/helpers.js").echo; + var gc_options = require("resource://grammalecte/${lang}/gc_options.js"); + var gc_rules = require("resource://grammalecte/${lang}/gc_rules.js"); + var cregex = require("resource://grammalecte/${lang}/cregex.js"); + var text = require("resource://grammalecte/text.js"); +} + function capitalizeArray (aArray) { // can’t map on user defined function?? let aNew = []; for (let i = 0; i < aArray.length; i = i + 1) { @@ -12,339 +24,343 @@ aNew[i] = aArray[i].gl_toCapitalize(); } return aNew; } -const ibdawg = require("resource://grammalecte/ibdawg.js"); -const helpers = require("resource://grammalecte/helpers.js"); -const gc_options = require("resource://grammalecte/${lang}/gc_options.js"); -const cr = require("resource://grammalecte/${lang}/cregex.js"); -const text = require("resource://grammalecte/text.js"); -const echo = require("resource://grammalecte/helpers.js").echo; - -const lang = "${lang}"; -const locales = ${loc}; -const pkg = "${implname}"; -const name = "${name}"; -const version = "${version}"; -const author = "${author}"; - -// commons regexes -const _zEndOfSentence = new RegExp ('([.?!:;…][ .?!… »”")]*|.$)', "g"); -const _zBeginOfParagraph = new RegExp ("^[-  –—.,;?!…]*", "ig"); -const _zEndOfParagraph = new RegExp ("[-  .,;?!…–—]*$", "ig"); - -// grammar rules and dictionary -//const _rules = require("./gc_rules.js"); -let _sContext = ""; // what software is running -const _rules = require("resource://grammalecte/${lang}/gc_rules.js"); + +// data +let _sAppContext = ""; // what software is running let _dOptions = null; let _aIgnoredRules = new Set(); let _oDict = null; -let _dAnalyses = new Map(); // cache for data from dictionary - - -///// Parsing - -function parse (sText, sCountry="${country_default}", bDebug=false, bContext=false) { - // analyses the paragraph sText and returns list of errors - let dErrors; - let errs; - let sAlt = sText; - let dDA = new Map(); // Disamnbiguator - let dPriority = new Map(); // Key = position; value = priority - let sNew = ""; - - // parse paragraph - try { - [sNew, dErrors] = _proofread(sText, sAlt, 0, true, dDA, dPriority, sCountry, bDebug, bContext); - if (sNew) { - sText = sNew; - } - } - catch (e) { - helpers.logerror(e); - } - - // cleanup - if (sText.includes(" ")) { - sText = sText.replace(/ /g, ' '); // nbsp - } - if (sText.includes(" ")) { - sText = sText.replace(/ /g, ' '); // snbsp - } - if (sText.includes("'")) { - sText = sText.replace(/'/g, "’"); - } - if (sText.includes("‑")) { - sText = sText.replace(/‑/g, "-"); // nobreakdash - } - - // parse sentence - for (let [iStart, iEnd] of _getSentenceBoundaries(sText)) { - if (4 < (iEnd - iStart) < 2000) { - dDA.clear(); - //echo(sText.slice(iStart, iEnd)); - try { - [_, errs] = _proofread(sText.slice(iStart, iEnd), sAlt.slice(iStart, iEnd), iStart, false, dDA, dPriority, sCountry, bDebug, bContext); - dErrors.gl_update(errs); - } - catch (e) { - helpers.logerror(e); - } - } - } - return Array.from(dErrors.values()); -} - -function* _getSentenceBoundaries (sText) { - let mBeginOfSentence = _zBeginOfParagraph.exec(sText) - let iStart = _zBeginOfParagraph.lastIndex; - let m; - while ((m = _zEndOfSentence.exec(sText)) !== null) { - yield [iStart, _zEndOfSentence.lastIndex]; - iStart = _zEndOfSentence.lastIndex; - } -} - -function _proofread (s, sx, nOffset, bParagraph, dDA, dPriority, sCountry, bDebug, bContext) { - let dErrs = new Map(); - let bChange = false; - let bIdRule = option('idrule'); - let m; - let bCondMemo; - let nErrorStart; - - for (let [sOption, lRuleGroup] of _getRules(bParagraph)) { - if (!sOption || option(sOption)) { - for (let [zRegex, bUppercase, sLineId, sRuleId, nPriority, lActions, lGroups, lNegLookBefore] of lRuleGroup) { - if (!_aIgnoredRules.has(sRuleId)) { - while ((m = zRegex.gl_exec2(s, lGroups, lNegLookBefore)) !== null) { - bCondMemo = null; - /*if (bDebug) { - echo(">>>> Rule # " + sLineId + " - Text: " + s + " opt: "+ sOption); - }*/ - for (let [sFuncCond, cActionType, sWhat, ...eAct] of lActions) { - // action in lActions: [ condition, action type, replacement/suggestion/action[, iGroup[, message, URL]] ] - try { - //echo(oEvalFunc[sFuncCond]); - bCondMemo = (!sFuncCond || oEvalFunc[sFuncCond](s, sx, m, dDA, sCountry, bCondMemo)) - if (bCondMemo) { - switch (cActionType) { - case "-": - // grammar error - //echo("-> error detected in " + sLineId + "\nzRegex: " + zRegex.source); - nErrorStart = nOffset + m.start[eAct[0]]; - if (!dErrs.has(nErrorStart) || nPriority > dPriority.get(nErrorStart)) { - dErrs.set(nErrorStart, _createError(s, sx, sWhat, nOffset, m, eAct[0], sLineId, sRuleId, bUppercase, eAct[1], eAct[2], bIdRule, sOption, bContext)); - dPriority.set(nErrorStart, nPriority); - } - break; - case "~": - // text processor - //echo("-> text processor by " + sLineId + "\nzRegex: " + zRegex.source); - s = _rewrite(s, sWhat, eAct[0], m, bUppercase); - bChange = true; - if (bDebug) { - echo("~ " + s + " -- " + m[eAct[0]] + " # " + sLineId); - } - break; - case "=": - // disambiguation - //echo("-> disambiguation by " + sLineId + "\nzRegex: " + zRegex.source); - oEvalFunc[sWhat](s, m, dDA); - if (bDebug) { - echo("= " + m[0] + " # " + sLineId + "\nDA: " + dDA.gl_toString()); - } - break; - case ">": - // we do nothing, this test is just a condition to apply all following actions - break; - default: - echo("# error: unknown action at " + sLineId); - } - } else { - if (cActionType == ">") { - break; - } - } - } - catch (e) { - echo(s); - echo("# line id: " + sLineId + "\n# rule id: " + sRuleId); - helpers.logerror(e); +let _dAnalyses = new Map(); // cache for data from dictionary + + +var gc_engine = { + + //// Informations + + lang: "${lang}", + locales: ${loc}, + pkg: "${implname}", + name: "${name}", + version: "${version}", + author: "${author}", + + //// Parsing + + parse: function (sText, sCountry="${country_default}", bDebug=false, bContext=false) { + // analyses the paragraph sText and returns list of errors + let dErrors; + let errs; + let sAlt = sText; + let dDA = new Map(); // Disamnbiguator + let dPriority = new Map(); // Key = position; value = priority + let sNew = ""; + + // parse paragraph + try { + [sNew, dErrors] = this._proofread(sText, sAlt, 0, true, dDA, dPriority, sCountry, bDebug, bContext); + if (sNew) { + sText = sNew; + } + } + catch (e) { + helpers.logerror(e); + } + + // cleanup + if (sText.includes(" ")) { + sText = sText.replace(/ /g, ' '); // nbsp + } + if (sText.includes(" ")) { + sText = sText.replace(/ /g, ' '); // snbsp + } + if (sText.includes("'")) { + sText = sText.replace(/'/g, "’"); + } + if (sText.includes("‑")) { + sText = sText.replace(/‑/g, "-"); // nobreakdash + } + + // parse sentence + for (let [iStart, iEnd] of this._getSentenceBoundaries(sText)) { + if (4 < (iEnd - iStart) < 2000) { + dDA.clear(); + //echo(sText.slice(iStart, iEnd)); + try { + [, errs] = this._proofread(sText.slice(iStart, iEnd), sAlt.slice(iStart, iEnd), iStart, false, dDA, dPriority, sCountry, bDebug, bContext); + dErrors.gl_update(errs); + } + catch (e) { + helpers.logerror(e); + } + } + } + return Array.from(dErrors.values()); + }, + + _zEndOfSentence: new RegExp ('([.?!:;…][ .?!… »”")]*|.$)', "g"), + _zBeginOfParagraph: new RegExp ("^[-  –—.,;?!…]*", "ig"), + _zEndOfParagraph: new RegExp ("[-  .,;?!…–—]*$", "ig"), + + _getSentenceBoundaries: function* (sText) { + let mBeginOfSentence = this._zBeginOfParagraph.exec(sText) + let iStart = this._zBeginOfParagraph.lastIndex; + let m; + while ((m = this._zEndOfSentence.exec(sText)) !== null) { + yield [iStart, this._zEndOfSentence.lastIndex]; + iStart = this._zEndOfSentence.lastIndex; + } + }, + + _proofread: function (s, sx, nOffset, bParagraph, dDA, dPriority, sCountry, bDebug, bContext) { + let dErrs = new Map(); + let bChange = false; + let bIdRule = option('idrule'); + let m; + let bCondMemo; + let nErrorStart; + + for (let [sOption, lRuleGroup] of this._getRules(bParagraph)) { + if (!sOption || option(sOption)) { + for (let [zRegex, bUppercase, sLineId, sRuleId, nPriority, lActions, lGroups, lNegLookBefore] of lRuleGroup) { + if (!_aIgnoredRules.has(sRuleId)) { + while ((m = zRegex.gl_exec2(s, lGroups, lNegLookBefore)) !== null) { + bCondMemo = null; + /*if (bDebug) { + echo(">>>> Rule # " + sLineId + " - Text: " + s + " opt: "+ sOption); + }*/ + for (let [sFuncCond, cActionType, sWhat, ...eAct] of lActions) { + // action in lActions: [ condition, action type, replacement/suggestion/action[, iGroup[, message, URL]] ] + try { + //echo(oEvalFunc[sFuncCond]); + bCondMemo = (!sFuncCond || oEvalFunc[sFuncCond](s, sx, m, dDA, sCountry, bCondMemo)) + if (bCondMemo) { + switch (cActionType) { + case "-": + // grammar error + //echo("-> error detected in " + sLineId + "\nzRegex: " + zRegex.source); + nErrorStart = nOffset + m.start[eAct[0]]; + if (!dErrs.has(nErrorStart) || nPriority > dPriority.get(nErrorStart)) { + dErrs.set(nErrorStart, this._createError(s, sx, sWhat, nOffset, m, eAct[0], sLineId, sRuleId, bUppercase, eAct[1], eAct[2], bIdRule, sOption, bContext)); + dPriority.set(nErrorStart, nPriority); + } + break; + case "~": + // text processor + //echo("-> text processor by " + sLineId + "\nzRegex: " + zRegex.source); + s = this._rewrite(s, sWhat, eAct[0], m, bUppercase); + bChange = true; + if (bDebug) { + echo("~ " + s + " -- " + m[eAct[0]] + " # " + sLineId); + } + break; + case "=": + // disambiguation + //echo("-> disambiguation by " + sLineId + "\nzRegex: " + zRegex.source); + oEvalFunc[sWhat](s, m, dDA); + if (bDebug) { + echo("= " + m[0] + " # " + sLineId + "\nDA: " + dDA.gl_toString()); + } + break; + case ">": + // we do nothing, this test is just a condition to apply all following actions + break; + default: + echo("# error: unknown action at " + sLineId); + } + } else { + if (cActionType == ">") { + break; + } + } + } + catch (e) { + echo(s); + echo("# line id: " + sLineId + "\n# rule id: " + sRuleId); + helpers.logerror(e); + } } } } } } } - } - if (bChange) { - return [s, dErrs]; - } - return [false, dErrs]; -} - -function _createError (s, sx, sRepl, nOffset, m, iGroup, sLineId, sRuleId, bUppercase, sMsg, sURL, bIdRule, sOption, bContext) { - let oErr = {}; - oErr["nStart"] = nOffset + m.start[iGroup]; - oErr["nEnd"] = nOffset + m.end[iGroup]; - oErr["sLineId"] = sLineId; - oErr["sRuleId"] = sRuleId; - oErr["sType"] = (sOption) ? sOption : "notype"; - // suggestions - if (sRepl[0] === "=") { - let sugg = oEvalFunc[sRepl.slice(1)](s, m); - if (sugg) { - if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) { - oErr["aSuggestions"] = capitalizeArray(sugg.split("|")); - } else { - oErr["aSuggestions"] = sugg.split("|"); - } - } else { - oErr["aSuggestions"] = []; - } - } else if (sRepl == "_") { - oErr["aSuggestions"] = []; - } else { - if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) { - oErr["aSuggestions"] = capitalizeArray(sRepl.gl_expand(m).split("|")); - } else { - oErr["aSuggestions"] = sRepl.gl_expand(m).split("|"); - } - } - // Message - if (sMsg[0] === "=") { - sMessage = oEvalFunc[sMsg.slice(1)](s, m) - } else { - sMessage = sMsg.gl_expand(m); - } - if (bIdRule) { - sMessage += " ##" + sLineId + " #" + sRuleId; - } - oErr["sMessage"] = sMessage; - // URL - oErr["URL"] = sURL || ""; - // Context - if (bContext) { - oErr["sUnderlined"] = sx.slice(m.start[iGroup], m.end[iGroup]); - oErr["sBefore"] = sx.slice(Math.max(0, m.start[iGroup]-80), m.start[iGroup]); - oErr["sAfter"] = sx.slice(m.end[iGroup], m.end[iGroup]+80); - } - return oErr; -} - -function _rewrite (s, sRepl, iGroup, m, bUppercase) { - // text processor: write sRepl in s at iGroup position" - let ln = m.end[iGroup] - m.start[iGroup]; - let sNew = ""; - if (sRepl === "*") { - sNew = " ".repeat(ln); - } else if (sRepl === ">" || sRepl === "_" || sRepl === "~") { - sNew = sRepl + " ".repeat(ln-1); - } else if (sRepl === "@") { - sNew = "@".repeat(ln); - } else if (sRepl.slice(0,1) === "=") { - sNew = oEvalFunc[sRepl.slice(1)](s, m); - sNew = sNew + " ".repeat(ln-sNew.length); - if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) { - sNew = sNew.gl_toCapitalize(); - } - } else { - sNew = sRepl.gl_expand(m); - sNew = sNew + " ".repeat(ln-sNew.length); - } - //echo("\n"+s+"\nstart: "+m.start[iGroup]+" end:"+m.end[iGroup]) - return s.slice(0, m.start[iGroup]) + sNew + s.slice(m.end[iGroup]); -} - -function ignoreRule (sRuleId) { - _aIgnoredRules.add(sRuleId); -} - -function resetIgnoreRules () { - _aIgnoredRules.clear(); -} - -function reactivateRule (sRuleId) { - _aIgnoredRules.delete(sRuleId); -} - -function listRules (sFilter=null) { - // generator: returns tuple (sOption, sLineId, sRuleId) - try { - for ([sOption, lRuleGroup] of _getRules(true)) { - for ([_, _, sLineId, sRuleId, _, _] of lRuleGroup) { - if (!sFilter || sRuleId.test(sFilter)) { - yield [sOption, sLineId, sRuleId]; - } - } - } - for ([sOption, lRuleGroup] of _getRules(false)) { - for ([_, _, sLineId, sRuleId, _, _] of lRuleGroup) { - if (!sFilter || sRuleId.test(sFilter)) { - yield [sOption, sLineId, sRuleId]; - } - } - } - } - catch (e) { - helpers.logerror(e); - } -} - - -//////// init - -function load (sContext="JavaScript") { - try { - _oDict = new ibdawg.IBDAWG("${dic_name}.json"); - _sContext = sContext; - _dOptions = gc_options.getOptions(sContext).gl_shallowCopy(); // duplication necessary, to be able to reset to default - } - catch (e) { - helpers.logerror(e); - } -} - -function setOption (sOpt, bVal) { - if (_dOptions.has(sOpt)) { - _dOptions.set(sOpt, bVal); - } -} - -function setOptions (dOpt) { - _dOptions.gl_updateOnlyExistingKeys(dOpt); -} - -function getOptions () { - return _dOptions; -} - -function getDefaultOptions () { - return gc_options.getOptions(_sContext).gl_shallowCopy(); -} - -function resetOptions () { - _dOptions = gc_options.getOptions(_sContext).gl_shallowCopy(); -} - -function getDictionary () { - return _oDict; -} - -function _getRules (bParagraph) { - if (!bParagraph) { - return _rules.lSentenceRules; - } - return _rules.lParagraphRules; -} - - - -//////// common functions + if (bChange) { + return [s, dErrs]; + } + return [false, dErrs]; + }, + + _createError: function (s, sx, sRepl, nOffset, m, iGroup, sLineId, sRuleId, bUppercase, sMsg, sURL, bIdRule, sOption, bContext) { + let oErr = {}; + oErr["nStart"] = nOffset + m.start[iGroup]; + oErr["nEnd"] = nOffset + m.end[iGroup]; + oErr["sLineId"] = sLineId; + oErr["sRuleId"] = sRuleId; + oErr["sType"] = (sOption) ? sOption : "notype"; + // suggestions + if (sRepl.slice(0,1) === "=") { + let sugg = oEvalFunc[sRepl.slice(1)](s, m); + if (sugg) { + if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) { + oErr["aSuggestions"] = capitalizeArray(sugg.split("|")); + } else { + oErr["aSuggestions"] = sugg.split("|"); + } + } else { + oErr["aSuggestions"] = []; + } + } else if (sRepl == "_") { + oErr["aSuggestions"] = []; + } else { + if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) { + oErr["aSuggestions"] = capitalizeArray(sRepl.gl_expand(m).split("|")); + } else { + oErr["aSuggestions"] = sRepl.gl_expand(m).split("|"); + } + } + // Message + let sMessage = ""; + if (sMsg.slice(0,1) === "=") { + sMessage = oEvalFunc[sMsg.slice(1)](s, m) + } else { + sMessage = sMsg.gl_expand(m); + } + if (bIdRule) { + sMessage += " ##" + sLineId + " #" + sRuleId; + } + oErr["sMessage"] = sMessage; + // URL + oErr["URL"] = sURL || ""; + // Context + if (bContext) { + oErr["sUnderlined"] = sx.slice(m.start[iGroup], m.end[iGroup]); + oErr["sBefore"] = sx.slice(Math.max(0, m.start[iGroup]-80), m.start[iGroup]); + oErr["sAfter"] = sx.slice(m.end[iGroup], m.end[iGroup]+80); + } + return oErr; + }, + + _rewrite: function (s, sRepl, iGroup, m, bUppercase) { + // text processor: write sRepl in s at iGroup position" + let ln = m.end[iGroup] - m.start[iGroup]; + let sNew = ""; + if (sRepl === "*") { + sNew = " ".repeat(ln); + } else if (sRepl === ">" || sRepl === "_" || sRepl === "~") { + sNew = sRepl + " ".repeat(ln-1); + } else if (sRepl === "@") { + sNew = "@".repeat(ln); + } else if (sRepl.slice(0,1) === "=") { + sNew = oEvalFunc[sRepl.slice(1)](s, m); + sNew = sNew + " ".repeat(ln-sNew.length); + if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) { + sNew = sNew.gl_toCapitalize(); + } + } else { + sNew = sRepl.gl_expand(m); + sNew = sNew + " ".repeat(ln-sNew.length); + } + //echo("\n"+s+"\nstart: "+m.start[iGroup]+" end:"+m.end[iGroup]) + return s.slice(0, m.start[iGroup]) + sNew + s.slice(m.end[iGroup]); + }, + + // Actions on rules + + ignoreRule: function (sRuleId) { + _aIgnoredRules.add(sRuleId); + }, + + resetIgnoreRules: function () { + _aIgnoredRules.clear(); + }, + + reactivateRule: function (sRuleId) { + _aIgnoredRules.delete(sRuleId); + }, + + listRules: function* (sFilter=null) { + // generator: returns tuple (sOption, sLineId, sRuleId) + try { + for (let [sOption, lRuleGroup] of this._getRules(true)) { + for (let [,, sLineId, sRuleId,,] of lRuleGroup) { + if (!sFilter || sRuleId.test(sFilter)) { + yield [sOption, sLineId, sRuleId]; + } + } + } + for (let [sOption, lRuleGroup] of this._getRules(false)) { + for (let [,, sLineId, sRuleId,,] of lRuleGroup) { + if (!sFilter || sRuleId.test(sFilter)) { + yield [sOption, sLineId, sRuleId]; + } + } + } + } + catch (e) { + helpers.logerror(e); + } + }, + + _getRules: function (bParagraph) { + if (!bParagraph) { + return gc_rules.lSentenceRules; + } + return gc_rules.lParagraphRules; + }, + + //// Initialization + + load: function (sContext="JavaScript", sPath="") { + try { + if (typeof(require) !== 'undefined') { + var ibdawg = require("resource://grammalecte/ibdawg.js"); + _oDict = new ibdawg.IBDAWG("${dic_name}.json"); + } else { + _oDict = new IBDAWG("${dic_name}.json", sPath); + } + _sAppContext = sContext; + _dOptions = gc_options.getOptions(sContext).gl_shallowCopy(); // duplication necessary, to be able to reset to default + } + catch (e) { + helpers.logerror(e); + } + }, + + getDictionary: function () { + return _oDict; + }, + + //// Options + + setOption: function (sOpt, bVal) { + if (_dOptions.has(sOpt)) { + _dOptions.set(sOpt, bVal); + } + }, + + setOptions: function (dOpt) { + _dOptions.gl_updateOnlyExistingKeys(dOpt); + }, + + getOptions: function () { + return _dOptions; + }, + + getDefaultOptions: function () { + return gc_options.getOptions(_sAppContext).gl_shallowCopy(); + }, + + resetOptions: function () { + _dOptions = gc_options.getOptions(_sAppContext).gl_shallowCopy(); + } +} + + +//////// Common functions function option (sOpt) { // return true if option sOpt is active return _dOptions.get(sOpt); } @@ -459,39 +475,39 @@ // warning: check compile_rules.py to understand how it works function nextword (s, iStart, n) { // get the nth word of the input string or empty string - let z = new RegExp("^( +[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st%_-]+){" + (n-1).toString() + "} +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st%_-]+)", "i"); + let z = new RegExp("^(?: +[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st%_-]+){" + (n-1).toString() + "} +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st%_-]+)", "ig"); let m = z.exec(s.slice(iStart)); if (!m) { return null; } - return [iStart + RegExp.lastIndex - m[2].length, m[2]]; + return [iStart + z.lastIndex - m[1].length, m[1]]; } function prevword (s, iEnd, n) { // get the (-)nth word of the input string or empty string - let z = new RegExp("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st%_-]+) +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st%_-]+ +){" + (n-1).toString() + "}$", "i"); + let z = new RegExp("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st%_-]+) +(?:[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st%_-]+ +){" + (n-1).toString() + "}$", "i"); let m = z.exec(s.slice(0, iEnd)); if (!m) { return null; } return [m.index, m[1]]; } -const _zNextWord = new RegExp ("^ +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_-]*)", "i"); -const _zPrevWord = new RegExp ("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_-]*) +$", "i"); - function nextword1 (s, iStart) { // get next word (optimization) + let _zNextWord = new RegExp ("^ +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_-]*)", "ig"); let m = _zNextWord.exec(s.slice(iStart)); if (!m) { return null; } - return [iStart + RegExp.lastIndex - m[1].length, m[1]]; + return [iStart + _zNextWord.lastIndex - m[1].length, m[1]]; } + +const _zPrevWord = new RegExp ("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_-]*) +$", "i"); function prevword1 (s, iEnd) { // get previous word (optimization) //echo("prev1, s:"+s); //echo("prev1, s.slice(0, iEnd):"+s.slice(0, iEnd)); @@ -594,10 +610,11 @@ function define (dDA, nPos, lMorph) { dDA.set(nPos, lMorph); return true; } + //////// GRAMMAR CHECKER PLUGINS ${pluginsJS} @@ -605,20 +622,32 @@ ${callablesJS} if (typeof(exports) !== 'undefined') { - exports.load = load; - exports.parse = parse; - exports.lang = lang; - exports.version = version; - exports.getDictionary = getDictionary; - exports.setOption = setOption; - exports.setOptions = setOptions; - exports.getOptions = getOptions; - exports.getDefaultOptions = getDefaultOptions; - exports.resetOptions = resetOptions; - exports.ignoreRule = ignoreRule; - exports.reactivateRule = reactivateRule; - exports.resetIgnoreRules = resetIgnoreRules; - exports.listRules = listRules; + exports.lang = gc_engine.lang; + exports.locales = gc_engine.locales; + exports.pkg = gc_engine.pkg; + exports.name = gc_engine.name; + exports.version = gc_engine.version; + exports.author = gc_engine.author; + exports.parse = gc_engine.parse; + exports._zEndOfSentence = gc_engine._zEndOfSentence; + exports._zBeginOfParagraph = gc_engine._zBeginOfParagraph; + exports._zEndOfParagraph = gc_engine._zEndOfParagraph; + exports._getSentenceBoundaries = gc_engine._getSentenceBoundaries; + exports._proofread = gc_engine._proofread; + exports._createError = gc_engine._createError; + exports._rewrite = gc_engine._rewrite; + exports.ignoreRule = gc_engine.ignoreRule; + exports.resetIgnoreRules = gc_engine.resetIgnoreRules; + exports.reactivateRule = gc_engine.reactivateRule; + exports.listRules = gc_engine.listRules; + exports._getRules = gc_engine._getRules; + exports.load = gc_engine.load; + exports.getDictionary = gc_engine.getDictionary; + exports.setOption = gc_engine.setOption; + exports.setOptions = gc_engine.setOptions; + exports.getOptions = gc_engine.getOptions; + exports.getDefaultOptions = gc_engine.getDefaultOptions; + exports.resetOptions = gc_engine.resetOptions; } Index: gc_core/js/lang_core/gc_options.js ================================================================== --- gc_core/js/lang_core/gc_options.js +++ gc_core/js/lang_core/gc_options.js @@ -1,27 +1,31 @@ // Options for Grammalecte ${map} -function getOptions (sContext="JavaScript") { - if (dOpt.hasOwnProperty(sContext)) { - return dOpt[sContext]; - } - return dOpt["JavaScript"]; -} - -const lStructOpt = ${lStructOpt}; - -const dOpt = { - "JavaScript": new Map (${dOptJavaScript}), - "Firefox": new Map (${dOptFirefox}), - "Thunderbird": new Map (${dOptThunderbird}), -} - -const dOptLabel = ${dOptLabel}; + +var gc_options = { + getOptions: function (sContext="JavaScript") { + if (this.dOpt.hasOwnProperty(sContext)) { + return this.dOpt[sContext]; + } + return this.dOpt["JavaScript"]; + }, + + lStructOpt: ${lStructOpt}, + + dOpt: { + "JavaScript": new Map (${dOptJavaScript}), + "Firefox": new Map (${dOptFirefox}), + "Thunderbird": new Map (${dOptThunderbird}), + }, + + dOptLabel: ${dOptLabel} +} if (typeof(exports) !== 'undefined') { - exports.getOptions = getOptions; - exports.lStructOpt = lStructOpt; - exports.dOptLabel = dOptLabel; + exports.getOptions = gc_options.getOptions; + exports.lStructOpt = gc_options.lStructOpt; + exports.dOpt = gc_options.dOpt; + exports.dOptLabel = gc_options.dOptLabel; } Index: gc_core/js/lang_core/gc_rules.js ================================================================== --- gc_core/js/lang_core/gc_rules.js +++ gc_core/js/lang_core/gc_rules.js @@ -2,14 +2,16 @@ "use strict"; ${string} ${regex} -const lParagraphRules = ${paragraph_rules_JS}; +var gc_rules = { + lParagraphRules: ${paragraph_rules_JS}, -const lSentenceRules = ${sentence_rules_JS}; + lSentenceRules: ${sentence_rules_JS} +} if (typeof(exports) !== 'undefined') { - exports.lParagraphRules = lParagraphRules; - exports.lSentenceRules = lSentenceRules; + exports.lParagraphRules = gc_rules.lParagraphRules; + exports.lSentenceRules = gc_rules.lSentenceRules; } Index: gc_core/js/str_transform.js ================================================================== --- gc_core/js/str_transform.js +++ gc_core/js/str_transform.js @@ -1,60 +1,32 @@ //// STRING TRANSFORMATION -var dSimilarChars = new Map ([ - ["a", "aàâáä"], - ["à", "aàâáä"], - ["â", "aàâáä"], - ["á", "aàâáä"], - ["ä", "aàâáä"], - ["c", "cç"], - ["ç", "cç"], - ["e", "eéêèë"], - ["é", "eéêèë"], - ["ê", "eéêèë"], - ["è", "eéêèë"], - ["ë", "eéêèë"], - ["i", "iîïíì"], - ["î", "iîïíì"], - ["ï", "iîïíì"], - ["í", "iîïíì"], - ["ì", "iîïíì"], - ["o", "oôóòö"], - ["ô", "oôóòö"], - ["ó", "oôóòö"], - ["ò", "oôóòö"], - ["ö", "oôóòö"], - ["u", "uûùüú"], - ["û", "uûùüú"], - ["ù", "uûùüú"], - ["ü", "uûùüú"], - ["ú", "uûùüú"] -]); - // Note: 48 is the ASCII code for "0" -// Suffix only -function getStemFromSuffixCode (sFlex, sSfxCode) { - if (sSfxCode == "0") { - return sFlex; - } - return sSfxCode[0] == '0' ? sFlex + sSfxCode.slice(1) : sFlex.slice(0, -(sSfxCode.charCodeAt(0)-48)) + sSfxCode.slice(1); -} - -// Prefix and suffix -function getStemFromAffixCode (sFlex, sAffCode) { - if (sAffCode == "0") { - return sFlex; - } - if (!sAffCode.includes("/")) { - return "# error #"; - } - var [sPfxCode, sSfxCode] = sAffCode.split('/'); - sFlex = sPfxCode.slice(1) + sFlex.slice(sPfxCode.charCodeAt(0)-48); - return sSfxCode[0] == '0' ? sFlex + sSfxCode.slice(1) : sFlex.slice(0, -(sSfxCode.charCodeAt(0)-48)) + sSfxCode.slice(1); -} +var str_transform = { + getStemFromSuffixCode: function (sFlex, sSfxCode) { + // Suffix only + if (sSfxCode == "0") { + return sFlex; + } + return sSfxCode[0] == '0' ? sFlex + sSfxCode.slice(1) : sFlex.slice(0, -(sSfxCode.charCodeAt(0)-48)) + sSfxCode.slice(1); + }, + + getStemFromAffixCode: function (sFlex, sAffCode) { + // Prefix and suffix + if (sAffCode == "0") { + return sFlex; + } + if (!sAffCode.includes("/")) { + return "# error #"; + } + let [sPfxCode, sSfxCode] = sAffCode.split('/'); + sFlex = sPfxCode.slice(1) + sFlex.slice(sPfxCode.charCodeAt(0)-48); + return sSfxCode[0] == '0' ? sFlex + sSfxCode.slice(1) : sFlex.slice(0, -(sSfxCode.charCodeAt(0)-48)) + sSfxCode.slice(1); + } +}; if (typeof(exports) !== 'undefined') { - exports.getStemFromSuffixCode = getStemFromSuffixCode; - exports.getStemFromAffixCode = getStemFromAffixCode; + exports.getStemFromSuffixCode = str_transform.getStemFromSuffixCode; + exports.getStemFromAffixCode = str_transform.getStemFromAffixCode; } Index: gc_core/js/tests.js ================================================================== --- gc_core/js/tests.js +++ gc_core/js/tests.js @@ -1,26 +1,29 @@ // JavaScript "use strict"; -const helpers = require("resource://grammalecte/helpers.js"); +if (typeof(require) !== 'undefined') { + var helpers = require("resource://grammalecte/helpers.js"); +} class TestGrammarChecking { - constructor (gce) { + constructor (gce, spfTests="") { this.gce = gce; + this.spfTests = spfTests; this._aRuleTested = new Set(); - }; + } * testParse (bDebug=false) { const t0 = Date.now(); - const aData = JSON.parse(helpers.loadFile("resource://grammalecte/"+this.gce.lang+"/tests_data.json")).aData; - //const aData = require("resource://grammalecte/"+this.gce.lang+"/tests_data.js").aData; - let nInvalid = 0 - let nTotal = 0 + let sURL = (this.spfTests !== "") ? this.spfTests : "resource://grammalecte/"+this.gce.lang+"/tests_data.json"; + const aData = JSON.parse(helpers.loadFile(sURL)).aData; + let nInvalid = 0; + let nTotal = 0; let sErrorText; let sSugg; let sExpectedErrors; let sTextToCheck; let sFoundErrors; @@ -59,11 +62,11 @@ "\n# Line num: " + sLineNum + "\n> to check: " + sTextToCheck + "\n expected: " + sExpectedErrors + "\n found: " + sFoundErrors + "\n errors: \n" + sListErr; - nInvalid = nInvalid + 1 + nInvalid = nInvalid + 1; } nTotal = nTotal + 1; } i = i + 1; if (i % 1000 === 0) { @@ -76,11 +79,11 @@ helpers.logerror(e); } if (bShowUntested) { i = 0; - for (let [sOpt, sLineId, sRuleId] of gce.listRules()) { + for (let [sOpt, sLineId, sRuleId] of this.gce.listRules()) { if (!this._aRuleTested.has(sLineId) && !/^[0-9]+[sp]$|^[pd]_/.test(sRuleId)) { sUntestedRules += sRuleId + ", "; i += 1; } } @@ -90,11 +93,11 @@ } const t1 = Date.now(); yield "Tests parse finished in " + ((t1-t0)/1000).toString() + " s\nTotal errors: " + nInvalid.toString() + " / " + nTotal.toString(); - }; + } _getExpectedErrors (sLine) { try { let sRes = " ".repeat(sLine.length); let z = /\{\{.+?\}\}/g; @@ -116,19 +119,19 @@ } catch (e) { helpers.logerror(e); } return " ".repeat(sLine.length); - }; + } _getFoundErrors (sLine, bDebug, sOption) { try { let aErrs = []; if (sOption) { - gce.setOption(sOption, true); + this.gce.setOption(sOption, true); aErrs = this.gce.parse(sLine, "FR", bDebug); - gce.setOption(sOption, false); + this.gce.setOption(sOption, false); } else { aErrs = this.gce.parse(sLine, "FR", bDebug); } let sRes = " ".repeat(sLine.length); let sListErr = ""; @@ -141,13 +144,13 @@ } catch (e) { helpers.logerror(e); } return [" ".repeat(sLine.length), ""]; - }; + } } if (typeof(exports) !== 'undefined') { exports.TestGrammarChecking = TestGrammarChecking; } Index: gc_core/js/text.js ================================================================== --- gc_core/js/text.js +++ gc_core/js/text.js @@ -1,64 +1,69 @@ // JavaScript "use strict"; -const helpers = require("resource://grammalecte/helpers.js"); - - -function* getParagraph (sText) { - // generator: returns paragraphs of text - let iStart = 0; - let iEnd = 0; - sText = sText.replace("\r", ""); - while ((iEnd = sText.indexOf("\n", iStart)) !== -1) { - yield sText.slice(iStart, iEnd); - iStart = iEnd + 1; - } - yield sText.slice(iStart); -} - -function* wrap (sText, nWidth=80) { - // generator: returns text line by line - while (sText) { - if (sText.length >= nWidth) { - let nEnd = sText.lastIndexOf(" ", nWidth) + 1; - if (nEnd > 0) { - yield sText.slice(0, nEnd); - sText = sText.slice(nEnd); - } else { - yield sText.slice(0, nWidth); - sText = sText.slice(nWidth); - } - } else { - break; - } - } - yield sText; -} - -function getReadableError (oErr) { - // Returns an error oErr as a readable error - try { - let sResult = "\n* " + oErr['nStart'] + ":" + oErr['nEnd'] - + " # " + oErr['sLineId'] + " # " + oErr['sRuleId'] + ":\n"; - sResult += " " + oErr["sMessage"]; - if (oErr["aSuggestions"].length > 0) { - sResult += "\n > Suggestions : " + oErr["aSuggestions"].join(" | "); - } - if (oErr["URL"] !== "") { - sResult += "\n > URL: " + oErr["URL"]; - } - return sResult; - } - catch (e) { - helpers.logerror(e); - return "\n# Error. Data: " + oErr.toString(); - } -} + +if (typeof(exports) !== 'undefined') { + var helpers = require("resource://grammalecte/helpers.js"); +} + + +var text = { + getParagraph: function* (sText) { + // generator: returns paragraphs of text + let iStart = 0; + let iEnd = 0; + sText = sText.replace("\r", ""); + while ((iEnd = sText.indexOf("\n", iStart)) !== -1) { + yield sText.slice(iStart, iEnd); + iStart = iEnd + 1; + } + yield sText.slice(iStart); + }, + + wrap: function* (sText, nWidth=80) { + // generator: returns text line by line + while (sText) { + if (sText.length >= nWidth) { + let nEnd = sText.lastIndexOf(" ", nWidth) + 1; + if (nEnd > 0) { + yield sText.slice(0, nEnd); + sText = sText.slice(nEnd); + } else { + yield sText.slice(0, nWidth); + sText = sText.slice(nWidth); + } + } else { + break; + } + } + yield sText; + }, + + getReadableError: function (oErr) { + // Returns an error oErr as a readable error + try { + let sResult = "\n* " + oErr['nStart'] + ":" + oErr['nEnd'] + + " # " + oErr['sLineId'] + " # " + oErr['sRuleId'] + ":\n"; + sResult += " " + oErr["sMessage"]; + if (oErr["aSuggestions"].length > 0) { + sResult += "\n > Suggestions : " + oErr["aSuggestions"].join(" | "); + } + if (oErr["URL"] !== "") { + sResult += "\n > URL: " + oErr["URL"]; + } + return sResult; + } + catch (e) { + helpers.logerror(e); + return "\n# Error. Data: " + oErr.toString(); + } + } +}; if (typeof(exports) !== 'undefined') { - exports.getParagraph = getParagraph; - exports.wrap = wrap; - exports.getReadableError = getReadableError; + exports.getParagraph = text.getParagraph; + exports.wrap = text.wrap; + exports.getReadableError = text.getReadableError; } Index: gc_core/js/tokenizer.js ================================================================== --- gc_core/js/tokenizer.js +++ gc_core/js/tokenizer.js @@ -1,13 +1,17 @@ // JavaScript // Very simple tokenizer "use strict"; -const helpers = require("resource://grammalecte/helpers.js"); + +if (typeof(exports) !== 'undefined') { + var helpers = require("resource://grammalecte/helpers.js"); +} + -const aPatterns = { +const aTkzPatterns = { // All regexps must start with ^. "default": [ [/^[   \t]+/, 'SPACE'], [/^[,.;:!?…«»“”‘’"(){}\[\]/·–—]+/, 'SEPARATOR'], @@ -33,22 +37,22 @@ [/^\d\d?[hm]\d\d\b/, 'HOUR'], [/^\d+(?:er|nd|e|de|ième|ème|eme)s?\b/, 'ORDINAL'], [/^-?\d+(?:[.,]\d+|)/, 'NUM'], [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+)*/, 'WORD'] ] -} +}; class Tokenizer { constructor (sLang) { this.sLang = sLang; - if (!aPatterns.hasOwnProperty(sLang)) { + if (!aTkzPatterns.hasOwnProperty(sLang)) { this.sLang = "default"; } - this.aRules = aPatterns[this.sLang]; - }; + this.aRules = aTkzPatterns[this.sLang]; + } * genTokens (sText) { let m; let i = 0; while (sText) { @@ -72,11 +76,11 @@ } } i += nCut; sText = sText.slice(nCut); } - }; + } getSpellingErrors (sText, oDict) { let aSpellErr = []; for (let oToken of this.genTokens(sText)) { if (oToken.sType === 'WORD' && !oDict.isValidToken(oToken.sValue)) {