Index: gc_core/js/lang_core/gc_engine.js ================================================================== --- gc_core/js/lang_core/gc_engine.js +++ gc_core/js/lang_core/gc_engine.js @@ -15,336 +15,341 @@ } const ibdawg = require("resource://grammalecte/ibdawg.js"); const helpers = require("resource://grammalecte/helpers.js"); const gc_options = require("resource://grammalecte/${lang}/gc_options.js"); +const gc_rules = require("resource://grammalecte/${lang}/gc_rules.js"); const cr = require("resource://grammalecte/${lang}/cregex.js"); const text = require("resource://grammalecte/text.js"); const echo = require("resource://grammalecte/helpers.js").echo; -const lang = "${lang}"; -const locales = ${loc}; -const pkg = "${implname}"; -const name = "${name}"; -const version = "${version}"; -const author = "${author}"; - -// commons regexes -const _zEndOfSentence = new RegExp ('([.?!:;…][ .?!… »”")]*|.$)', "g"); -const _zBeginOfParagraph = new RegExp ("^[-  –—.,;?!…]*", "ig"); -const _zEndOfParagraph = new RegExp ("[-  .,;?!…–—]*$", "ig"); - -// grammar rules and dictionary -//const _rules = require("./gc_rules.js"); -let _sContext = ""; // what software is running -const _rules = require("resource://grammalecte/${lang}/gc_rules.js"); +// data +let _sAppContext = ""; // what software is running let _dOptions = null; let _aIgnoredRules = new Set(); let _oDict = null; -let _dAnalyses = new Map(); // cache for data from dictionary - - -///// Parsing - -function parse (sText, sCountry="${country_default}", bDebug=false, bContext=false) { - // analyses the paragraph sText and returns list of errors - let dErrors; - let errs; - let sAlt = sText; - let dDA = new Map(); // Disamnbiguator - let dPriority = new Map(); // Key = position; value = priority - let sNew = ""; - - // parse paragraph - try { - [sNew, dErrors] = _proofread(sText, sAlt, 0, true, dDA, dPriority, sCountry, bDebug, bContext); - if (sNew) { - sText = sNew; - } - } - catch (e) { - helpers.logerror(e); - } - - // cleanup - if (sText.includes(" ")) { - sText = sText.replace(/ /g, ' '); // nbsp - } - if (sText.includes(" ")) { - sText = sText.replace(/ /g, ' '); // snbsp - } - if (sText.includes("'")) { - sText = sText.replace(/'/g, "’"); - } - if (sText.includes("‑")) { - sText = sText.replace(/‑/g, "-"); // nobreakdash - } - - // parse sentence - for (let [iStart, iEnd] of _getSentenceBoundaries(sText)) { - if (4 < (iEnd - iStart) < 2000) { - dDA.clear(); - //echo(sText.slice(iStart, iEnd)); - try { - [_, errs] = _proofread(sText.slice(iStart, iEnd), sAlt.slice(iStart, iEnd), iStart, false, dDA, dPriority, sCountry, bDebug, bContext); - dErrors.gl_update(errs); - } - catch (e) { - helpers.logerror(e); - } - } - } - return Array.from(dErrors.values()); -} - -function* _getSentenceBoundaries (sText) { - let mBeginOfSentence = _zBeginOfParagraph.exec(sText) - let iStart = _zBeginOfParagraph.lastIndex; - let m; - while ((m = _zEndOfSentence.exec(sText)) !== null) { - yield [iStart, _zEndOfSentence.lastIndex]; - iStart = _zEndOfSentence.lastIndex; - } -} - -function _proofread (s, sx, nOffset, bParagraph, dDA, dPriority, sCountry, bDebug, bContext) { - let dErrs = new Map(); - let bChange = false; - let bIdRule = option('idrule'); - let m; - let bCondMemo; - let nErrorStart; - - for (let [sOption, lRuleGroup] of _getRules(bParagraph)) { - if (!sOption || option(sOption)) { - for (let [zRegex, bUppercase, sLineId, sRuleId, nPriority, lActions, lGroups, lNegLookBefore] of lRuleGroup) { - if (!_aIgnoredRules.has(sRuleId)) { - while ((m = zRegex.gl_exec2(s, lGroups, lNegLookBefore)) !== null) { - bCondMemo = null; - /*if (bDebug) { - echo(">>>> Rule # " + sLineId + " - Text: " + s + " opt: "+ sOption); - }*/ - for (let [sFuncCond, cActionType, sWhat, ...eAct] of lActions) { - // action in lActions: [ condition, action type, replacement/suggestion/action[, iGroup[, message, URL]] ] - try { - //echo(oEvalFunc[sFuncCond]); - bCondMemo = (!sFuncCond || oEvalFunc[sFuncCond](s, sx, m, dDA, sCountry, bCondMemo)) - if (bCondMemo) { - switch (cActionType) { - case "-": - // grammar error - //echo("-> error detected in " + sLineId + "\nzRegex: " + zRegex.source); - nErrorStart = nOffset + m.start[eAct[0]]; - if (!dErrs.has(nErrorStart) || nPriority > dPriority.get(nErrorStart)) { - dErrs.set(nErrorStart, _createError(s, sx, sWhat, nOffset, m, eAct[0], sLineId, sRuleId, bUppercase, eAct[1], eAct[2], bIdRule, sOption, bContext)); - dPriority.set(nErrorStart, nPriority); - } - break; - case "~": - // text processor - //echo("-> text processor by " + sLineId + "\nzRegex: " + zRegex.source); - s = _rewrite(s, sWhat, eAct[0], m, bUppercase); - bChange = true; - if (bDebug) { - echo("~ " + s + " -- " + m[eAct[0]] + " # " + sLineId); - } - break; - case "=": - // disambiguation - //echo("-> disambiguation by " + sLineId + "\nzRegex: " + zRegex.source); - oEvalFunc[sWhat](s, m, dDA); - if (bDebug) { - echo("= " + m[0] + " # " + sLineId + "\nDA: " + dDA.gl_toString()); - } - break; - case ">": - // we do nothing, this test is just a condition to apply all following actions - break; - default: - echo("# error: unknown action at " + sLineId); - } - } else { - if (cActionType == ">") { - break; - } - } - } - catch (e) { - echo(s); - echo("# line id: " + sLineId + "\n# rule id: " + sRuleId); - helpers.logerror(e); +let _dAnalyses = new Map(); // cache for data from dictionary + + +const gc_engine = { + + //// Informations + + lang: "${lang}", + locales: ${loc}, + pkg: "${implname}", + name: "${name}", + version: "${version}", + author: "${author}", + + //// Parsing + + parse: function (sText, sCountry="${country_default}", bDebug=false, bContext=false) { + // analyses the paragraph sText and returns list of errors + let dErrors; + let errs; + let sAlt = sText; + let dDA = new Map(); // Disamnbiguator + let dPriority = new Map(); // Key = position; value = priority + let sNew = ""; + + // parse paragraph + try { + [sNew, dErrors] = this._proofread(sText, sAlt, 0, true, dDA, dPriority, sCountry, bDebug, bContext); + if (sNew) { + sText = sNew; + } + } + catch (e) { + helpers.logerror(e); + } + + // cleanup + if (sText.includes(" ")) { + sText = sText.replace(/ /g, ' '); // nbsp + } + if (sText.includes(" ")) { + sText = sText.replace(/ /g, ' '); // snbsp + } + if (sText.includes("'")) { + sText = sText.replace(/'/g, "’"); + } + if (sText.includes("‑")) { + sText = sText.replace(/‑/g, "-"); // nobreakdash + } + + // parse sentence + for (let [iStart, iEnd] of this._getSentenceBoundaries(sText)) { + if (4 < (iEnd - iStart) < 2000) { + dDA.clear(); + //echo(sText.slice(iStart, iEnd)); + try { + [_, errs] = this._proofread(sText.slice(iStart, iEnd), sAlt.slice(iStart, iEnd), iStart, false, dDA, dPriority, sCountry, bDebug, bContext); + dErrors.gl_update(errs); + } + catch (e) { + helpers.logerror(e); + } + } + } + return Array.from(dErrors.values()); + }, + + _zEndOfSentence: new RegExp ('([.?!:;…][ .?!… »”")]*|.$)', "g"), + _zBeginOfParagraph: new RegExp ("^[-  –—.,;?!…]*", "ig"), + _zEndOfParagraph: new RegExp ("[-  .,;?!…–—]*$", "ig"), + + _getSentenceBoundaries: function* (sText) { + let mBeginOfSentence = this._zBeginOfParagraph.exec(sText) + let iStart = this._zBeginOfParagraph.lastIndex; + let m; + while ((m = this._zEndOfSentence.exec(sText)) !== null) { + yield [iStart, this._zEndOfSentence.lastIndex]; + iStart = this._zEndOfSentence.lastIndex; + } + }, + + _proofread: function (s, sx, nOffset, bParagraph, dDA, dPriority, sCountry, bDebug, bContext) { + let dErrs = new Map(); + let bChange = false; + let bIdRule = option('idrule'); + let m; + let bCondMemo; + let nErrorStart; + + for (let [sOption, lRuleGroup] of this._getRules(bParagraph)) { + if (!sOption || option(sOption)) { + for (let [zRegex, bUppercase, sLineId, sRuleId, nPriority, lActions, lGroups, lNegLookBefore] of lRuleGroup) { + if (!_aIgnoredRules.has(sRuleId)) { + while ((m = zRegex.gl_exec2(s, lGroups, lNegLookBefore)) !== null) { + bCondMemo = null; + /*if (bDebug) { + echo(">>>> Rule # " + sLineId + " - Text: " + s + " opt: "+ sOption); + }*/ + for (let [sFuncCond, cActionType, sWhat, ...eAct] of lActions) { + // action in lActions: [ condition, action type, replacement/suggestion/action[, iGroup[, message, URL]] ] + try { + //echo(oEvalFunc[sFuncCond]); + bCondMemo = (!sFuncCond || oEvalFunc[sFuncCond](s, sx, m, dDA, sCountry, bCondMemo)) + if (bCondMemo) { + switch (cActionType) { + case "-": + // grammar error + //echo("-> error detected in " + sLineId + "\nzRegex: " + zRegex.source); + nErrorStart = nOffset + m.start[eAct[0]]; + if (!dErrs.has(nErrorStart) || nPriority > dPriority.get(nErrorStart)) { + dErrs.set(nErrorStart, this._createError(s, sx, sWhat, nOffset, m, eAct[0], sLineId, sRuleId, bUppercase, eAct[1], eAct[2], bIdRule, sOption, bContext)); + dPriority.set(nErrorStart, nPriority); + } + break; + case "~": + // text processor + //echo("-> text processor by " + sLineId + "\nzRegex: " + zRegex.source); + s = this._rewrite(s, sWhat, eAct[0], m, bUppercase); + bChange = true; + if (bDebug) { + echo("~ " + s + " -- " + m[eAct[0]] + " # " + sLineId); + } + break; + case "=": + // disambiguation + //echo("-> disambiguation by " + sLineId + "\nzRegex: " + zRegex.source); + oEvalFunc[sWhat](s, m, dDA); + if (bDebug) { + echo("= " + m[0] + " # " + sLineId + "\nDA: " + dDA.gl_toString()); + } + break; + case ">": + // we do nothing, this test is just a condition to apply all following actions + break; + default: + echo("# error: unknown action at " + sLineId); + } + } else { + if (cActionType == ">") { + break; + } + } + } + catch (e) { + echo(s); + echo("# line id: " + sLineId + "\n# rule id: " + sRuleId); + helpers.logerror(e); + } } } } } } } - } - if (bChange) { - return [s, dErrs]; - } - return [false, dErrs]; -} - -function _createError (s, sx, sRepl, nOffset, m, iGroup, sLineId, sRuleId, bUppercase, sMsg, sURL, bIdRule, sOption, bContext) { - let oErr = {}; - oErr["nStart"] = nOffset + m.start[iGroup]; - oErr["nEnd"] = nOffset + m.end[iGroup]; - oErr["sLineId"] = sLineId; - oErr["sRuleId"] = sRuleId; - oErr["sType"] = (sOption) ? sOption : "notype"; - // suggestions - if (sRepl[0] === "=") { - let sugg = oEvalFunc[sRepl.slice(1)](s, m); - if (sugg) { - if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) { - oErr["aSuggestions"] = capitalizeArray(sugg.split("|")); - } else { - oErr["aSuggestions"] = sugg.split("|"); - } - } else { - oErr["aSuggestions"] = []; - } - } else if (sRepl == "_") { - oErr["aSuggestions"] = []; - } else { - if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) { - oErr["aSuggestions"] = capitalizeArray(sRepl.gl_expand(m).split("|")); - } else { - oErr["aSuggestions"] = sRepl.gl_expand(m).split("|"); - } - } - // Message - if (sMsg[0] === "=") { - sMessage = oEvalFunc[sMsg.slice(1)](s, m) - } else { - sMessage = sMsg.gl_expand(m); - } - if (bIdRule) { - sMessage += " ##" + sLineId + " #" + sRuleId; - } - oErr["sMessage"] = sMessage; - // URL - oErr["URL"] = sURL || ""; - // Context - if (bContext) { - oErr["sUnderlined"] = sx.slice(m.start[iGroup], m.end[iGroup]); - oErr["sBefore"] = sx.slice(Math.max(0, m.start[iGroup]-80), m.start[iGroup]); - oErr["sAfter"] = sx.slice(m.end[iGroup], m.end[iGroup]+80); - } - return oErr; -} - -function _rewrite (s, sRepl, iGroup, m, bUppercase) { - // text processor: write sRepl in s at iGroup position" - let ln = m.end[iGroup] - m.start[iGroup]; - let sNew = ""; - if (sRepl === "*") { - sNew = " ".repeat(ln); - } else if (sRepl === ">" || sRepl === "_" || sRepl === "~") { - sNew = sRepl + " ".repeat(ln-1); - } else if (sRepl === "@") { - sNew = "@".repeat(ln); - } else if (sRepl.slice(0,1) === "=") { - sNew = oEvalFunc[sRepl.slice(1)](s, m); - sNew = sNew + " ".repeat(ln-sNew.length); - if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) { - sNew = sNew.gl_toCapitalize(); - } - } else { - sNew = sRepl.gl_expand(m); - sNew = sNew + " ".repeat(ln-sNew.length); - } - //echo("\n"+s+"\nstart: "+m.start[iGroup]+" end:"+m.end[iGroup]) - return s.slice(0, m.start[iGroup]) + sNew + s.slice(m.end[iGroup]); -} - -function ignoreRule (sRuleId) { - _aIgnoredRules.add(sRuleId); -} - -function resetIgnoreRules () { - _aIgnoredRules.clear(); -} - -function reactivateRule (sRuleId) { - _aIgnoredRules.delete(sRuleId); -} - -function listRules (sFilter=null) { - // generator: returns tuple (sOption, sLineId, sRuleId) - try { - for ([sOption, lRuleGroup] of _getRules(true)) { - for ([_, _, sLineId, sRuleId, _, _] of lRuleGroup) { - if (!sFilter || sRuleId.test(sFilter)) { - yield [sOption, sLineId, sRuleId]; - } - } - } - for ([sOption, lRuleGroup] of _getRules(false)) { - for ([_, _, sLineId, sRuleId, _, _] of lRuleGroup) { - if (!sFilter || sRuleId.test(sFilter)) { - yield [sOption, sLineId, sRuleId]; - } - } - } - } - catch (e) { - helpers.logerror(e); - } -} - - -//////// init - -function load (sContext="JavaScript") { - try { - _oDict = new ibdawg.IBDAWG("${dic_name}.json"); - _sContext = sContext; - _dOptions = gc_options.getOptions(sContext).gl_shallowCopy(); // duplication necessary, to be able to reset to default - } - catch (e) { - helpers.logerror(e); - } -} - -function setOption (sOpt, bVal) { - if (_dOptions.has(sOpt)) { - _dOptions.set(sOpt, bVal); - } -} - -function setOptions (dOpt) { - _dOptions.gl_updateOnlyExistingKeys(dOpt); -} - -function getOptions () { - return _dOptions; -} - -function getDefaultOptions () { - return gc_options.getOptions(_sContext).gl_shallowCopy(); -} - -function resetOptions () { - _dOptions = gc_options.getOptions(_sContext).gl_shallowCopy(); -} - -function getDictionary () { - return _oDict; -} - -function _getRules (bParagraph) { - if (!bParagraph) { - return _rules.lSentenceRules; - } - return _rules.lParagraphRules; -} - - - -//////// common functions + if (bChange) { + return [s, dErrs]; + } + return [false, dErrs]; + }, + + _createError: function (s, sx, sRepl, nOffset, m, iGroup, sLineId, sRuleId, bUppercase, sMsg, sURL, bIdRule, sOption, bContext) { + let oErr = {}; + oErr["nStart"] = nOffset + m.start[iGroup]; + oErr["nEnd"] = nOffset + m.end[iGroup]; + oErr["sLineId"] = sLineId; + oErr["sRuleId"] = sRuleId; + oErr["sType"] = (sOption) ? sOption : "notype"; + // suggestions + if (sRepl[0] === "=") { + let sugg = oEvalFunc[sRepl.slice(1)](s, m); + if (sugg) { + if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) { + oErr["aSuggestions"] = capitalizeArray(sugg.split("|")); + } else { + oErr["aSuggestions"] = sugg.split("|"); + } + } else { + oErr["aSuggestions"] = []; + } + } else if (sRepl == "_") { + oErr["aSuggestions"] = []; + } else { + if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) { + oErr["aSuggestions"] = capitalizeArray(sRepl.gl_expand(m).split("|")); + } else { + oErr["aSuggestions"] = sRepl.gl_expand(m).split("|"); + } + } + // Message + if (sMsg[0] === "=") { + sMessage = oEvalFunc[sMsg.slice(1)](s, m) + } else { + sMessage = sMsg.gl_expand(m); + } + if (bIdRule) { + sMessage += " ##" + sLineId + " #" + sRuleId; + } + oErr["sMessage"] = sMessage; + // URL + oErr["URL"] = sURL || ""; + // Context + if (bContext) { + oErr["sUnderlined"] = sx.slice(m.start[iGroup], m.end[iGroup]); + oErr["sBefore"] = sx.slice(Math.max(0, m.start[iGroup]-80), m.start[iGroup]); + oErr["sAfter"] = sx.slice(m.end[iGroup], m.end[iGroup]+80); + } + return oErr; + }, + + _rewrite: function (s, sRepl, iGroup, m, bUppercase) { + // text processor: write sRepl in s at iGroup position" + let ln = m.end[iGroup] - m.start[iGroup]; + let sNew = ""; + if (sRepl === "*") { + sNew = " ".repeat(ln); + } else if (sRepl === ">" || sRepl === "_" || sRepl === "~") { + sNew = sRepl + " ".repeat(ln-1); + } else if (sRepl === "@") { + sNew = "@".repeat(ln); + } else if (sRepl.slice(0,1) === "=") { + sNew = oEvalFunc[sRepl.slice(1)](s, m); + sNew = sNew + " ".repeat(ln-sNew.length); + if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) { + sNew = sNew.gl_toCapitalize(); + } + } else { + sNew = sRepl.gl_expand(m); + sNew = sNew + " ".repeat(ln-sNew.length); + } + //echo("\n"+s+"\nstart: "+m.start[iGroup]+" end:"+m.end[iGroup]) + return s.slice(0, m.start[iGroup]) + sNew + s.slice(m.end[iGroup]); + }, + + // Actions on rules + + ignoreRule: function (sRuleId) { + _aIgnoredRules.add(sRuleId); + }, + + resetIgnoreRules: function () { + _aIgnoredRules.clear(); + }, + + reactivateRule: function (sRuleId) { + _aIgnoredRules.delete(sRuleId); + }, + + listRules: function (sFilter=null) { + // generator: returns tuple (sOption, sLineId, sRuleId) + try { + for ([sOption, lRuleGroup] of _getRules(true)) { + for ([_, _, sLineId, sRuleId, _, _] of lRuleGroup) { + if (!sFilter || sRuleId.test(sFilter)) { + yield [sOption, sLineId, sRuleId]; + } + } + } + for ([sOption, lRuleGroup] of _getRules(false)) { + for ([_, _, sLineId, sRuleId, _, _] of lRuleGroup) { + if (!sFilter || sRuleId.test(sFilter)) { + yield [sOption, sLineId, sRuleId]; + } + } + } + } + catch (e) { + helpers.logerror(e); + } + }, + + _getRules: function (bParagraph) { + if (!bParagraph) { + return gc_rules.lSentenceRules; + } + return gc_rules.lParagraphRules; + }, + + //// Initialization + + load: function (sContext="JavaScript") { + try { + _oDict = new ibdawg.IBDAWG("${dic_name}.json"); + _sAppContext = sContext; + _dOptions = gc_options.getOptions(sContext).gl_shallowCopy(); // duplication necessary, to be able to reset to default + } + catch (e) { + helpers.logerror(e); + } + }, + + getDictionary: function () { + return _oDict; + }, + + //// Options + + setOption: function (sOpt, bVal) { + if (_dOptions.has(sOpt)) { + _dOptions.set(sOpt, bVal); + } + }, + + setOptions: function (dOpt) { + _dOptions.gl_updateOnlyExistingKeys(dOpt); + }, + + getOptions: function () { + return _dOptions; + }, + + getDefaultOptions: function () { + return gc_options.getOptions(_sAppContext).gl_shallowCopy(); + }, + + resetOptions: function () { + _dOptions = gc_options.getOptions(_sAppContext).gl_shallowCopy(); + } +} + + +//////// Common functions function option (sOpt) { // return true if option sOpt is active return _dOptions.get(sOpt); } @@ -594,10 +599,11 @@ function define (dDA, nPos, lMorph) { dDA.set(nPos, lMorph); return true; } + //////// GRAMMAR CHECKER PLUGINS ${pluginsJS} @@ -605,20 +611,32 @@ ${callablesJS} if (typeof(exports) !== 'undefined') { - exports.load = load; - exports.parse = parse; - exports.lang = lang; - exports.version = version; - exports.getDictionary = getDictionary; - exports.setOption = setOption; - exports.setOptions = setOptions; - exports.getOptions = getOptions; - exports.getDefaultOptions = getDefaultOptions; - exports.resetOptions = resetOptions; - exports.ignoreRule = ignoreRule; - exports.reactivateRule = reactivateRule; - exports.resetIgnoreRules = resetIgnoreRules; - exports.listRules = listRules; + exports.lang = gc_engine.lang; + exports.locales = gc_engine.locales; + exports.pkg = gc_engine.pkg; + exports.name = gc_engine.name; + exports.version = gc_engine.version; + exports.author = gc_engine.author; + exports.parse = gc_engine.parse; + exports._zEndOfSentence = gc_engine._zEndOfSentence; + exports._zBeginOfParagraph = gc_engine._zBeginOfParagraph; + exports._zEndOfParagraph = gc_engine._zEndOfParagraph; + exports._getSentenceBoundaries = gc_engine._getSentenceBoundaries; + exports._proofread = gc_engine._proofread; + exports._createError = gc_engine._createError; + exports._rewrite = gc_engine._rewrite; + exports.ignoreRule = gc_engine.ignoreRule; + exports.resetIgnoreRules = gc_engine.resetIgnoreRules; + exports.reactivateRule = gc_engine.reactivateRule; + exports.listRules = gc_engine.listRules; + exports._getRules = gc_engine._getRules; + exports.load = gc_engine.load; + exports.getDictionary = gc_engine.getDictionary; + exports.setOption = gc_engine.setOption; + exports.setOptions = gc_engine.setOptions; + exports.getOptions = gc_engine.getOptions; + exports.getDefaultOptions = gc_engine.getDefaultOptions; + exports.resetOptions = gc_engine.resetOptions; }