Index: gc_core/js/lang_core/gc_engine.js ================================================================== --- gc_core/js/lang_core/gc_engine.js +++ gc_core/js/lang_core/gc_engine.js @@ -3,18 +3,21 @@ /* jshint esversion:6, -W097 */ /* jslint esversion:6 */ /* global require, exports, console */ "use strict"; + ${string} ${regex} ${map} if (typeof(process) !== 'undefined') { // NodeJS + var spellchecker = require("../graphspell/spellchecker.js"); + var gc_functions = require("./gc_functions.js"); var gc_options = require("./gc_options.js"); var gc_rules = require("./gc_rules.js"); var gc_rules_graph = require("./gc_rules_graph.js"); var cregex = require("./cregex.js"); var text = require("../text.js"); @@ -28,16 +31,10 @@ aNew[i] = aArray[i].slice(0,1).toUpperCase() + aArray[i].slice(1); } return aNew; } -function echo (x) { - console.log(x); - return true; -} - -var _sAppContext = ""; var gc_engine = { //// Informations @@ -58,19 +55,18 @@ //// Initialization load: function (sContext="JavaScript", sColorType="aRGB", sPath="") { try { - _sAppContext = sContext; if (typeof(process) !== 'undefined') { - var spellchecker = require("../graphspell/spellchecker.js"); this.oSpellChecker = new spellchecker.SpellChecker("${lang}", "", "${dic_main_filename_js}", "${dic_community_filename_js}", "${dic_personal_filename_js}"); } else { this.oSpellChecker = new SpellChecker("${lang}", sPath, "${dic_main_filename_js}", "${dic_community_filename_js}", "${dic_personal_filename_js}"); } this.oSpellChecker.activateStorage(); this.oTokenizer = this.oSpellChecker.getTokenizer(); + gc_functions.load(sContext, this.oSpellChecker); gc_options.load(sContext) this.oOptionsColors = gc_options.getOptionsColors(sContext, sColorType); } catch (e) { console.error(e); @@ -300,11 +296,12 @@ while ((m = zRegex.gl_exec2(sText, lGroups, lNegLookBefore)) !== null) { let bCondMemo = null; for (let [sFuncCond, cActionType, sWhat, ...eAct] of lActions) { // action in lActions: [ condition, action type, replacement/suggestion/action[, iGroup[, message, URL]] ] try { - bCondMemo = (!sFuncCond || oEvalFunc[sFuncCond](sText, sText0, m, this.dTokenPos, sCountry, bCondMemo)); + bCondMemo = (!sFuncCond || gc_functions[sFuncCond](sText, sText0, m, this.dTokenPos, sCountry, bCondMemo)); + //bCondMemo = (!sFuncCond || oEvalFunc[sFuncCond](sText, sText0, m, this.dTokenPos, sCountry, bCondMemo)); if (bCondMemo) { switch (cActionType) { case "-": // grammar error //console.log("-> error detected in " + sLineId + "\nzRegex: " + zRegex.source); @@ -325,11 +322,12 @@ } break; case "=": // disambiguation //console.log("-> disambiguation by " + sLineId + "\nzRegex: " + zRegex.source); - oEvalFunc[sWhat](sText, m, this.dTokenPos); + gc_functions[sWhat](sText, m, this.dTokenPos); + //oEvalFunc[sWhat](sText, m, this.dTokenPos); if (bDebug) { console.log("= " + m[0] + " # " + sLineId, "\nDA:", this.dTokenPos); } break; case ">": @@ -668,11 +666,12 @@ // Disambiguator [ option, condition, "=", replacement/suggestion/action ] // Tag [ option, condition, "/", replacement/suggestion/action, iTokenStart, iTokenEnd ] // Immunity [ option, condition, "!", "", iTokenStart, iTokenEnd ] // Test [ option, condition, ">", "" ] if (!sOption || dOptions.gl_get(sOption, false)) { - bCondMemo = !sFuncCond || oEvalFunc[sFuncCond](this.lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, this.dTags, this.sSentence, this.sSentence0); + bCondMemo = !sFuncCond || gc_functions[sFuncCond](this.lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, this.dTags, this.sSentence, this.sSentence0); + //bCondMemo = !sFuncCond || oEvalFunc[sFuncCond](this.lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, this.dTags, this.sSentence, this.sSentence0); if (bCondMemo) { if (cActionType == "-") { // grammar error let [iTokenStart, iTokenEnd, cStartLimit, cEndLimit, bCaseSvty, nPriority, sMessage, iURL] = eAct; let nTokenErrorStart = (iTokenStart > 0) ? nTokenOffset + iTokenStart : nLastToken + iTokenStart; @@ -701,11 +700,12 @@ console.log(` TEXT_PROCESSOR: [${this.lToken[nTokenStart]["sValue"]}:${this.lToken[nTokenEnd]["sValue"]}] > ${sWhat}`); } } else if (cActionType == "=") { // disambiguation - oEvalFunc[sWhat](this.lToken, nTokenOffset, nLastToken); + gc_functions[sWhat](this.lToken, nTokenOffset, nLastToken); + //oEvalFunc[sWhat](this.lToken, nTokenOffset, nLastToken); if (bDebug) { console.log(` DISAMBIGUATOR: (${sWhat}) [${this.lToken[nTokenOffset+1]["sValue"]}:${this.lToken[nLastToken]["sValue"]}]`); } } else if (cActionType == ">") { @@ -783,11 +783,12 @@ let nStart = nOffset + m.start[iGroup]; let nEnd = nOffset + m.end[iGroup]; // suggestions let lSugg = []; if (sSugg.startsWith("=")) { - sSugg = oEvalFunc[sSugg.slice(1)](sText, m); + sSugg = gc_functions[sSugg.slice(1)](sText, m); + //sSugg = oEvalFunc[sSugg.slice(1)](sText, m); lSugg = (sSugg) ? sSugg.split("|") : []; } else if (sSugg == "_") { lSugg = []; } else { lSugg = sSugg.gl_expand(m).split("|"); @@ -794,11 +795,12 @@ } if (bCaseSvty && lSugg.length > 0 && m[iGroup].slice(0,1).gl_isUpperCase()) { lSugg = (m[iGroup].gl_isUpperCase()) ? lSugg.map((s) => s.toUpperCase()) : capitalizeArray(lSugg); } // Message - let sMessage = (sMsg.startsWith("=")) ? oEvalFunc[sMsg.slice(1)](sText, m) : sMsg.gl_expand(m); + let sMessage = (sMsg.startsWith("=")) ? gc_functions[sMsg.slice(1)](sText, m) : sMsg.gl_expand(m); + //let sMessage = (sMsg.startsWith("=")) ? oEvalFunc[sMsg.slice(1)](sText, m) : sMsg.gl_expand(m); if (bShowRuleId) { sMessage += " #" + sLineId + " / " + sRuleId; } // return this._createError(nStart, nEnd, sLineId, sRuleId, sOption, sMessage, lSugg, sURL, bContext); @@ -806,11 +808,12 @@ _createErrorFromTokens (sSugg, nTokenOffset, nLastToken, iFirstToken, nStart, nEnd, sLineId, sRuleId, bCaseSvty, sMsg, sURL, bShowRuleId, sOption, bContext) { // suggestions let lSugg = []; if (sSugg.startsWith("=")) { - sSugg = oEvalFunc[sSugg.slice(1)](this.lToken, nTokenOffset, nLastToken); + sSugg = gc_functions[sSugg.slice(1)](this.lToken, nTokenOffset, nLastToken); + //sSugg = oEvalFunc[sSugg.slice(1)](this.lToken, nTokenOffset, nLastToken); lSugg = (sSugg) ? sSugg.split("|") : []; } else if (sSugg == "_") { lSugg = []; } else { lSugg = this._expand(sSugg, nTokenOffset, nLastToken).split("|"); @@ -817,11 +820,12 @@ } if (bCaseSvty && lSugg.length > 0 && this.lToken[iFirstToken]["sValue"].slice(0,1).gl_isUpperCase()) { lSugg = (this.sSentence.slice(nStart, nEnd).gl_isUpperCase()) ? lSugg.map((s) => s.toUpperCase()) : capitalizeArray(lSugg); } // Message - let sMessage = (sMsg.startsWith("=")) ? oEvalFunc[sMsg.slice(1)](this.lToken, nTokenOffset, nLastToken) : this._expand(sMsg, nTokenOffset, nLastToken); + let sMessage = (sMsg.startsWith("=")) ? gc_functions[sMsg.slice(1)](this.lToken, nTokenOffset, nLastToken) : this._expand(sMsg, nTokenOffset, nLastToken); + //let sMessage = (sMsg.startsWith("=")) ? oEvalFunc[sMsg.slice(1)](this.lToken, nTokenOffset, nLastToken) : this._expand(sMsg, nTokenOffset, nLastToken); if (bShowRuleId) { sMessage += " #" + sLineId + " / " + sRuleId; } // return this._createError(nStart, nEnd, sLineId, sRuleId, sOption, sMessage, lSugg, sURL, bContext); @@ -871,11 +875,12 @@ } else if (sRepl === "@") { sNew = "@".repeat(ln); } else if (sRepl.slice(0,1) === "=") { - sNew = oEvalFunc[sRepl.slice(1)](sText, m); + sNew = gc_functions[sRepl.slice(1)](sText, m); + //sNew = oEvalFunc[sRepl.slice(1)](sText, m); sNew = sNew + " ".repeat(ln-sNew.length); if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) { sNew = sNew.gl_toCapitalize(); } } else { @@ -912,11 +917,12 @@ } } } else { if (sWhat.startsWith("=")) { - sWhat = oEvalFunc[sWhat.slice(1)](this.lToken, nTokenOffset, nLastToken); + sWhat = gc_functions[sWhat.slice(1)](this.lToken, nTokenOffset, nLastToken); + //sWhat = oEvalFunc[sWhat.slice(1)](this.lToken, nTokenOffset, nLastToken); } else { sWhat = this._expand(sWhat, nTokenOffset, nLastToken); } let bUppercase = bCaseSvty && this.lToken[nTokenRewriteStart]["sValue"].slice(0,1).gl_isUpperCase(); if (nTokenRewriteEnd - nTokenRewriteStart == 0) { @@ -1020,484 +1026,10 @@ this.lToken.length = 0; this.lToken = lNewToken; } }; - -//////// Common functions - -function option (sOpt) { - // return true if option sOpt is active - return gc_options.dOptions.gl_get(sOpt, false); -} - -var re = { - search: function (sRegex, sText) { - if (sRegex.startsWith("(?i)")) { - return sText.search(new RegExp(sRegex.slice(4), "i")) !== -1; - } else { - return sText.search(sRegex) !== -1; - } - }, - - createRegExp: function (sRegex) { - if (sRegex.startsWith("(?i)")) { - return new RegExp(sRegex.slice(4), "i"); - } else { - return new RegExp(sRegex); - } - } -} - - -//////// functions to get text outside pattern scope - -// warning: check compile_rules.py to understand how it works - -function nextword (s, iStart, n) { - // get the nth word of the input string or empty string - let z = new RegExp("^(?: +[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+){" + (n-1).toString() + "} +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+)", "ig"); - let m = z.exec(s.slice(iStart)); - if (!m) { - return null; - } - return [iStart + z.lastIndex - m[1].length, m[1]]; -} - -function prevword (s, iEnd, n) { - // get the (-)nth word of the input string or empty string - let z = new RegExp("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+) +(?:[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+ +){" + (n-1).toString() + "}$", "i"); - let m = z.exec(s.slice(0, iEnd)); - if (!m) { - return null; - } - return [m.index, m[1]]; -} - -function nextword1 (s, iStart) { - // get next word (optimization) - let _zNextWord = new RegExp ("^ +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_-]*)", "ig"); - let m = _zNextWord.exec(s.slice(iStart)); - if (!m) { - return null; - } - return [iStart + _zNextWord.lastIndex - m[1].length, m[1]]; -} - -const _zPrevWord = new RegExp ("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_-]*) +$", "i"); - -function prevword1 (s, iEnd) { - // get previous word (optimization) - let m = _zPrevWord.exec(s.slice(0, iEnd)); - if (!m) { - return null; - } - return [m.index, m[1]]; -} - -function look (s, sPattern, sNegPattern=null) { - // seek sPattern in s (before/after/fulltext), if antipattern sNegPattern not in s - try { - if (sNegPattern && re.search(sNegPattern, s)) { - return false; - } - return re.search(sPattern, s); - } - catch (e) { - console.error(e); - } - return false; -} - - -//////// Analyse groups for regex rules - -function displayInfo (dTokenPos, aWord) { - // for debugging: info of word - if (!aWord) { - console.log("> nothing to find"); - return true; - } - let lMorph = gc_engine.oSpellChecker.getMorph(aWord[1]); - if (lMorph.length === 0) { - console.log("> not in dictionary"); - return true; - } - if (dTokenPos.has(aWord[0])) { - console.log("DA: " + dTokenPos.get(aWord[0])); - } - console.log("FSA: " + lMorph); - return true; -} - -function morph (dTokenPos, aWord, sPattern, sNegPattern, bNoWord=false) { - // analyse a tuple (position, word), returns true if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on) - if (!aWord) { - return bNoWord; - } - let lMorph = (dTokenPos.has(aWord[0]) && dTokenPos.get(aWord[0]))["lMorph"] ? dTokenPos.get(aWord[0])["lMorph"] : gc_engine.oSpellChecker.getMorph(aWord[1]); - if (lMorph.length === 0) { - return false; - } - if (sNegPattern) { - // check negative condition - if (sNegPattern === "*") { - // all morph must match sPattern - return lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); - } - else { - if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { - return false; - } - } - } - // search sPattern - return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); -} - -function analyse (sWord, sPattern, sNegPattern) { - // analyse a word, returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation off) - let lMorph = gc_engine.oSpellChecker.getMorph(sWord); - if (lMorph.length === 0) { - return false; - } - if (sNegPattern) { - // check negative condition - if (sNegPattern === "*") { - // all morph must match sPattern - return lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); - } - else { - if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { - return false; - } - } - } - // search sPattern - return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); -} - - -//// Analyse tokens for graph rules - -function g_value (oToken, sValues, nLeft=null, nRight=null) { - // test if is in sValues (each value should be separated with |) - let sValue = (nLeft === null) ? "|"+oToken["sValue"]+"|" : "|"+oToken["sValue"].slice(nLeft, nRight)+"|"; - if (sValues.includes(sValue)) { - return true; - } - if (oToken["sValue"].slice(0,2).gl_isTitle()) { // we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout". - if (sValues.includes(sValue.toLowerCase())) { - return true; - } - } - else if (oToken["sValue"].gl_isUpperCase()) { - //if sValue.lower() in sValues: - // return true; - sValue = "|"+sValue.slice(1).gl_toCapitalize(); - if (sValues.includes(sValue)) { - return true; - } - sValue = sValue.toLowerCase(); - if (sValues.includes(sValue)) { - return true; - } - } - return false; -} - -function g_morph (oToken, sPattern, sNegPattern="", nLeft=null, nRight=null, bMemorizeMorph=true) { - // analyse a token, return True if not in morphologies and in morphologies - let lMorph; - if (oToken.hasOwnProperty("lMorph")) { - lMorph = oToken["lMorph"]; - } - else { - if (nLeft !== null) { - let sValue = (nRight !== null) ? oToken["sValue"].slice(nLeft, nRight) : oToken["sValue"].slice(nLeft); - lMorph = gc_engine.oSpellChecker.getMorph(sValue); - if (bMemorizeMorph) { - oToken["lMorph"] = lMorph; - } - } else { - lMorph = gc_engine.oSpellChecker.getMorph(oToken["sValue"]); - } - } - if (lMorph.length == 0) { - return false; - } - // check negative condition - if (sNegPattern) { - if (sNegPattern == "*") { - // all morph must match sPattern - return lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); - } - else { - if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { - return false; - } - } - } - // search sPattern - return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); -} - -function g_analyse (oToken, sPattern, sNegPattern="", nLeft=null, nRight=null, bMemorizeMorph=true) { - // analyse a token, return True if not in morphologies and in morphologies - let lMorph; - if (nLeft !== null) { - let sValue = (nRight !== null) ? oToken["sValue"].slice(nLeft, nRight) : oToken["sValue"].slice(nLeft); - lMorph = gc_engine.oSpellChecker.getMorph(sValue); - if (bMemorizeMorph) { - oToken["lMorph"] = lMorph; - } - } else { - lMorph = gc_engine.oSpellChecker.getMorph(oToken["sValue"]); - } - if (lMorph.length == 0) { - return false; - } - // check negative condition - if (sNegPattern) { - if (sNegPattern == "*") { - // all morph must match sPattern - return lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); - } - else { - if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { - return false; - } - } - } - // search sPattern - return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); -} - -function g_merged_analyse (oToken1, oToken2, cMerger, sPattern, sNegPattern="", bSetMorph=true) { - // merge two token values, return True if not in morphologies and in morphologies (disambiguation off) - let lMorph = gc_engine.oSpellChecker.getMorph(oToken1["sValue"] + cMerger + oToken2["sValue"]); - if (lMorph.length == 0) { - return false; - } - // check negative condition - if (sNegPattern) { - if (sNegPattern == "*") { - // all morph must match sPattern - let bResult = lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); - if (bResult && bSetMorph) { - oToken1["lMorph"] = lMorph; - } - return bResult; - } - else { - if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { - return false; - } - } - } - // search sPattern - let bResult = lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); - if (bResult && bSetMorph) { - oToken1["lMorph"] = lMorph; - } - return bResult; -} - -function g_tag_before (oToken, dTags, sTag) { - if (!dTags.has(sTag)) { - return false; - } - if (oToken["i"] > dTags.get(sTag)[0]) { - return true; - } - return false; -} - -function g_tag_after (oToken, dTags, sTag) { - if (!dTags.has(sTag)) { - return false; - } - if (oToken["i"] < dTags.get(sTag)[1]) { - return true; - } - return false; -} - -function g_tag (oToken, sTag) { - return oToken.hasOwnProperty("aTags") && oToken["aTags"].has(sTag); -} - -function g_space_between_tokens (oToken1, oToken2, nMin, nMax=null) { - let nSpace = oToken2["nStart"] - oToken1["nEnd"] - if (nSpace < nMin) { - return false; - } - if (nMax !== null && nSpace > nMax) { - return false; - } - return true; -} - -function g_token (lToken, i) { - if (i < 0) { - return lToken[0]; - } - if (i >= lToken.length) { - return lToken[lToken.length-1]; - } - return lToken[i]; -} - - -//////// Disambiguator - -function select (dTokenPos, nPos, sWord, sPattern, lDefault=null) { - if (!sWord) { - return true; - } - if (!dTokenPos.has(nPos)) { - console.log("Error. There should be a token at this position: ", nPos); - return true; - } - let lMorph = gc_engine.oSpellChecker.getMorph(sWord); - if (lMorph.length === 0 || lMorph.length === 1) { - return true; - } - let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) !== -1 ); - if (lSelect.length > 0) { - if (lSelect.length != lMorph.length) { - dTokenPos.get(nPos)["lMorph"] = lSelect; - } - } else if (lDefault) { - dTokenPos.get(nPos)["lMorph"] = lDefault; - } - return true; -} - -function exclude (dTokenPos, nPos, sWord, sPattern, lDefault=null) { - if (!sWord) { - return true; - } - if (!dTokenPos.has(nPos)) { - console.log("Error. There should be a token at this position: ", nPos); - return true; - } - let lMorph = gc_engine.oSpellChecker.getMorph(sWord); - if (lMorph.length === 0 || lMorph.length === 1) { - return true; - } - let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) === -1 ); - if (lSelect.length > 0) { - if (lSelect.length != lMorph.length) { - dTokenPos.get(nPos)["lMorph"] = lSelect; - } - } else if (lDefault) { - dTokenPos.get(nPos)["lMorph"] = lDefault; - } - return true; -} - -function define (dTokenPos, nPos, sMorphs) { - dTokenPos.get(nPos)["lMorph"] = sMorphs.split("|"); - return true; -} - - -//// Disambiguation for graph rules - -function g_select (oToken, sPattern, lDefault=null) { - // select morphologies for according to , always return true - let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : gc_engine.oSpellChecker.getMorph(oToken["sValue"]); - if (lMorph.length === 0 || lMorph.length === 1) { - if (lDefault) { - oToken["lMorph"] = lDefault; - } - return true; - } - let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) !== -1 ); - if (lSelect.length > 0) { - if (lSelect.length != lMorph.length) { - oToken["lMorph"] = lSelect; - } - } else if (lDefault) { - oToken["lMorph"] = lDefault; - } - return true; -} - -function g_exclude (oToken, sPattern, lDefault=null) { - // select morphologies for according to , always return true - let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : gc_engine.oSpellChecker.getMorph(oToken["sValue"]); - if (lMorph.length === 0 || lMorph.length === 1) { - if (lDefault) { - oToken["lMorph"] = lDefault; - } - return true; - } - let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) === -1 ); - if (lSelect.length > 0) { - if (lSelect.length != lMorph.length) { - oToken["lMorph"] = lSelect; - } - } else if (lDefault) { - oToken["lMorph"] = lDefault; - } - return true; -} - -function g_add_morph (oToken, sNewMorph) { - // Disambiguation: add a morphology to a token - let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : gc_engine.oSpellChecker.getMorph(oToken["sValue"]); - lMorph.push(...sNewMorph.split("|")); - oToken["lMorph"] = lMorph; - return true; -} - -function g_rewrite (oToken, sToReplace, sReplace) { - // Disambiguation: rewrite morphologies - let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : gc_engine.oSpellChecker.getMorph(oToken["sValue"]); - oToken["lMorph"] = lMorph.map(s => s.replace(sToReplace, sReplace)); - return true; -} - -function g_define (oToken, sMorphs) { - // set morphologies of , always return true - oToken["lMorph"] = sMorphs.split("|"); - return true; -} - -function g_define_from (oToken, nLeft=null, nRight=null) { - let sValue = oToken["sValue"]; - if (nLeft !== null) { - sValue = (nRight !== null) ? sValue.slice(nLeft, nRight) : sValue.slice(nLeft); - } - oToken["lMorph"] = gc_engine.oSpellChecker.getMorph(sValue); - return true; -} - -function g_change_meta (oToken, sType) { - // Disambiguation: change type of token - oToken["sType"] = sType; - return true; -} - - - -//////// GRAMMAR CHECKER PLUGINS - -${pluginsJS} - - -// generated code, do not edit -const oEvalFunc = { - // callables for regex rules -${callablesJS} - - // callables for graph rules -${graph_callablesJS} -} - if (typeof(exports) !== 'undefined') { exports.lang = gc_engine.lang; exports.locales = gc_engine.locales; exports.pkg = gc_engine.pkg; ADDED gc_core/js/lang_core/gc_functions.js Index: gc_core/js/lang_core/gc_functions.js ================================================================== --- gc_core/js/lang_core/gc_functions.js +++ gc_core/js/lang_core/gc_functions.js @@ -0,0 +1,505 @@ +// JavaScript +// Grammar checker engine functions + +${string} +${regex} +${map} + + +if (typeof(process) !== 'undefined') { + var gc_options = require("./gc_options.js"); +} + + +let _sAppContext = "JavaScript"; // what software is running +let _oSpellChecker = null; + + +//////// Common functions + +function option (sOpt) { + // return true if option sOpt is active + return gc_options.dOptions.gl_get(sOpt, false); +} + +function echo (x) { + console.log(x); + return true; +} + +var re = { + search: function (sRegex, sText) { + if (sRegex.startsWith("(?i)")) { + return sText.search(new RegExp(sRegex.slice(4), "i")) !== -1; + } else { + return sText.search(sRegex) !== -1; + } + }, + + createRegExp: function (sRegex) { + if (sRegex.startsWith("(?i)")) { + return new RegExp(sRegex.slice(4), "i"); + } else { + return new RegExp(sRegex); + } + } +} + + +//////// functions to get text outside pattern scope + +// warning: check compile_rules.py to understand how it works + +function nextword (s, iStart, n) { + // get the nth word of the input string or empty string + let z = new RegExp("^(?: +[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+){" + (n-1).toString() + "} +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+)", "ig"); + let m = z.exec(s.slice(iStart)); + if (!m) { + return null; + } + return [iStart + z.lastIndex - m[1].length, m[1]]; +} + +function prevword (s, iEnd, n) { + // get the (-)nth word of the input string or empty string + let z = new RegExp("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+) +(?:[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+ +){" + (n-1).toString() + "}$", "i"); + let m = z.exec(s.slice(0, iEnd)); + if (!m) { + return null; + } + return [m.index, m[1]]; +} + +function nextword1 (s, iStart) { + // get next word (optimization) + let _zNextWord = new RegExp ("^ +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_-]*)", "ig"); + let m = _zNextWord.exec(s.slice(iStart)); + if (!m) { + return null; + } + return [iStart + _zNextWord.lastIndex - m[1].length, m[1]]; +} + +const _zPrevWord = new RegExp ("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_-]*) +$", "i"); + +function prevword1 (s, iEnd) { + // get previous word (optimization) + let m = _zPrevWord.exec(s.slice(0, iEnd)); + if (!m) { + return null; + } + return [m.index, m[1]]; +} + +function look (s, sPattern, sNegPattern=null) { + // seek sPattern in s (before/after/fulltext), if antipattern sNegPattern not in s + try { + if (sNegPattern && re.search(sNegPattern, s)) { + return false; + } + return re.search(sPattern, s); + } + catch (e) { + console.error(e); + } + return false; +} + + +//////// Analyse groups for regex rules + +function displayInfo (dTokenPos, aWord) { + // for debugging: info of word + if (!aWord) { + console.log("> nothing to find"); + return true; + } + let lMorph = gc_engine.oSpellChecker.getMorph(aWord[1]); + if (lMorph.length === 0) { + console.log("> not in dictionary"); + return true; + } + if (dTokenPos.has(aWord[0])) { + console.log("DA: " + dTokenPos.get(aWord[0])); + } + console.log("FSA: " + lMorph); + return true; +} + +function morph (dTokenPos, aWord, sPattern, sNegPattern, bNoWord=false) { + // analyse a tuple (position, word), returns true if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on) + if (!aWord) { + return bNoWord; + } + let lMorph = (dTokenPos.has(aWord[0]) && dTokenPos.get(aWord[0]))["lMorph"] ? dTokenPos.get(aWord[0])["lMorph"] : gc_engine.oSpellChecker.getMorph(aWord[1]); + if (lMorph.length === 0) { + return false; + } + if (sNegPattern) { + // check negative condition + if (sNegPattern === "*") { + // all morph must match sPattern + return lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); + } + else { + if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { + return false; + } + } + } + // search sPattern + return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); +} + +function analyse (sWord, sPattern, sNegPattern) { + // analyse a word, returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation off) + let lMorph = gc_engine.oSpellChecker.getMorph(sWord); + if (lMorph.length === 0) { + return false; + } + if (sNegPattern) { + // check negative condition + if (sNegPattern === "*") { + // all morph must match sPattern + return lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); + } + else { + if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { + return false; + } + } + } + // search sPattern + return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); +} + + +//// Analyse tokens for graph rules + +function g_value (oToken, sValues, nLeft=null, nRight=null) { + // test if is in sValues (each value should be separated with |) + let sValue = (nLeft === null) ? "|"+oToken["sValue"]+"|" : "|"+oToken["sValue"].slice(nLeft, nRight)+"|"; + if (sValues.includes(sValue)) { + return true; + } + if (oToken["sValue"].slice(0,2).gl_isTitle()) { // we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout". + if (sValues.includes(sValue.toLowerCase())) { + return true; + } + } + else if (oToken["sValue"].gl_isUpperCase()) { + //if sValue.lower() in sValues: + // return true; + sValue = "|"+sValue.slice(1).gl_toCapitalize(); + if (sValues.includes(sValue)) { + return true; + } + sValue = sValue.toLowerCase(); + if (sValues.includes(sValue)) { + return true; + } + } + return false; +} + +function g_morph (oToken, sPattern, sNegPattern="", nLeft=null, nRight=null, bMemorizeMorph=true) { + // analyse a token, return True if not in morphologies and in morphologies + let lMorph; + if (oToken.hasOwnProperty("lMorph")) { + lMorph = oToken["lMorph"]; + } + else { + if (nLeft !== null) { + let sValue = (nRight !== null) ? oToken["sValue"].slice(nLeft, nRight) : oToken["sValue"].slice(nLeft); + lMorph = gc_engine.oSpellChecker.getMorph(sValue); + if (bMemorizeMorph) { + oToken["lMorph"] = lMorph; + } + } else { + lMorph = gc_engine.oSpellChecker.getMorph(oToken["sValue"]); + } + } + if (lMorph.length == 0) { + return false; + } + // check negative condition + if (sNegPattern) { + if (sNegPattern == "*") { + // all morph must match sPattern + return lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); + } + else { + if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { + return false; + } + } + } + // search sPattern + return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); +} + +function g_analyse (oToken, sPattern, sNegPattern="", nLeft=null, nRight=null, bMemorizeMorph=true) { + // analyse a token, return True if not in morphologies and in morphologies + let lMorph; + if (nLeft !== null) { + let sValue = (nRight !== null) ? oToken["sValue"].slice(nLeft, nRight) : oToken["sValue"].slice(nLeft); + lMorph = gc_engine.oSpellChecker.getMorph(sValue); + if (bMemorizeMorph) { + oToken["lMorph"] = lMorph; + } + } else { + lMorph = gc_engine.oSpellChecker.getMorph(oToken["sValue"]); + } + if (lMorph.length == 0) { + return false; + } + // check negative condition + if (sNegPattern) { + if (sNegPattern == "*") { + // all morph must match sPattern + return lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); + } + else { + if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { + return false; + } + } + } + // search sPattern + return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); +} + +function g_merged_analyse (oToken1, oToken2, cMerger, sPattern, sNegPattern="", bSetMorph=true) { + // merge two token values, return True if not in morphologies and in morphologies (disambiguation off) + let lMorph = gc_engine.oSpellChecker.getMorph(oToken1["sValue"] + cMerger + oToken2["sValue"]); + if (lMorph.length == 0) { + return false; + } + // check negative condition + if (sNegPattern) { + if (sNegPattern == "*") { + // all morph must match sPattern + let bResult = lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); + if (bResult && bSetMorph) { + oToken1["lMorph"] = lMorph; + } + return bResult; + } + else { + if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { + return false; + } + } + } + // search sPattern + let bResult = lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); + if (bResult && bSetMorph) { + oToken1["lMorph"] = lMorph; + } + return bResult; +} + +function g_tag_before (oToken, dTags, sTag) { + if (!dTags.has(sTag)) { + return false; + } + if (oToken["i"] > dTags.get(sTag)[0]) { + return true; + } + return false; +} + +function g_tag_after (oToken, dTags, sTag) { + if (!dTags.has(sTag)) { + return false; + } + if (oToken["i"] < dTags.get(sTag)[1]) { + return true; + } + return false; +} + +function g_tag (oToken, sTag) { + return oToken.hasOwnProperty("aTags") && oToken["aTags"].has(sTag); +} + +function g_space_between_tokens (oToken1, oToken2, nMin, nMax=null) { + let nSpace = oToken2["nStart"] - oToken1["nEnd"] + if (nSpace < nMin) { + return false; + } + if (nMax !== null && nSpace > nMax) { + return false; + } + return true; +} + +function g_token (lToken, i) { + if (i < 0) { + return lToken[0]; + } + if (i >= lToken.length) { + return lToken[lToken.length-1]; + } + return lToken[i]; +} + + +//////// Disambiguator for regex rules + +function select (dTokenPos, nPos, sWord, sPattern, lDefault=null) { + if (!sWord) { + return true; + } + if (!dTokenPos.has(nPos)) { + console.log("Error. There should be a token at this position: ", nPos); + return true; + } + let lMorph = gc_engine.oSpellChecker.getMorph(sWord); + if (lMorph.length === 0 || lMorph.length === 1) { + return true; + } + let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) !== -1 ); + if (lSelect.length > 0) { + if (lSelect.length != lMorph.length) { + dTokenPos.get(nPos)["lMorph"] = lSelect; + } + } else if (lDefault) { + dTokenPos.get(nPos)["lMorph"] = lDefault; + } + return true; +} + +function exclude (dTokenPos, nPos, sWord, sPattern, lDefault=null) { + if (!sWord) { + return true; + } + if (!dTokenPos.has(nPos)) { + console.log("Error. There should be a token at this position: ", nPos); + return true; + } + let lMorph = gc_engine.oSpellChecker.getMorph(sWord); + if (lMorph.length === 0 || lMorph.length === 1) { + return true; + } + let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) === -1 ); + if (lSelect.length > 0) { + if (lSelect.length != lMorph.length) { + dTokenPos.get(nPos)["lMorph"] = lSelect; + } + } else if (lDefault) { + dTokenPos.get(nPos)["lMorph"] = lDefault; + } + return true; +} + +function define (dTokenPos, nPos, sMorphs) { + dTokenPos.get(nPos)["lMorph"] = sMorphs.split("|"); + return true; +} + + +//// Disambiguation for graph rules + +function g_select (oToken, sPattern, lDefault=null) { + // select morphologies for according to , always return true + let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : gc_engine.oSpellChecker.getMorph(oToken["sValue"]); + if (lMorph.length === 0 || lMorph.length === 1) { + if (lDefault) { + oToken["lMorph"] = lDefault; + } + return true; + } + let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) !== -1 ); + if (lSelect.length > 0) { + if (lSelect.length != lMorph.length) { + oToken["lMorph"] = lSelect; + } + } else if (lDefault) { + oToken["lMorph"] = lDefault; + } + return true; +} + +function g_exclude (oToken, sPattern, lDefault=null) { + // select morphologies for according to , always return true + let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : gc_engine.oSpellChecker.getMorph(oToken["sValue"]); + if (lMorph.length === 0 || lMorph.length === 1) { + if (lDefault) { + oToken["lMorph"] = lDefault; + } + return true; + } + let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) === -1 ); + if (lSelect.length > 0) { + if (lSelect.length != lMorph.length) { + oToken["lMorph"] = lSelect; + } + } else if (lDefault) { + oToken["lMorph"] = lDefault; + } + return true; +} + +function g_add_morph (oToken, sNewMorph) { + // Disambiguation: add a morphology to a token + let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : gc_engine.oSpellChecker.getMorph(oToken["sValue"]); + lMorph.push(...sNewMorph.split("|")); + oToken["lMorph"] = lMorph; + return true; +} + +function g_rewrite (oToken, sToReplace, sReplace) { + // Disambiguation: rewrite morphologies + let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : gc_engine.oSpellChecker.getMorph(oToken["sValue"]); + oToken["lMorph"] = lMorph.map(s => s.replace(sToReplace, sReplace)); + return true; +} + +function g_define (oToken, sMorphs) { + // set morphologies of , always return true + oToken["lMorph"] = sMorphs.split("|"); + return true; +} + +function g_define_from (oToken, nLeft=null, nRight=null) { + let sValue = oToken["sValue"]; + if (nLeft !== null) { + sValue = (nRight !== null) ? sValue.slice(nLeft, nRight) : sValue.slice(nLeft); + } + oToken["lMorph"] = gc_engine.oSpellChecker.getMorph(sValue); + return true; +} + +function g_change_meta (oToken, sType) { + // Disambiguation: change type of token + oToken["sType"] = sType; + return true; +} + + + +//////// GRAMMAR CHECKER PLUGINS + +${pluginsJS} + + +// generated code, do not edit +var gc_functions = { + + load: function (sContext, oSpellChecker) { + _sAppContext = sContext + _oSpellChecker = oSpellChecker + }, + + // callables for regex rules +${callablesJS} + + // callables for graph rules +${graph_callablesJS} +} + + +if (typeof(exports) !== 'undefined') { + exports.load = gc_functions.load; +} Index: gc_lang/fr/webext/gce_worker.js ================================================================== --- gc_lang/fr/webext/gce_worker.js +++ gc_lang/fr/webext/gce_worker.js @@ -43,10 +43,11 @@ importScripts("grammalecte/fr/conj.js"); importScripts("grammalecte/fr/mfsp.js"); importScripts("grammalecte/fr/phonet.js"); importScripts("grammalecte/fr/cregex.js"); importScripts("grammalecte/fr/gc_options.js"); +importScripts("grammalecte/fr/gc_functions.js"); importScripts("grammalecte/fr/gc_rules.js"); importScripts("grammalecte/fr/gc_rules_graph.js"); importScripts("grammalecte/fr/gc_engine.js"); importScripts("grammalecte/tests.js"); /*