Index: gc_core/js/lang_core/gc_engine.js ================================================================== --- gc_core/js/lang_core/gc_engine.js +++ gc_core/js/lang_core/gc_engine.js @@ -9,17 +9,20 @@ ${string} ${regex} ${map} -if(typeof(process) !== 'undefined') { +if (typeof(process) !== 'undefined') { + var gc_engine_func = require("./gc_engine_func.js"); var gc_options = require("./gc_options.js"); var gc_rules = require("./gc_rules.js"); var gc_rules_graph = require("./gc_rules_graph.js"); var cregex = require("./cregex.js"); var text = require("../text.js"); -} else if (typeof(require) !== 'undefined') { +} +else if (typeof(require) !== 'undefined') { + var gc_engine_func = require("resource://grammalecte/${lang}/gc_engine_func.js"); var gc_options = require("resource://grammalecte/${lang}/gc_options.js"); var gc_rules = require("resource://grammalecte/${lang}/gc_rules.js"); var gc_rules_graph = require("resource://grammalecte/${lang}/gc_rules_graph.js"); var cregex = require("resource://grammalecte/${lang}/cregex.js"); var text = require("resource://grammalecte/text.js"); @@ -33,25 +36,10 @@ aNew[i] = aArray[i].slice(0,1).toUpperCase() + aArray[i].slice(1); } return aNew; } - -// data -let _sAppContext = ""; // what software is running -let _dOptions = null; -let _dOptionsColors = null; -let _oSpellChecker = null; -let _oTokenizer = null; -let _aIgnoredRules = new Set(); - - -function echo (x) { - console.log(x); - return true; -} - var gc_engine = { //// Informations @@ -60,36 +48,46 @@ pkg: "${implname}", name: "${name}", version: "${version}", author: "${author}", + //// Tools + oSpellChecker: null, + oTokenizer: null, + + //// Data + aIgnoredRules: new Set(), + oOptionsColors: null, + //// Initialization load: function (sContext="JavaScript", sColorType="aRGB", sPath="") { try { - if(typeof(process) !== 'undefined') { + if (typeof(process) !== 'undefined') { var spellchecker = require("../graphspell/spellchecker.js"); - _oSpellChecker = new spellchecker.SpellChecker("${lang}", "", "${dic_main_filename_js}", "${dic_community_filename_js}", "${dic_personal_filename_js}"); - } else if (typeof(require) !== 'undefined') { + this.oSpellChecker = new spellchecker.SpellChecker("${lang}", "", "${dic_main_filename_js}", "${dic_community_filename_js}", "${dic_personal_filename_js}"); + } + else if (typeof(require) !== 'undefined') { var spellchecker = require("resource://grammalecte/graphspell/spellchecker.js"); - _oSpellChecker = new spellchecker.SpellChecker("${lang}", "", "${dic_main_filename_js}", "${dic_community_filename_js}", "${dic_personal_filename_js}"); - } else { - _oSpellChecker = new SpellChecker("${lang}", sPath, "${dic_main_filename_js}", "${dic_community_filename_js}", "${dic_personal_filename_js}"); - } - _sAppContext = sContext; - _dOptions = gc_options.getOptions(sContext).gl_shallowCopy(); // duplication necessary, to be able to reset to default - _dOptionsColors = gc_options.getOptionsColors(sContext, sColorType); - _oTokenizer = _oSpellChecker.getTokenizer(); - _oSpellChecker.activateStorage(); + this.oSpellChecker = new spellchecker.SpellChecker("${lang}", "", "${dic_main_filename_js}", "${dic_community_filename_js}", "${dic_personal_filename_js}"); + } + else { + this.oSpellChecker = new SpellChecker("${lang}", sPath, "${dic_main_filename_js}", "${dic_community_filename_js}", "${dic_personal_filename_js}"); + } + this.oOptionsColors = gc_options.getOptionsColors(sContext, sColorType); + this.oTokenizer = this.oSpellChecker.getTokenizer(); + this.oSpellChecker.activateStorage(); + gc_engine_func.load(sContext, this.oSpellChecker) + gc_options.load(sContext) } catch (e) { console.error(e); } }, getSpellChecker: function () { - return _oSpellChecker; + return this.oSpellChecker; }, //// Rules getRules: function (bParagraph) { @@ -98,19 +96,19 @@ } return gc_rules.lParagraphRules; }, ignoreRule: function (sRuleId) { - _aIgnoredRules.add(sRuleId); + this.aIgnoredRules.add(sRuleId); }, resetIgnoreRules: function () { - _aIgnoredRules.clear(); + this.aIgnoredRules.clear(); }, reactivateRule: function (sRuleId) { - _aIgnoredRules.delete(sRuleId); + this.aIgnoredRules.delete(sRuleId); }, listRules: function* (sFilter=null) { // generator: returns tuple (sOption, sLineId, sRuleId) try { @@ -132,34 +130,10 @@ catch (e) { console.error(e); } }, - //// Options - - setOption: function (sOpt, bVal) { - if (_dOptions.has(sOpt)) { - _dOptions.set(sOpt, bVal); - } - }, - - setOptions: function (dOpt) { - _dOptions.gl_updateOnlyExistingKeys(dOpt); - }, - - getOptions: function () { - return _dOptions; - }, - - getDefaultOptions: function () { - return gc_options.getOptions(_sAppContext).gl_shallowCopy(); - }, - - resetOptions: function () { - _dOptions = gc_options.getOptions(_sAppContext).gl_shallowCopy(); - }, - //// Parsing parse: function (sText, sCountry="${country_default}", bDebug=false, dOptions=null, bContext=false, bFullInfo=false) { // init point to analyse and returns an iterable of errors or (with option ) a list of sentences with tokens and errors let oText = new TextParser(sText); @@ -201,12 +175,12 @@ return s; } parse (sCountry="${country_default}", bDebug=false, dOptions=null, bContext=false, bFullInfo=false) { // analyses and returns an iterable of errors or (with option ) a list of sentences with tokens and errors - let dOpt = dOptions || _dOptions; - let bShowRuleId = option('idrule'); + let dOpt = dOptions || gc_options.dOptions; + let bShowRuleId = gc_options.dOptions.gl_get('idrule', false); // parse paragraph try { this.parseText(this.sText, this.sText0, true, 0, sCountry, dOpt, bShowRuleId, bDebug, bContext); } catch (e) { @@ -224,11 +198,11 @@ for (let [iStart, iEnd] of text.getSentenceBoundaries(sText)) { try { this.sSentence = sText.slice(iStart, iEnd); this.sSentence0 = this.sText0.slice(iStart, iEnd); this.nOffsetWithinParagraph = iStart; - this.lToken = Array.from(_oTokenizer.genTokens(this.sSentence, true)); + this.lToken = Array.from(gc_engine.oTokenizer.genTokens(this.sSentence, true)); this.dTokenPos.clear(); for (let dToken of this.lToken) { if (dToken["sType"] != "INFO") { this.dTokenPos.set(dToken["nStart"], dToken); } @@ -235,11 +209,11 @@ } if (bFullInfo) { oSentence = { "nStart": iStart, "nEnd": iEnd, "sSentence": this.sSentence, "lToken": Array.from(this.lToken) }; for (let oToken of oSentence["lToken"]) { if (oToken["sType"] == "WORD") { - oToken["bValidToken"] = _oSpellChecker.isValidToken(oToken["sValue"]); + oToken["bValidToken"] = gc_engine.oSpellChecker.isValidToken(oToken["sValue"]); } } // the list of tokens is duplicated, to keep all tokens from being deleted when analysis } this.parseText(this.sSentence, this.sSentence0, false, iStart, sCountry, dOpt, bShowRuleId, bDebug, bContext); @@ -300,19 +274,19 @@ } sText = this.parseGraph(gc_rules_graph.dAllGraph[sGraphName], sCountry, dOptions, bShowRuleId, bDebug, bContext); } } } - else if (!sOption || option(sOption)) { + else if (!sOption || gc_options.dOptions.gl_get(sOption, false)) { for (let [zRegex, bUppercase, sLineId, sRuleId, nPriority, lActions, lGroups, lNegLookBefore] of lRuleGroup) { - if (!_aIgnoredRules.has(sRuleId)) { + if (!gc_engine.aIgnoredRules.has(sRuleId)) { while ((m = zRegex.gl_exec2(sText, lGroups, lNegLookBefore)) !== null) { let bCondMemo = null; for (let [sFuncCond, cActionType, sWhat, ...eAct] of lActions) { // action in lActions: [ condition, action type, replacement/suggestion/action[, iGroup[, message, URL]] ] try { - bCondMemo = (!sFuncCond || oEvalFunc[sFuncCond](sText, sText0, m, this.dTokenPos, sCountry, bCondMemo)); + bCondMemo = (!sFuncCond || gc_engine_func[sFuncCond](sText, sText0, m, this.dTokenPos, sCountry, bCondMemo)); if (bCondMemo) { switch (cActionType) { case "-": // grammar error //console.log("-> error detected in " + sLineId + "\nzRegex: " + zRegex.source); @@ -333,11 +307,11 @@ } break; case "=": // disambiguation //console.log("-> disambiguation by " + sLineId + "\nzRegex: " + zRegex.source); - oEvalFunc[sWhat](sText, m, this.dTokenPos); + gc_engine_func[sWhat](sText, m, this.dTokenPos); if (bDebug) { console.log("= " + m[0] + " # " + sLineId, "\nDA:", this.dTokenPos); } break; case ">": @@ -373,11 +347,11 @@ } update (sSentence, bDebug=false) { // update and retokenize this.sSentence = sSentence; - let lNewToken = Array.from(_oTokenizer.genTokens(sSentence, true)); + let lNewToken = Array.from(gc_engine.oTokenizer.genTokens(sSentence, true)); for (let oToken of lNewToken) { if (this.dTokenPos.gl_get(oToken["nStart"], {}).hasOwnProperty("lMorph")) { oToken["lMorph"] = this.dTokenPos.get(oToken["nStart"])["lMorph"]; } if (this.dTokenPos.gl_get(oToken["nStart"], {}).hasOwnProperty("aTags")) { @@ -471,11 +445,11 @@ } // analysable tokens if (oToken["sType"].slice(0,4) == "WORD") { // token lemmas if (oNode.hasOwnProperty("")) { - for (let sLemma of _oSpellChecker.getLemma(oToken["sValue"])) { + for (let sLemma of gc_engine.oSpellChecker.getLemma(oToken["sValue"])) { if (oNode[""].hasOwnProperty(sLemma)) { if (bDebug) { console.log(" MATCH: >" + sLemma); } yield { "iToken1": iToken1, "iNode": oNode[""][sLemma] }; @@ -483,11 +457,11 @@ } } } // morph arcs if (oNode.hasOwnProperty("")) { - let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : _oSpellChecker.getMorph(oToken["sValue"]); + let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : gc_engine.oSpellChecker.getMorph(oToken["sValue"]); if (lMorph.length > 0) { for (let sSearch in oNode[""]) { if (!sSearch.includes("¬")) { // no anti-pattern if (lMorph.some(sMorph => (sMorph.includes(sSearch)))) { @@ -527,11 +501,11 @@ } } } // regex morph arcs if (oNode.hasOwnProperty("")) { - let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : _oSpellChecker.getMorph(oToken["sValue"]); + let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : gc_engine.oSpellChecker.getMorph(oToken["sValue"]); if (lMorph.length > 0) { for (let sRegex in oNode[""]) { if (!sRegex.includes("¬")) { // no anti-pattern if (lMorph.some(sMorph => (sMorph.search(sRegex) !== -1))) { @@ -676,11 +650,11 @@ // Disambiguator [ option, condition, "=", replacement/suggestion/action ] // Tag [ option, condition, "/", replacement/suggestion/action, iTokenStart, iTokenEnd ] // Immunity [ option, condition, "!", "", iTokenStart, iTokenEnd ] // Test [ option, condition, ">", "" ] if (!sOption || dOptions.gl_get(sOption, false)) { - bCondMemo = !sFuncCond || oEvalFunc[sFuncCond](this.lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, this.dTags, this.sSentence, this.sSentence0); + bCondMemo = !sFuncCond || gc_engine_func[sFuncCond](this.lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, this.dTags, this.sSentence, this.sSentence0); if (bCondMemo) { if (cActionType == "-") { // grammar error let [iTokenStart, iTokenEnd, cStartLimit, cEndLimit, bCaseSvty, nPriority, sMessage, sURL] = eAct; let nTokenErrorStart = (iTokenStart > 0) ? nTokenOffset + iTokenStart : nLastToken + iTokenStart; @@ -708,11 +682,11 @@ console.log(` TEXT_PROCESSOR: [${this.lToken[nTokenStart]["sValue"]}:${this.lToken[nTokenEnd]["sValue"]}] > ${sWhat}`); } } else if (cActionType == "=") { // disambiguation - oEvalFunc[sWhat](this.lToken, nTokenOffset, nLastToken); + gc_engine_func[sWhat](this.lToken, nTokenOffset, nLastToken); if (bDebug) { console.log(` DISAMBIGUATOR: (${sWhat}) [${this.lToken[nTokenOffset+1]["sValue"]}:${this.lToken[nLastToken]["sValue"]}]`); } } else if (cActionType == ">") { @@ -788,11 +762,11 @@ let nStart = nOffset + m.start[iGroup]; let nEnd = nOffset + m.end[iGroup]; // suggestions let lSugg = []; if (sSugg.startsWith("=")) { - sSugg = oEvalFunc[sSugg.slice(1)](sText, m); + sSugg = gc_engine_func[sSugg.slice(1)](sText, m); lSugg = (sSugg) ? sSugg.split("|") : []; } else if (sSugg == "_") { lSugg = []; } else { lSugg = sSugg.gl_expand(m).split("|"); @@ -799,11 +773,11 @@ } if (bUppercase && lSugg.length > 0 && m[iGroup].slice(0,1).gl_isUpperCase()) { lSugg = capitalizeArray(lSugg); } // Message - let sMessage = (sMsg.startsWith("=")) ? oEvalFunc[sMsg.slice(1)](sText, m) : sMsg.gl_expand(m); + let sMessage = (sMsg.startsWith("=")) ? gc_engine_func[sMsg.slice(1)](sText, m) : sMsg.gl_expand(m); if (bShowRuleId) { sMessage += " #" + sLineId + " / " + sRuleId; } // return this._createError(nStart, nEnd, sLineId, sRuleId, sOption, sMessage, lSugg, sURL, bContext); @@ -811,11 +785,11 @@ _createErrorFromTokens (sSugg, nTokenOffset, nLastToken, iFirstToken, nStart, nEnd, sLineId, sRuleId, bCaseSvty, sMsg, sURL, bShowRuleId, sOption, bContext) { // suggestions let lSugg = []; if (sSugg.startsWith("=")) { - sSugg = oEvalFunc[sSugg.slice(1)](this.lToken, nTokenOffset, nLastToken); + sSugg = gc_engine_func[sSugg.slice(1)](this.lToken, nTokenOffset, nLastToken); lSugg = (sSugg) ? sSugg.split("|") : []; } else if (sSugg == "_") { lSugg = []; } else { lSugg = this._expand(sSugg, nTokenOffset, nLastToken).split("|"); @@ -822,11 +796,11 @@ } if (bCaseSvty && lSugg.length > 0 && this.lToken[iFirstToken]["sValue"].slice(0,1).gl_isUpperCase()) { lSugg = capitalizeArray(lSugg); } // Message - let sMessage = (sMsg.startsWith("=")) ? oEvalFunc[sMsg.slice(1)](this.lToken, nTokenOffset, nLastToken) : this._expand(sMsg, nTokenOffset, nLastToken); + let sMessage = (sMsg.startsWith("=")) ? gc_engine_func[sMsg.slice(1)](this.lToken, nTokenOffset, nLastToken) : this._expand(sMsg, nTokenOffset, nLastToken); if (bShowRuleId) { sMessage += " #" + sLineId + " / " + sRuleId; } // return this._createError(nStart, nEnd, sLineId, sRuleId, sOption, sMessage, lSugg, sURL, bContext); @@ -837,11 +811,11 @@ "nStart": nStart, "nEnd": nEnd, "sLineId": sLineId, "sRuleId": sRuleId, "sType": sOption || "notype", - "aColor": _dOptionsColors[sOption], + "aColor": gc_engine.oOptionsColors[sOption], "sMessage": sMessage, "aSuggestions": lSugg, "URL": sURL } if (bContext) { @@ -876,11 +850,11 @@ } else if (sRepl === "@") { sNew = "@".repeat(ln); } else if (sRepl.slice(0,1) === "=") { - sNew = oEvalFunc[sRepl.slice(1)](sText, m); + sNew = gc_engine_func[sRepl.slice(1)](sText, m); sNew = sNew + " ".repeat(ln-sNew.length); if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) { sNew = sNew.gl_toCapitalize(); } } else { @@ -917,11 +891,11 @@ } } } else { if (sWhat.startsWith("=")) { - sWhat = oEvalFunc[sWhat.slice(1)](this.lToken, nTokenOffset, nLastToken); + sWhat = gc_engine_func[sWhat.slice(1)](this.lToken, nTokenOffset, nLastToken); } else { sWhat = this._expand(sWhat, nTokenOffset, nLastToken); } let bUppercase = bCaseSvty && this.lToken[nTokenRewriteStart]["sValue"].slice(0,1).gl_isUpperCase(); if (nTokenRewriteEnd - nTokenRewriteStart == 0) { @@ -1025,485 +999,23 @@ this.lToken.length = 0; this.lToken = lNewToken; } }; - -//////// Common functions - -function option (sOpt) { - // return true if option sOpt is active - return _dOptions.get(sOpt); -} - -var re = { - search: function (sRegex, sText) { - if (sRegex.startsWith("(?i)")) { - return sText.search(new RegExp(sRegex.slice(4), "i")) !== -1; - } else { - return sText.search(sRegex) !== -1; - } - }, - - createRegExp: function (sRegex) { - if (sRegex.startsWith("(?i)")) { - return new RegExp(sRegex.slice(4), "i"); - } else { - return new RegExp(sRegex); - } - } -} - - -//////// functions to get text outside pattern scope - -// warning: check compile_rules.py to understand how it works - -function nextword (s, iStart, n) { - // get the nth word of the input string or empty string - let z = new RegExp("^(?: +[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+){" + (n-1).toString() + "} +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+)", "ig"); - let m = z.exec(s.slice(iStart)); - if (!m) { - return null; - } - return [iStart + z.lastIndex - m[1].length, m[1]]; -} - -function prevword (s, iEnd, n) { - // get the (-)nth word of the input string or empty string - let z = new RegExp("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+) +(?:[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+ +){" + (n-1).toString() + "}$", "i"); - let m = z.exec(s.slice(0, iEnd)); - if (!m) { - return null; - } - return [m.index, m[1]]; -} - -function nextword1 (s, iStart) { - // get next word (optimization) - let _zNextWord = new RegExp ("^ +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_-]*)", "ig"); - let m = _zNextWord.exec(s.slice(iStart)); - if (!m) { - return null; - } - return [iStart + _zNextWord.lastIndex - m[1].length, m[1]]; -} - -const _zPrevWord = new RegExp ("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_-]*) +$", "i"); - -function prevword1 (s, iEnd) { - // get previous word (optimization) - let m = _zPrevWord.exec(s.slice(0, iEnd)); - if (!m) { - return null; - } - return [m.index, m[1]]; -} - -function look (s, sPattern, sNegPattern=null) { - // seek sPattern in s (before/after/fulltext), if antipattern sNegPattern not in s - try { - if (sNegPattern && re.search(sNegPattern, s)) { - return false; - } - return re.search(sPattern, s); - } - catch (e) { - console.error(e); - } - return false; -} - - -//////// Analyse groups for regex rules - -function displayInfo (dTokenPos, aWord) { - // for debugging: info of word - if (!aWord) { - console.log("> nothing to find"); - return true; - } - let lMorph = _oSpellChecker.getMorph(aWord[1]); - if (lMorph.length === 0) { - console.log("> not in dictionary"); - return true; - } - if (dTokenPos.has(aWord[0])) { - console.log("DA: " + dTokenPos.get(aWord[0])); - } - console.log("FSA: " + lMorph); - return true; -} - -function morph (dTokenPos, aWord, sPattern, sNegPattern, bNoWord=false) { - // analyse a tuple (position, word), returns true if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on) - if (!aWord) { - return bNoWord; - } - let lMorph = (dTokenPos.has(aWord[0]) && dTokenPos.get(aWord[0]))["lMorph"] ? dTokenPos.get(aWord[0])["lMorph"] : _oSpellChecker.getMorph(aWord[1]); - if (lMorph.length === 0) { - return false; - } - if (sNegPattern) { - // check negative condition - if (sNegPattern === "*") { - // all morph must match sPattern - return lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); - } - else { - if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { - return false; - } - } - } - // search sPattern - return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); -} - -function analyse (sWord, sPattern, sNegPattern) { - // analyse a word, returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation off) - let lMorph = _oSpellChecker.getMorph(sWord); - if (lMorph.length === 0) { - return false; - } - if (sNegPattern) { - // check negative condition - if (sNegPattern === "*") { - // all morph must match sPattern - return lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); - } - else { - if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { - return false; - } - } - } - // search sPattern - return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); -} - - -//// Analyse tokens for graph rules - -function g_value (oToken, sValues, nLeft=null, nRight=null) { - // test if is in sValues (each value should be separated with |) - let sValue = (nLeft === null) ? "|"+oToken["sValue"]+"|" : "|"+oToken["sValue"].slice(nLeft, nRight)+"|"; - if (sValues.includes(sValue)) { - return true; - } - if (oToken["sValue"].slice(0,2).gl_isTitle()) { // we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout". - if (sValues.includes(sValue.toLowerCase())) { - return true; - } - } - else if (oToken["sValue"].gl_isUpperCase()) { - //if sValue.lower() in sValues: - // return true; - sValue = "|"+sValue.slice(1).gl_toCapitalize(); - if (sValues.includes(sValue)) { - return true; - } - sValue = sValue.toLowerCase(); - if (sValues.includes(sValue)) { - return true; - } - } - return false; -} - -function g_morph (oToken, sPattern, sNegPattern="", nLeft=null, nRight=null, bMemorizeMorph=true) { - // analyse a token, return True if not in morphologies and in morphologies - let lMorph; - if (oToken.hasOwnProperty("lMorph")) { - lMorph = oToken["lMorph"]; - } - else { - if (nLeft !== null) { - let sValue = (nRight !== null) ? oToken["sValue"].slice(nLeft, nRight) : oToken["sValue"].slice(nLeft); - lMorph = _oSpellChecker.getMorph(sValue); - if (bMemorizeMorph) { - oToken["lMorph"] = lMorph; - } - } else { - lMorph = _oSpellChecker.getMorph(oToken["sValue"]); - } - } - if (lMorph.length == 0) { - return false; - } - // check negative condition - if (sNegPattern) { - if (sNegPattern == "*") { - // all morph must match sPattern - return lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); - } - else { - if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { - return false; - } - } - } - // search sPattern - return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); -} - -function g_analyse (oToken, sPattern, sNegPattern="", nLeft=null, nRight=null, bMemorizeMorph=true) { - // analyse a token, return True if not in morphologies and in morphologies - let lMorph; - if (nLeft !== null) { - let sValue = (nRight !== null) ? oToken["sValue"].slice(nLeft, nRight) : oToken["sValue"].slice(nLeft); - lMorph = _oSpellChecker.getMorph(sValue); - if (bMemorizeMorph) { - oToken["lMorph"] = lMorph; - } - } else { - lMorph = _oSpellChecker.getMorph(oToken["sValue"]); - } - if (lMorph.length == 0) { - return false; - } - // check negative condition - if (sNegPattern) { - if (sNegPattern == "*") { - // all morph must match sPattern - return lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); - } - else { - if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { - return false; - } - } - } - // search sPattern - return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); -} - -function g_merged_analyse (oToken1, oToken2, cMerger, sPattern, sNegPattern="", bSetMorph=true) { - // merge two token values, return True if not in morphologies and in morphologies (disambiguation off) - let lMorph = _oSpellChecker.getMorph(oToken1["sValue"] + cMerger + oToken2["sValue"]); - if (lMorph.length == 0) { - return false; - } - // check negative condition - if (sNegPattern) { - if (sNegPattern == "*") { - // all morph must match sPattern - let bResult = lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); - if (bResult && bSetMorph) { - oToken1["lMorph"] = lMorph; - } - return bResult; - } - else { - if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { - return false; - } - } - } - // search sPattern - let bResult = lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); - if (bResult && bSetMorph) { - oToken1["lMorph"] = lMorph; - } - return bResult; -} - -function g_tag_before (oToken, dTags, sTag) { - if (!dTags.has(sTag)) { - return false; - } - if (oToken["i"] > dTags.get(sTag)[0]) { - return true; - } - return false; -} - -function g_tag_after (oToken, dTags, sTag) { - if (!dTags.has(sTag)) { - return false; - } - if (oToken["i"] < dTags.get(sTag)[1]) { - return true; - } - return false; -} - -function g_tag (oToken, sTag) { - return oToken.hasOwnProperty("aTags") && oToken["aTags"].has(sTag); -} - -function g_space_between_tokens (oToken1, oToken2, nMin, nMax=null) { - let nSpace = oToken2["nStart"] - oToken1["nEnd"] - if (nSpace < nMin) { - return false; - } - if (nMax !== null && nSpace > nMax) { - return false; - } - return true; -} - -function g_token (lToken, i) { - if (i < 0) { - return lToken[0]; - } - if (i >= lToken.length) { - return lToken[-1]; - } - return lToken[i]; -} - - -//////// Disambiguator - -function select (dTokenPos, nPos, sWord, sPattern, lDefault=null) { - if (!sWord) { - return true; - } - if (!dTokenPos.has(nPos)) { - console.log("Error. There should be a token at this position: ", nPos); - return true; - } - let lMorph = _oSpellChecker.getMorph(sWord); - if (lMorph.length === 0 || lMorph.length === 1) { - return true; - } - let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) !== -1 ); - if (lSelect.length > 0) { - if (lSelect.length != lMorph.length) { - dTokenPos.get(nPos)["lMorph"] = lSelect; - } - } else if (lDefault) { - dTokenPos.get(nPos)["lMorph"] = lDefault; - } - return true; -} - -function exclude (dTokenPos, nPos, sWord, sPattern, lDefault=null) { - if (!sWord) { - return true; - } - if (!dTokenPos.has(nPos)) { - console.log("Error. There should be a token at this position: ", nPos); - return true; - } - let lMorph = _oSpellChecker.getMorph(sWord); - if (lMorph.length === 0 || lMorph.length === 1) { - return true; - } - let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) === -1 ); - if (lSelect.length > 0) { - if (lSelect.length != lMorph.length) { - dTokenPos.get(nPos)["lMorph"] = lSelect; - } - } else if (lDefault) { - dTokenPos.get(nPos)["lMorph"] = lDefault; - } - return true; -} - -function define (dTokenPos, nPos, lMorph) { - dTokenPos.get(nPos)["lMorph"] = lMorph; - return true; -} - - -//// Disambiguation for graph rules - -function g_select (oToken, sPattern, lDefault=null) { - // select morphologies for according to , always return true - let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : _oSpellChecker.getMorph(oToken["sValue"]); - if (lMorph.length === 0 || lMorph.length === 1) { - if (lDefault) { - oToken["lMorph"] = lDefault; - } - return true; - } - let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) !== -1 ); - if (lSelect.length > 0) { - if (lSelect.length != lMorph.length) { - oToken["lMorph"] = lSelect; - } - } else if (lDefault) { - oToken["lMorph"] = lDefault; - } - return true; -} - -function g_exclude (oToken, sPattern, lDefault=null) { - // select morphologies for according to , always return true - let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : _oSpellChecker.getMorph(oToken["sValue"]); - if (lMorph.length === 0 || lMorph.length === 1) { - if (lDefault) { - oToken["lMorph"] = lDefault; - } - return true; - } - let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) === -1 ); - if (lSelect.length > 0) { - if (lSelect.length != lMorph.length) { - oToken["lMorph"] = lSelect; - } - } else if (lDefault) { - oToken["lMorph"] = lDefault; - } - return true; -} - -function g_add_morph (oToken, lNewMorph) { - "Disambiguation: add a morphology to a token" - let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : _oSpellChecker.getMorph(oToken["sValue"]); - lMorph.push(...lNewMorph); - oToken["lMorph"] = lMorph; - return true; -} - -function g_define (oToken, lMorph) { - // set morphologies of , always return true - oToken["lMorph"] = lMorph; - return true; -} - -function g_define_from (oToken, nLeft=null, nRight=null) { - let sValue = oToken["sValue"]; - if (nLeft !== null) { - sValue = (nRight !== null) ? sValue.slice(nLeft, nRight) : sValue.slice(nLeft); - } - oToken["lMorph"] = _oSpellChecker.getMorph(sValue); - return true; -} - -function g_change_meta (oToken, sType) { - // Disambiguation: change type of token - oToken["sType"] = sType; - return true; -} - - - -//////// GRAMMAR CHECKER PLUGINS - -${pluginsJS} - - -// generated code, do not edit -const oEvalFunc = { - // callables for regex rules -${callablesJS} - - // callables for graph rules -${graph_callablesJS} -} - if (typeof(exports) !== 'undefined') { exports.lang = gc_engine.lang; exports.locales = gc_engine.locales; exports.pkg = gc_engine.pkg; exports.name = gc_engine.name; exports.version = gc_engine.version; exports.author = gc_engine.author; + // objects + exports.oSpellChecker = gc_engine.oSpellChecker; + exports.oTokenizer = gc_engine.oTokenizer; + exports.aIgnoredRules = gc_engine.aIgnoredRules; + exports.oOptionsColors = gc_engine.oOptionsColors; // init exports.load = gc_engine.load; exports.parse = gc_engine.parse; exports.getSpellChecker = gc_engine.getSpellChecker; // rules @@ -1510,14 +1022,8 @@ exports.ignoreRule = gc_engine.ignoreRule; exports.resetIgnoreRules = gc_engine.resetIgnoreRules; exports.reactivateRule = gc_engine.reactivateRule; exports.listRules = gc_engine.listRules; exports.getRules = gc_engine.getRules; - // options - exports.setOption = gc_engine.setOption; - exports.setOptions = gc_engine.setOptions; - exports.getOptions = gc_engine.getOptions; - exports.getDefaultOptions = gc_engine.getDefaultOptions; - exports.resetOptions = gc_engine.resetOptions; // other exports.TextParser = TextParser; } ADDED gc_core/js/lang_core/gc_engine_func.js Index: gc_core/js/lang_core/gc_engine_func.js ================================================================== --- /dev/null +++ gc_core/js/lang_core/gc_engine_func.js @@ -0,0 +1,501 @@ +// JavaScript +// Grammar checker engine functions + +${string} +${regex} +${map} + + +if (typeof(process) !== 'undefined') { + var gc_options = require("./gc_options.js"); +} +else if (typeof(require) !== 'undefined') { + var gc_options = require("resource://grammalecte/${lang}/gc_options.js"); +} + + +let _sAppContext = "JavaScript"; // what software is running +let _oSpellChecker = null; + + +//////// Common functions + +function option (sOpt) { + // return true if option sOpt is active + return gc_options.dOptions.gl_get(sOpt, false); +} + +function echo (x) { + console.log(x); + return true; +} + +var re = { + search: function (sRegex, sText) { + if (sRegex.startsWith("(?i)")) { + return sText.search(new RegExp(sRegex.slice(4), "i")) !== -1; + } else { + return sText.search(sRegex) !== -1; + } + }, + + createRegExp: function (sRegex) { + if (sRegex.startsWith("(?i)")) { + return new RegExp(sRegex.slice(4), "i"); + } else { + return new RegExp(sRegex); + } + } +} + + +//////// functions to get text outside pattern scope + +// warning: check compile_rules.py to understand how it works + +function nextword (s, iStart, n) { + // get the nth word of the input string or empty string + let z = new RegExp("^(?: +[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+){" + (n-1).toString() + "} +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+)", "ig"); + let m = z.exec(s.slice(iStart)); + if (!m) { + return null; + } + return [iStart + z.lastIndex - m[1].length, m[1]]; +} + +function prevword (s, iEnd, n) { + // get the (-)nth word of the input string or empty string + let z = new RegExp("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+) +(?:[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+ +){" + (n-1).toString() + "}$", "i"); + let m = z.exec(s.slice(0, iEnd)); + if (!m) { + return null; + } + return [m.index, m[1]]; +} + +function nextword1 (s, iStart) { + // get next word (optimization) + let _zNextWord = new RegExp ("^ +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_-]*)", "ig"); + let m = _zNextWord.exec(s.slice(iStart)); + if (!m) { + return null; + } + return [iStart + _zNextWord.lastIndex - m[1].length, m[1]]; +} + +const _zPrevWord = new RegExp ("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_-]*) +$", "i"); + +function prevword1 (s, iEnd) { + // get previous word (optimization) + let m = _zPrevWord.exec(s.slice(0, iEnd)); + if (!m) { + return null; + } + return [m.index, m[1]]; +} + +function look (s, sPattern, sNegPattern=null) { + // seek sPattern in s (before/after/fulltext), if antipattern sNegPattern not in s + try { + if (sNegPattern && re.search(sNegPattern, s)) { + return false; + } + return re.search(sPattern, s); + } + catch (e) { + console.error(e); + } + return false; +} + + +//////// Analyse groups for regex rules + +function displayInfo (dTokenPos, aWord) { + // for debugging: info of word + if (!aWord) { + console.log("> nothing to find"); + return true; + } + let lMorph = _oSpellChecker.getMorph(aWord[1]); + if (lMorph.length === 0) { + console.log("> not in dictionary"); + return true; + } + if (dTokenPos.has(aWord[0])) { + console.log("DA: " + dTokenPos.get(aWord[0])); + } + console.log("FSA: " + lMorph); + return true; +} + +function morph (dTokenPos, aWord, sPattern, sNegPattern, bNoWord=false) { + // analyse a tuple (position, word), returns true if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on) + if (!aWord) { + return bNoWord; + } + let lMorph = (dTokenPos.has(aWord[0]) && dTokenPos.get(aWord[0]))["lMorph"] ? dTokenPos.get(aWord[0])["lMorph"] : _oSpellChecker.getMorph(aWord[1]); + if (lMorph.length === 0) { + return false; + } + if (sNegPattern) { + // check negative condition + if (sNegPattern === "*") { + // all morph must match sPattern + return lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); + } + else { + if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { + return false; + } + } + } + // search sPattern + return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); +} + +function analyse (sWord, sPattern, sNegPattern) { + // analyse a word, returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation off) + let lMorph = _oSpellChecker.getMorph(sWord); + if (lMorph.length === 0) { + return false; + } + if (sNegPattern) { + // check negative condition + if (sNegPattern === "*") { + // all morph must match sPattern + return lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); + } + else { + if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { + return false; + } + } + } + // search sPattern + return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); +} + + +//// Analyse tokens for graph rules + +function g_value (oToken, sValues, nLeft=null, nRight=null) { + // test if is in sValues (each value should be separated with |) + let sValue = (nLeft === null) ? "|"+oToken["sValue"]+"|" : "|"+oToken["sValue"].slice(nLeft, nRight)+"|"; + if (sValues.includes(sValue)) { + return true; + } + if (oToken["sValue"].slice(0,2).gl_isTitle()) { // we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout". + if (sValues.includes(sValue.toLowerCase())) { + return true; + } + } + else if (oToken["sValue"].gl_isUpperCase()) { + //if sValue.lower() in sValues: + // return true; + sValue = "|"+sValue.slice(1).gl_toCapitalize(); + if (sValues.includes(sValue)) { + return true; + } + sValue = sValue.toLowerCase(); + if (sValues.includes(sValue)) { + return true; + } + } + return false; +} + +function g_morph (oToken, sPattern, sNegPattern="", nLeft=null, nRight=null, bMemorizeMorph=true) { + // analyse a token, return True if not in morphologies and in morphologies + let lMorph; + if (oToken.hasOwnProperty("lMorph")) { + lMorph = oToken["lMorph"]; + } + else { + if (nLeft !== null) { + let sValue = (nRight !== null) ? oToken["sValue"].slice(nLeft, nRight) : oToken["sValue"].slice(nLeft); + lMorph = _oSpellChecker.getMorph(sValue); + if (bMemorizeMorph) { + oToken["lMorph"] = lMorph; + } + } else { + lMorph = _oSpellChecker.getMorph(oToken["sValue"]); + } + } + if (lMorph.length == 0) { + return false; + } + // check negative condition + if (sNegPattern) { + if (sNegPattern == "*") { + // all morph must match sPattern + return lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); + } + else { + if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { + return false; + } + } + } + // search sPattern + return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); +} + +function g_analyse (oToken, sPattern, sNegPattern="", nLeft=null, nRight=null, bMemorizeMorph=true) { + // analyse a token, return True if not in morphologies and in morphologies + let lMorph; + if (nLeft !== null) { + let sValue = (nRight !== null) ? oToken["sValue"].slice(nLeft, nRight) : oToken["sValue"].slice(nLeft); + lMorph = _oSpellChecker.getMorph(sValue); + if (bMemorizeMorph) { + oToken["lMorph"] = lMorph; + } + } else { + lMorph = _oSpellChecker.getMorph(oToken["sValue"]); + } + if (lMorph.length == 0) { + return false; + } + // check negative condition + if (sNegPattern) { + if (sNegPattern == "*") { + // all morph must match sPattern + return lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); + } + else { + if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { + return false; + } + } + } + // search sPattern + return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); +} + +function g_merged_analyse (oToken1, oToken2, cMerger, sPattern, sNegPattern="", bSetMorph=true) { + // merge two token values, return True if not in morphologies and in morphologies (disambiguation off) + let lMorph = _oSpellChecker.getMorph(oToken1["sValue"] + cMerger + oToken2["sValue"]); + if (lMorph.length == 0) { + return false; + } + // check negative condition + if (sNegPattern) { + if (sNegPattern == "*") { + // all morph must match sPattern + let bResult = lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); + if (bResult && bSetMorph) { + oToken1["lMorph"] = lMorph; + } + return bResult; + } + else { + if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { + return false; + } + } + } + // search sPattern + let bResult = lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); + if (bResult && bSetMorph) { + oToken1["lMorph"] = lMorph; + } + return bResult; +} + +function g_tag_before (oToken, dTags, sTag) { + if (!dTags.has(sTag)) { + return false; + } + if (oToken["i"] > dTags.get(sTag)[0]) { + return true; + } + return false; +} + +function g_tag_after (oToken, dTags, sTag) { + if (!dTags.has(sTag)) { + return false; + } + if (oToken["i"] < dTags.get(sTag)[1]) { + return true; + } + return false; +} + +function g_tag (oToken, sTag) { + return oToken.hasOwnProperty("aTags") && oToken["aTags"].has(sTag); +} + +function g_space_between_tokens (oToken1, oToken2, nMin, nMax=null) { + let nSpace = oToken2["nStart"] - oToken1["nEnd"] + if (nSpace < nMin) { + return false; + } + if (nMax !== null && nSpace > nMax) { + return false; + } + return true; +} + +function g_token (lToken, i) { + if (i < 0) { + return lToken[0]; + } + if (i >= lToken.length) { + return lToken[-1]; + } + return lToken[i]; +} + + +//////// Disambiguator + +function select (dTokenPos, nPos, sWord, sPattern, lDefault=null) { + if (!sWord) { + return true; + } + if (!dTokenPos.has(nPos)) { + console.log("Error. There should be a token at this position: ", nPos); + return true; + } + let lMorph = _oSpellChecker.getMorph(sWord); + if (lMorph.length === 0 || lMorph.length === 1) { + return true; + } + let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) !== -1 ); + if (lSelect.length > 0) { + if (lSelect.length != lMorph.length) { + dTokenPos.get(nPos)["lMorph"] = lSelect; + } + } else if (lDefault) { + dTokenPos.get(nPos)["lMorph"] = lDefault; + } + return true; +} + +function exclude (dTokenPos, nPos, sWord, sPattern, lDefault=null) { + if (!sWord) { + return true; + } + if (!dTokenPos.has(nPos)) { + console.log("Error. There should be a token at this position: ", nPos); + return true; + } + let lMorph = _oSpellChecker.getMorph(sWord); + if (lMorph.length === 0 || lMorph.length === 1) { + return true; + } + let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) === -1 ); + if (lSelect.length > 0) { + if (lSelect.length != lMorph.length) { + dTokenPos.get(nPos)["lMorph"] = lSelect; + } + } else if (lDefault) { + dTokenPos.get(nPos)["lMorph"] = lDefault; + } + return true; +} + +function define (dTokenPos, nPos, lMorph) { + dTokenPos.get(nPos)["lMorph"] = lMorph; + return true; +} + + +//// Disambiguation for graph rules + +function g_select (oToken, sPattern, lDefault=null) { + // select morphologies for according to , always return true + let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : _oSpellChecker.getMorph(oToken["sValue"]); + if (lMorph.length === 0 || lMorph.length === 1) { + if (lDefault) { + oToken["lMorph"] = lDefault; + } + return true; + } + let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) !== -1 ); + if (lSelect.length > 0) { + if (lSelect.length != lMorph.length) { + oToken["lMorph"] = lSelect; + } + } else if (lDefault) { + oToken["lMorph"] = lDefault; + } + return true; +} + +function g_exclude (oToken, sPattern, lDefault=null) { + // select morphologies for according to , always return true + let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : _oSpellChecker.getMorph(oToken["sValue"]); + if (lMorph.length === 0 || lMorph.length === 1) { + if (lDefault) { + oToken["lMorph"] = lDefault; + } + return true; + } + let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) === -1 ); + if (lSelect.length > 0) { + if (lSelect.length != lMorph.length) { + oToken["lMorph"] = lSelect; + } + } else if (lDefault) { + oToken["lMorph"] = lDefault; + } + return true; +} + +function g_add_morph (oToken, lNewMorph) { + "Disambiguation: add a morphology to a token" + let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : _oSpellChecker.getMorph(oToken["sValue"]); + lMorph.push(...lNewMorph); + oToken["lMorph"] = lMorph; + return true; +} + +function g_define (oToken, lMorph) { + // set morphologies of , always return true + oToken["lMorph"] = lMorph; + return true; +} + +function g_define_from (oToken, nLeft=null, nRight=null) { + let sValue = oToken["sValue"]; + if (nLeft !== null) { + sValue = (nRight !== null) ? sValue.slice(nLeft, nRight) : sValue.slice(nLeft); + } + oToken["lMorph"] = _oSpellChecker.getMorph(sValue); + return true; +} + +function g_change_meta (oToken, sType) { + // Disambiguation: change type of token + oToken["sType"] = sType; + return true; +} + + + +//////// GRAMMAR CHECKER PLUGINS + +${pluginsJS} + + +// generated code, do not edit +var gc_engine_func = { + + load: function (sContext, oSpellChecker) { + _sAppContext = sContext + _oSpellChecker = oSpellChecker + }, + + // callables for regex rules +${callablesJS} + + // callables for graph rules +${graph_callablesJS} +} + + +if (typeof(exports) !== 'undefined') { + exports.load = gc_engine_func.load; +} Index: gc_core/js/lang_core/gc_options.js ================================================================== --- gc_core/js/lang_core/gc_options.js +++ gc_core/js/lang_core/gc_options.js @@ -6,53 +6,98 @@ ${map} var gc_options = { - getOptions: function (sContext="JavaScript") { - if (this.dOpt.hasOwnProperty(sContext)) { - return this.dOpt[sContext]; + + dOptions: new Map(), + + sAppContext: "JavaScript", + + load: function (sContext="JavaScript") { + this.sAppContext = sContext; + this.dOptions = this.getDefaultOptions(sContext); + }, + + setOption: function (sOpt, bVal) { + if (this.dOptions.has(sOpt)) { + this.dOptions.set(sOpt, bVal); + } + }, + + setOptions: function (dOpt) { + this.dOptions.gl_updateOnlyExistingKeys(dOpt); + }, + + getOptions: function () { + return this.dOptions.gl_shallowCopy(); + }, + + resetOptions: function () { + this.dOptions = this.getDefaultOptions(this._sAppContext); + }, + + getDefaultOptions: function (sContext="") { + if (!sContext) { + sContext = this.sAppContext; + } + if (this.oDefaultOpt.hasOwnProperty(sContext)) { + return this.oDefaultOpt[sContext].gl_shallowCopy(); + } + return this.oDefaultOpt["JavaScript"].gl_shallowCopy(); + }, + + getOptionLabels: function (sLang="${sLang}") { + if (this.oOptLabel.hasOwnProperty(sLang)) { + return this.oOptLabel[sLang]; } - return this.dOpt["JavaScript"]; + return this.oOptLabel["{$sLang}"]; }, getOptionsColors: function (sTheme="Default", sColorType="aRGB") { - let dOptColor = (this.dOptColor.hasOwnProperty(sTheme)) ? this.dOptColor[sTheme] : this.dOptColor["Default"]; - let dColorType = (this.dColorType.hasOwnProperty(sColorType)) ? this.dColorType[sColorType] : this.dColorType["aRGB"]; - let dColor = {}; + let oOptColor = (this.oOptColor.hasOwnProperty(sTheme)) ? this.oOptColor[sTheme] : this.oOptColor["Default"]; + let oColorType = (this.oColorType.hasOwnProperty(sColorType)) ? this.oColorType[sColorType] : this.oColorType["aRGB"]; + let oColor = {}; try { - for (let [sOpt, sColor] of Object.entries(dOptColor)) { - dColor[sOpt] = dColorType[sColor]; + for (let [sOpt, sColor] of Object.entries(oOptColor)) { + oColor[sOpt] = oColorType[sColor]; } - return dColor; + return oColor; } catch (e) { console.error(e); return {}; } }, lStructOpt: ${lStructOpt}, - dOpt: { + oDefaultOpt: { "JavaScript": new Map (${dOptJavaScript}), "Firefox": new Map (${dOptFirefox}), "Thunderbird": new Map (${dOptThunderbird}), }, - dColorType: ${dColorType}, + oColorType: ${dColorType}, - dOptColor: ${dOptColor}, + oOptColor: ${dOptColor}, - dOptLabel: ${dOptLabel} + oOptLabel: ${dOptLabel} }; if (typeof(exports) !== 'undefined') { + exports.dOptions = gc_options.dOptions; + exports.sAppContext = gc_options.sAppContext; + exports.load = gc_options.load; + exports.setOption = gc_options.setOption; + exports.setOptions = gc_options.setOptions; + exports.resetOptions = gc_options.resetOptions; + exports.getDefaultOptions = gc_options.getDefaultOptions; exports.getOptions = gc_options.getOptions; exports.getOptionsColors = gc_options.getOptionsColors; exports.lStructOpt = gc_options.lStructOpt; - exports.dOpt = gc_options.dOpt; + exports.oDefaultOpt = gc_options.oDefaultOpt; exports.dColorType = gc_options.dColorType; - exports.dOptColor = gc_options.dOptColor; - exports.dOptLabel = gc_options.dOptLabel; + exports.oOptColor = gc_options.oOptColor; + exports.oOptLabel = gc_options.oOptLabel; } Index: gc_core/py/__init__.py ================================================================== --- gc_core/py/__init__.py +++ gc_core/py/__init__.py @@ -1,5 +1,5 @@ """ Grammar checker """ -from .grammar_checker import * +from .${lang}.gc_engine import * DELETED gc_core/py/grammar_checker.py Index: gc_core/py/grammar_checker.py ================================================================== --- gc_core/py/grammar_checker.py +++ /dev/null @@ -1,82 +0,0 @@ -""" -Grammalecte, grammar checker -""" - -import importlib -import json - -from . import text - - -class GrammarChecker: - "GrammarChecker: Wrapper for the grammar checker engine" - - def __init__ (self, sLangCode, sContext="Python"): - self.sLangCode = sLangCode - # Grammar checker engine - self.gce = importlib.import_module("."+sLangCode, "grammalecte") - self.gce.load(sContext) - # Spell checker - self.oSpellChecker = self.gce.getSpellChecker() - # Lexicographer - self.oLexicographer = None - # Text formatter - self.oTextFormatter = None - - def getGCEngine (self): - "return the grammar checker object" - return self.gce - - def getSpellChecker (self): - "return the spell checker object" - return self.oSpellChecker - - def getTextFormatter (self): - "load and return the text formatter" - if self.oTextFormatter is None: - tf = importlib.import_module("."+self.sLangCode+".textformatter", "grammalecte") - self.oTextFormatter = tf.TextFormatter() - return self.oTextFormatter - - def getLexicographer (self): - "load and return the lexicographer" - if self.oLexicographer is None: - lxg = importlib.import_module("."+self.sLangCode+".lexicographe", "grammalecte") - self.oLexicographer = lxg.Lexicographe(self.oSpellChecker) - return self.oLexicographer - - def displayGCOptions (self): - "display the grammar checker options" - self.gce.displayOptions() - - def getParagraphErrors (self, sText, dOptions=None, bContext=False, bSpellSugg=False, bDebug=False): - "returns a tuple: (grammar errors, spelling errors)" - aGrammErrs = self.gce.parse(sText, "FR", bDebug=bDebug, dOptions=dOptions, bContext=bContext) - aSpellErrs = self.oSpellChecker.parseParagraph(sText, bSpellSugg) - return aGrammErrs, aSpellErrs - - def getParagraphWithErrors (self, sText, dOptions=None, bEmptyIfNoErrors=False, bSpellSugg=False, nWidth=100, bDebug=False): - "parse text and return a readable text with underline errors" - aGrammErrs, aSpellErrs = self.getParagraphErrors(sText, dOptions, False, bSpellSugg, bDebug) - if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs: - return ("", []) - return text.generateParagraph(sText, aGrammErrs, aSpellErrs, nWidth) - - def getTextWithErrors (self, sText, bEmptyIfNoErrors=False, bSpellSugg=False, nWidth=100, bDebug=False): - "[todo]" - - def getParagraphErrorsAsJSON (self, iIndex, sText, dOptions=None, bContext=False, bEmptyIfNoErrors=False, bSpellSugg=False, bReturnText=False, lLineSet=None, bDebug=False): - "parse text and return errors as a JSON string" - aGrammErrs, aSpellErrs = self.getParagraphErrors(sText, dOptions, bContext, bSpellSugg, bDebug) - aGrammErrs = list(aGrammErrs) - if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs: - return "" - if lLineSet: - aGrammErrs, aSpellErrs = text.convertToXY(aGrammErrs, aSpellErrs, lLineSet) - return json.dumps({ "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False) - if bReturnText: - return json.dumps({ "iParagraph": iIndex, "sText": sText, "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False) - return json.dumps({ "iParagraph": iIndex, "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False) - - def getTextErrorsAsJSON (self, sText, bContext=False, bEmptyIfNoErrors=False, bSpellSugg=False, bReturnText=False, bDebug=False): - "[todo]" Index: gc_core/py/lang_core/gc_engine.py ================================================================== --- gc_core/py/lang_core/gc_engine.py +++ gc_core/py/lang_core/gc_engine.py @@ -3,19 +3,23 @@ Grammar checker engine """ import re import traceback +import json +import importlib #import unicodedata from itertools import chain from ..graphspell.spellchecker import SpellChecker from ..graphspell.echo import echo from .. import text +from . import gc_engine_func as gce_func from . import gc_options + try: # LibreOffice / OpenOffice from com.sun.star.linguistic2 import SingleProofreadingError from com.sun.star.text.TextMarkupType import PROOFREADING @@ -24,14 +28,13 @@ _bWriterError = True except ImportError: _bWriterError = False -__all__ = [ "lang", "locales", "pkg", "name", "version", "author", \ - "load", "parse", "getSpellChecker", \ - "setOption", "setOptions", "getOptions", "getDefaultOptions", "getOptionsLabels", "resetOptions", "displayOptions", \ - "ignoreRule", "resetIgnoreRules", "reactivateRule", "listRules", "displayRules", "setWriterUnderliningStyle" ] +#__all__ = [ "lang", "locales", "pkg", "name", "version", "author", \ +# "load", "parse", "getSpellChecker", "getTextFormatter", "getLexicographer" \ +# "ignoreRule", "resetIgnoreRules", "reactivateRule", "listRules", "displayRules", "setWriterUnderliningStyle" ] __version__ = "${version}" lang = "${lang}" @@ -43,42 +46,42 @@ # Modules _rules = None # module gc_rules _rules_graph = None # module gc_rules_graph -# Data -_sAppContext = "" # what software is running -_dOptions = None -_dOptionsColors = None +# Tools _oSpellChecker = None _oTokenizer = None + +# Data _aIgnoredRules = set() # Writer underlining style +_dOptionsColors = None _bMulticolor = True _nUnderliningStyle = 0 #### Initialization def load (sContext="Python", sColorType="aRGB"): "initialization of the grammar checker" global _oSpellChecker - global _sAppContext - global _dOptions global _dOptionsColors global _oTokenizer try: _oSpellChecker = SpellChecker("${lang}", "${dic_main_filename_py}", "${dic_community_filename_py}", "${dic_personal_filename_py}") - _sAppContext = sContext - _dOptions = gc_options.getOptions(sContext).copy() # duplication necessary, to be able to reset to default - _dOptionsColors = gc_options.getOptionsColors(sContext, sColorType) + _oSpellChecker.activateStorage() _oTokenizer = _oSpellChecker.getTokenizer() - _oSpellChecker.activateStorage() + gce_func.load(sContext, _oSpellChecker) + gc_options.load(sContext) + _dOptionsColors = gc_options.getOptionsColors(sContext, sColorType) except: traceback.print_exc() + +#### Tools def getSpellChecker (): "return the spellchecker object" return _oSpellChecker @@ -129,11 +132,11 @@ "(re)activate rule " _aIgnoredRules.discard(sRuleId) def listRules (sFilter=None): - "generator: returns typle (sOption, sLineId, sRuleId)" + "generator: returns tuple (sRuleType, sOption, sLineId, sRuleId)" if sFilter: try: zFilter = re.compile(sFilter) except re.error: echo("# Error. List rules: wrong regex.") @@ -155,53 +158,10 @@ "display the name of rules, with the filter " echo("List of rules. Filter: << " + str(sFilter) + " >>") for sOption, sLineId, sRuleId, sType in listRules(sFilter): echo("{:<8} {:<10} {:<10} {}".format(sOption, sLineId, sRuleId, sType)) - -#### Options - -def setOption (sOpt, bVal): - "set option with if it exists" - if sOpt in _dOptions: - _dOptions[sOpt] = bVal - - -def setOptions (dOpt): - "update the dictionary of options with " - for sKey, bVal in dOpt.items(): - if sKey in _dOptions: - _dOptions[sKey] = bVal - - -def getOptions (): - "return the dictionary of current options" - return _dOptions - - -def getDefaultOptions (): - "return the dictionary of default options" - return gc_options.getOptions(_sAppContext).copy() - - -def getOptionsLabels (sLang): - "return options labels" - return gc_options.getUI(sLang) - - -def displayOptions (sLang="${lang}"): - "display the list of grammar checking options" - echo("Options:") - echo("\n".join( [ k+":\t"+str(v)+"\t"+gc_options.getUI(sLang).get(k, ("?", ""))[0] for k, v in sorted(_dOptions.items()) ] )) - echo("") - - -def resetOptions (): - "set options to default values" - global _dOptions - _dOptions = getDefaultOptions() - def setWriterUnderliningStyle (sStyle="BOLDWAVE", bMulticolor=True): "set underlining style for Writer (WAVE, BOLDWAVE, BOLD)" global _nUnderliningStyle global _bMulticolor @@ -219,10 +179,39 @@ _nUnderliningStyle = 0 _bMulticolor = bMulticolor #### Parsing + +def getParagraphErrors (sText, dOptions=None, bContext=False, bSpellSugg=False, bDebug=False): + "returns a tuple: (grammar errors, spelling errors)" + aGrammErrs = parse(sText, "FR", bDebug=bDebug, dOptions=dOptions, bContext=bContext) + aSpellErrs = _oSpellChecker.parseParagraph(sText, bSpellSugg) + return aGrammErrs, aSpellErrs + + +def getParagraphWithErrors (sText, dOptions=None, bEmptyIfNoErrors=False, bSpellSugg=False, nWidth=100, bDebug=False): + "parse text and return a readable text with underline errors" + aGrammErrs, aSpellErrs = getParagraphErrors(sText, dOptions, False, bSpellSugg, bDebug) + if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs: + return ("", []) + return text.generateParagraph(sText, aGrammErrs, aSpellErrs, nWidth) + + +def getParagraphErrorsAsJSON (iIndex, sText, dOptions=None, bContext=False, bEmptyIfNoErrors=False, bSpellSugg=False, bReturnText=False, lLineSet=None, bDebug=False): + "parse text and return errors as a JSON string" + aGrammErrs, aSpellErrs = getParagraphErrors(sText, dOptions, bContext, bSpellSugg, bDebug) + aGrammErrs = list(aGrammErrs) + if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs: + return "" + if lLineSet: + aGrammErrs, aSpellErrs = text.convertToXY(aGrammErrs, aSpellErrs, lLineSet) + return json.dumps({ "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False) + if bReturnText: + return json.dumps({ "iParagraph": iIndex, "sText": sText, "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False) + return json.dumps({ "iParagraph": iIndex, "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False) + def parse (sText, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False, bFullInfo=False): "init point to analyse and returns an iterable of errors or (with option ) paragraphs errors and sentences with tokens and errors" oText = TextParser(sText) return oText.parse(sCountry, bDebug, dOptions, bContext, bFullInfo) @@ -262,12 +251,12 @@ return s def parse (self, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False, bFullInfo=False): "analyses and returns an iterable of errors or (with option ) paragraphs errors and sentences with tokens and errors" #sText = unicodedata.normalize("NFC", sText) - dOpt = dOptions or _dOptions - bShowRuleId = option('idrule') + dOpt = dOptions or gc_options.dOptions + bShowRuleId = gc_options.dOptions.get('idrule', False) # parse paragraph try: self.parseText(self.sText, self.sText0, True, 0, sCountry, dOpt, bShowRuleId, bDebug, bContext) except: raise @@ -340,11 +329,11 @@ for m in zRegex.finditer(sText): bCondMemo = None for sFuncCond, cActionType, sWhat, *eAct in lActions: # action in lActions: [ condition, action type, replacement/suggestion/action[, iGroup[, message, URL]] ] try: - bCondMemo = not sFuncCond or globals()[sFuncCond](sText, sText0, m, self.dTokenPos, sCountry, bCondMemo) + bCondMemo = not sFuncCond or getattr(gce_func, sFuncCond)(sText, sText0, m, self.dTokenPos, sCountry, bCondMemo) if bCondMemo: if bDebug: echo("RULE: " + sLineId) if cActionType == "-": # grammar error @@ -360,11 +349,11 @@ if bDebug: echo("~ " + sText + " -- " + m.group(eAct[0]) + " # " + sLineId) elif cActionType == "=": # disambiguation if not bParagraph: - globals()[sWhat](sText, m, self.dTokenPos) + getattr(gce_func, sWhat)(sText, m, self.dTokenPos) if bDebug: echo("= " + m.group(0) + " # " + sLineId) elif cActionType == ">": # we do nothing, this test is just a condition to apply all following actions pass @@ -586,18 +575,18 @@ for sRuleId in dGraph[nextNodeKey]: try: if bDebug: echo(" >TRY: " + sRuleId + " " + sLineId) _, sOption, sFuncCond, cActionType, sWhat, *eAct = _rules_graph.dRule[sRuleId] - # Suggestion [ option, condition, "-", replacement/suggestion/action, iTokenStart, iTokenEnd, cStartLimit, cEndLimit, bCaseSvty, nPriority, sMessage, sURL ] - # TextProcessor [ option, condition, "~", replacement/suggestion/action, iTokenStart, iTokenEnd, bCaseSvty ] - # Disambiguator [ option, condition, "=", replacement/suggestion/action ] - # Tag [ option, condition, "/", replacement/suggestion/action, iTokenStart, iTokenEnd ] - # Immunity [ option, condition, "!", "", iTokenStart, iTokenEnd ] - # Test [ option, condition, ">", "" ] + # Suggestion [ sActionLineId, option, condition, "-", replacement/suggestion/action, iTokenStart, iTokenEnd, cStartLimit, cEndLimit, bCaseSvty, nPriority, sMessage, sURL ] + # TextProcessor [ sActionLineId, option, condition, "~", replacement/suggestion/action, iTokenStart, iTokenEnd, bCaseSvty ] + # Disambiguator [ sActionLineId, option, condition, "=", replacement/suggestion/action ] + # Tag [ sActionLineId, option, condition, "/", replacement/suggestion/action, iTokenStart, iTokenEnd ] + # Immunity [ sActionLineId, option, condition, "!", "", iTokenStart, iTokenEnd ] + # Test [ sActionLineId, option, condition, ">", "" ] if not sOption or dOptions.get(sOption, False): - bCondMemo = not sFuncCond or globals()[sFuncCond](self.lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, self.dTags, self.sSentence, self.sSentence0) + bCondMemo = not sFuncCond or getattr(gce_func, sFuncCond)(self.lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, self.dTags, self.sSentence, self.sSentence0) if bCondMemo: if cActionType == "-": # grammar error iTokenStart, iTokenEnd, cStartLimit, cEndLimit, bCaseSvty, nPriority, sMessage, sURL = eAct nTokenErrorStart = nTokenOffset + iTokenStart if iTokenStart > 0 else nLastToken + iTokenStart @@ -619,11 +608,11 @@ bChange = True if bDebug: echo(" TEXT_PROCESSOR: [{}:{}] > {}".format(self.lToken[nTokenStart]["sValue"], self.lToken[nTokenEnd]["sValue"], sWhat)) elif cActionType == "=": # disambiguation - globals()[sWhat](self.lToken, nTokenOffset, nLastToken) + getattr(gce_func, sWhat)(self.lToken, nTokenOffset, nLastToken) if bDebug: echo(" DISAMBIGUATOR: ({}) [{}:{}]".format(sWhat, self.lToken[nTokenOffset+1]["sValue"], self.lToken[nLastToken]["sValue"])) elif cActionType == ">": # we do nothing, this test is just a condition to apply all following actions if bDebug: @@ -674,20 +663,20 @@ def _createErrorFromRegex (self, sText, sText0, sRepl, nOffset, m, iGroup, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext): nStart = nOffset + m.start(iGroup) nEnd = nOffset + m.end(iGroup) # suggestions if sRepl[0:1] == "=": - sSugg = globals()[sRepl[1:]](sText, m) + sSugg = getattr(gce_func, sRepl[1:])(sText, m) lSugg = sSugg.split("|") if sSugg else [] elif sRepl == "_": lSugg = [] else: lSugg = m.expand(sRepl).split("|") if bUppercase and lSugg and m.group(iGroup)[0:1].isupper(): lSugg = list(map(lambda s: s[0:1].upper()+s[1:], lSugg)) # Message - sMessage = globals()[sMsg[1:]](sText, m) if sMsg[0:1] == "=" else m.expand(sMsg) + sMessage = getattr(gce_func, sMsg[1:])(sText, m) if sMsg[0:1] == "=" else m.expand(sMsg) if bShowRuleId: sMessage += " #" + sLineId + " / " + sRuleId # if _bWriterError: return self._createErrorForWriter(nStart, nEnd - nStart, sRuleId, sOption, sMessage, lSugg, sURL) @@ -694,20 +683,20 @@ return self._createErrorAsDict(nStart, nEnd, sLineId, sRuleId, sOption, sMessage, lSugg, sURL, bContext) def _createErrorFromTokens (self, sSugg, nTokenOffset, nLastToken, iFirstToken, nStart, nEnd, sLineId, sRuleId, bCaseSvty, sMsg, sURL, bShowRuleId, sOption, bContext): # suggestions if sSugg[0:1] == "=": - sSugg = globals()[sSugg[1:]](self.lToken, nTokenOffset, nLastToken) + sSugg = getattr(gce_func, sSugg[1:])(self.lToken, nTokenOffset, nLastToken) lSugg = sSugg.split("|") if sSugg else [] elif sSugg == "_": lSugg = [] else: lSugg = self._expand(sSugg, nTokenOffset, nLastToken).split("|") if bCaseSvty and lSugg and self.lToken[iFirstToken]["sValue"][0:1].isupper(): lSugg = list(map(lambda s: s[0:1].upper()+s[1:], lSugg)) # Message - sMessage = globals()[sMsg[1:]](self.lToken, nTokenOffset, nLastToken) if sMsg[0:1] == "=" else self._expand(sMsg, nTokenOffset, nLastToken) + sMessage = getattr(gce_func, sMsg[1:])(self.lToken, nTokenOffset, nLastToken) if sMsg[0:1] == "=" else self._expand(sMsg, nTokenOffset, nLastToken) if bShowRuleId: sMessage += " #" + sLineId + " / " + sRuleId # if _bWriterError: return self._createErrorForWriter(nStart, nEnd - nStart, sRuleId, sOption, sMessage, lSugg, sURL) @@ -767,11 +756,11 @@ elif sRepl == "_": sNew = "_" * nLen elif sRepl == "@": sNew = "@" * nLen elif sRepl[0:1] == "=": - sNew = globals()[sRepl[1:]](sText, m) + sNew = getattr(gce_func, sRepl[1:])(sText, m) sNew = sNew + " " * (nLen-len(sNew)) if bUppercase and m.group(iGroup)[0:1].isupper(): sNew = sNew.capitalize() else: sNew = m.expand(sRepl) @@ -797,11 +786,11 @@ else: for i in range(nTokenRewriteStart, nTokenRewriteEnd+1): self.lToken[i]["sNewValue"] = "_" else: if sWhat.startswith("="): - sWhat = globals()[sWhat[1:]](self.lToken, nTokenOffset, nLastToken) + sWhat = getattr(gce_func, sWhat[1:])(self.lToken, nTokenOffset, nLastToken) else: sWhat = self._expand(sWhat, nTokenOffset, nLastToken) bUppercase = bCaseSvty and self.lToken[nTokenRewriteStart]["sValue"][0:1].isupper() if nTokenRewriteEnd - nTokenRewriteStart == 0: # one token @@ -871,410 +860,5 @@ echo(dToken) if bDebug: echo(" TEXT REWRITED: " + self.sSentence) self.lToken.clear() self.lToken = lNewToken - - -#### common functions - -def option (sOpt): - "return True if option is active" - return _dOptions.get(sOpt, False) - - -#### Functions to get text outside pattern scope - -# warning: check compile_rules.py to understand how it works - -_zNextWord = re.compile(r" +(\w[\w-]*)") -_zPrevWord = re.compile(r"(\w[\w-]*) +$") - -def nextword (s, iStart, n): - "get the nth word of the input string or empty string" - m = re.match("(?: +[\\w%-]+){" + str(n-1) + "} +([\\w%-]+)", s[iStart:]) - if not m: - return None - return (iStart+m.start(1), m.group(1)) - - -def prevword (s, iEnd, n): - "get the (-)nth word of the input string or empty string" - m = re.search("([\\w%-]+) +(?:[\\w%-]+ +){" + str(n-1) + "}$", s[:iEnd]) - if not m: - return None - return (m.start(1), m.group(1)) - - -def nextword1 (s, iStart): - "get next word (optimization)" - m = _zNextWord.match(s[iStart:]) - if not m: - return None - return (iStart+m.start(1), m.group(1)) - - -def prevword1 (s, iEnd): - "get previous word (optimization)" - m = _zPrevWord.search(s[:iEnd]) - if not m: - return None - return (m.start(1), m.group(1)) - - -def look (s, sPattern, sNegPattern=None): - "seek sPattern in s (before/after/fulltext), if sNegPattern not in s" - if sNegPattern and re.search(sNegPattern, s): - return False - if re.search(sPattern, s): - return True - return False - - -def look_chk1 (dTokenPos, s, nOffset, sPattern, sPatternGroup1, sNegPatternGroup1=""): - "returns True if s has pattern sPattern and m.group(1) has pattern sPatternGroup1" - m = re.search(sPattern, s) - if not m: - return False - try: - sWord = m.group(1) - nPos = m.start(1) + nOffset - except IndexError: - return False - return morph(dTokenPos, (nPos, sWord), sPatternGroup1, sNegPatternGroup1) - - - -#### Analyse groups for regex rules - -def displayInfo (dTokenPos, tWord): - "for debugging: retrieve info of word" - if not tWord: - echo("> nothing to find") - return True - lMorph = _oSpellChecker.getMorph(tWord[1]) - if not lMorph: - echo("> not in dictionary") - return True - echo("TOKENS:", dTokenPos) - if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]]: - echo("DA: " + str(dTokenPos[tWord[0]]["lMorph"])) - echo("FSA: " + str(lMorph)) - return True - - -def morph (dTokenPos, tWord, sPattern, sNegPattern="", bNoWord=False): - "analyse a tuple (position, word), returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on)" - if not tWord: - return bNoWord - lMorph = dTokenPos[tWord[0]]["lMorph"] if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]] else _oSpellChecker.getMorph(tWord[1]) - if not lMorph: - return False - # check negative condition - if sNegPattern: - if sNegPattern == "*": - # all morph must match sPattern - zPattern = re.compile(sPattern) - return all(zPattern.search(sMorph) for sMorph in lMorph) - zNegPattern = re.compile(sNegPattern) - if any(zNegPattern.search(sMorph) for sMorph in lMorph): - return False - # search sPattern - zPattern = re.compile(sPattern) - return any(zPattern.search(sMorph) for sMorph in lMorph) - - -def analyse (sWord, sPattern, sNegPattern=""): - "analyse a word, returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation off)" - lMorph = _oSpellChecker.getMorph(sWord) - if not lMorph: - return False - # check negative condition - if sNegPattern: - if sNegPattern == "*": - zPattern = re.compile(sPattern) - return all(zPattern.search(sMorph) for sMorph in lMorph) - zNegPattern = re.compile(sNegPattern) - if any(zNegPattern.search(sMorph) for sMorph in lMorph): - return False - # search sPattern - zPattern = re.compile(sPattern) - return any(zPattern.search(sMorph) for sMorph in lMorph) - - -#### Analyse tokens for graph rules - -def g_value (dToken, sValues, nLeft=None, nRight=None): - "test if is in sValues (each value should be separated with |)" - sValue = "|"+dToken["sValue"]+"|" if nLeft is None else "|"+dToken["sValue"][slice(nLeft, nRight)]+"|" - if sValue in sValues: - return True - if dToken["sValue"][0:2].istitle(): # we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout". - if sValue.lower() in sValues: - return True - elif dToken["sValue"].isupper(): - #if sValue.lower() in sValues: - # return True - sValue = "|"+sValue[1:].capitalize() - if sValue in sValues: - return True - sValue = sValue.lower() - if sValue in sValues: - return True - return False - - -def g_morph (dToken, sPattern, sNegPattern="", nLeft=None, nRight=None, bMemorizeMorph=True): - "analyse a token, return True if not in morphologies and in morphologies" - if "lMorph" in dToken: - lMorph = dToken["lMorph"] - else: - if nLeft is not None: - lMorph = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)]) - if bMemorizeMorph: - dToken["lMorph"] = lMorph - else: - lMorph = _oSpellChecker.getMorph(dToken["sValue"]) - if not lMorph: - return False - # check negative condition - if sNegPattern: - if sNegPattern == "*": - # all morph must match sPattern - zPattern = re.compile(sPattern) - return all(zPattern.search(sMorph) for sMorph in lMorph) - zNegPattern = re.compile(sNegPattern) - if any(zNegPattern.search(sMorph) for sMorph in lMorph): - return False - # search sPattern - zPattern = re.compile(sPattern) - return any(zPattern.search(sMorph) for sMorph in lMorph) - - -def g_analyse (dToken, sPattern, sNegPattern="", nLeft=None, nRight=None, bMemorizeMorph=True): - "analyse a token, return True if not in morphologies and in morphologies (disambiguation off)" - if nLeft is not None: - lMorph = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)]) - if bMemorizeMorph: - dToken["lMorph"] = lMorph - else: - lMorph = _oSpellChecker.getMorph(dToken["sValue"]) - if not lMorph: - return False - # check negative condition - if sNegPattern: - if sNegPattern == "*": - # all morph must match sPattern - zPattern = re.compile(sPattern) - return all(zPattern.search(sMorph) for sMorph in lMorph) - zNegPattern = re.compile(sNegPattern) - if any(zNegPattern.search(sMorph) for sMorph in lMorph): - return False - # search sPattern - zPattern = re.compile(sPattern) - return any(zPattern.search(sMorph) for sMorph in lMorph) - - -def g_merged_analyse (dToken1, dToken2, cMerger, sPattern, sNegPattern="", bSetMorph=True): - "merge two token values, return True if not in morphologies and in morphologies (disambiguation off)" - lMorph = _oSpellChecker.getMorph(dToken1["sValue"] + cMerger + dToken2["sValue"]) - if not lMorph: - return False - # check negative condition - if sNegPattern: - if sNegPattern == "*": - # all morph must match sPattern - zPattern = re.compile(sPattern) - bResult = all(zPattern.search(sMorph) for sMorph in lMorph) - if bResult and bSetMorph: - dToken1["lMorph"] = lMorph - return bResult - zNegPattern = re.compile(sNegPattern) - if any(zNegPattern.search(sMorph) for sMorph in lMorph): - return False - # search sPattern - zPattern = re.compile(sPattern) - bResult = any(zPattern.search(sMorph) for sMorph in lMorph) - if bResult and bSetMorph: - dToken1["lMorph"] = lMorph - return bResult - - -def g_tag_before (dToken, dTags, sTag): - "returns True if is present on tokens before " - if sTag not in dTags: - return False - if dToken["i"] > dTags[sTag][0]: - return True - return False - - -def g_tag_after (dToken, dTags, sTag): - "returns True if is present on tokens after " - if sTag not in dTags: - return False - if dToken["i"] < dTags[sTag][1]: - return True - return False - - -def g_tag (dToken, sTag): - "returns True if is present on token " - return "aTags" in dToken and sTag in dToken["aTags"] - - -def g_meta (dToken, sType): - "returns True if is equal to the token type" - return dToken["sType"] == sType - - -def g_space_between_tokens (dToken1, dToken2, nMin, nMax=None): - "checks if spaces between tokens is >= and <= " - nSpace = dToken2["nStart"] - dToken1["nEnd"] - if nSpace < nMin: - return False - if nMax is not None and nSpace > nMax: - return False - return True - - -def g_token (lToken, i): - "return token at index in lToken (or the closest one)" - if i < 0: - return lToken[0] - if i >= len(lToken): - return lToken[-1] - return lToken[i] - - - -#### Disambiguator for regex rules - -def select (dTokenPos, nPos, sWord, sPattern, lDefault=None): - "Disambiguation: select morphologies of matching " - if not sWord: - return True - if nPos not in dTokenPos: - echo("Error. There should be a token at this position: ", nPos) - return True - lMorph = _oSpellChecker.getMorph(sWord) - if not lMorph or len(lMorph) == 1: - return True - lSelect = [ sMorph for sMorph in lMorph if re.search(sPattern, sMorph) ] - if lSelect: - if len(lSelect) != len(lMorph): - dTokenPos[nPos]["lMorph"] = lSelect - elif lDefault: - dTokenPos[nPos]["lMorph"] = lDefault - return True - - -def exclude (dTokenPos, nPos, sWord, sPattern, lDefault=None): - "Disambiguation: exclude morphologies of matching " - if not sWord: - return True - if nPos not in dTokenPos: - echo("Error. There should be a token at this position: ", nPos) - return True - lMorph = _oSpellChecker.getMorph(sWord) - if not lMorph or len(lMorph) == 1: - return True - lSelect = [ sMorph for sMorph in lMorph if not re.search(sPattern, sMorph) ] - if lSelect: - if len(lSelect) != len(lMorph): - dTokenPos[nPos]["lMorph"] = lSelect - elif lDefault: - dTokenPos[nPos]["lMorph"] = lDefault - return True - - -def define (dTokenPos, nPos, lMorph): - "Disambiguation: set morphologies of token at with " - if nPos not in dTokenPos: - echo("Error. There should be a token at this position: ", nPos) - return True - dTokenPos[nPos]["lMorph"] = lMorph - return True - - -#### Disambiguation for graph rules - -def g_select (dToken, sPattern, lDefault=None): - "Disambiguation: select morphologies for according to , always return True" - lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"]) - if not lMorph or len(lMorph) == 1: - if lDefault: - dToken["lMorph"] = lDefault - #echo("DA:", dToken["sValue"], dToken["lMorph"]) - return True - lSelect = [ sMorph for sMorph in lMorph if re.search(sPattern, sMorph) ] - if lSelect: - if len(lSelect) != len(lMorph): - dToken["lMorph"] = lSelect - elif lDefault: - dToken["lMorph"] = lDefault - #echo("DA:", dToken["sValue"], dToken["lMorph"]) - return True - - -def g_exclude (dToken, sPattern, lDefault=None): - "Disambiguation: select morphologies for according to , always return True" - lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"]) - if not lMorph or len(lMorph) == 1: - if lDefault: - dToken["lMorph"] = lDefault - #echo("DA:", dToken["sValue"], dToken["lMorph"]) - return True - lSelect = [ sMorph for sMorph in lMorph if not re.search(sPattern, sMorph) ] - if lSelect: - if len(lSelect) != len(lMorph): - dToken["lMorph"] = lSelect - elif lDefault: - dToken["lMorph"] = lDefault - #echo("DA:", dToken["sValue"], dToken["lMorph"]) - return True - - -def g_add_morph (dToken, lNewMorph): - "Disambiguation: add a morphology to a token" - lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"]) - lMorph.extend(lNewMorph) - dToken["lMorph"] = lMorph - return True - - -def g_define (dToken, lMorph): - "Disambiguation: set morphologies of , always return True" - dToken["lMorph"] = lMorph - #echo("DA:", dToken["sValue"], lMorph) - return True - - -def g_define_from (dToken, nLeft=None, nRight=None): - "Disambiguation: set morphologies of with slicing its value with and " - if nLeft is not None: - dToken["lMorph"] = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)]) - else: - dToken["lMorph"] = _oSpellChecker.getMorph(dToken["sValue"]) - return True - - -def g_change_meta (dToken, sType): - "Disambiguation: change type of token" - dToken["sType"] = sType - return True - - - -#### GRAMMAR CHECKER PLUGINS - -${plugins} - - -#### CALLABLES FOR REGEX RULES (generated code) - -${callables} - - -#### CALLABLES FOR GRAPH RULES (generated code) - -${graph_callables} ADDED gc_core/py/lang_core/gc_engine_func.py Index: gc_core/py/lang_core/gc_engine_func.py ================================================================== --- /dev/null +++ gc_core/py/lang_core/gc_engine_func.py @@ -0,0 +1,427 @@ +""" +Grammar checking functions +""" + +# generated code, do not edit +# source: gc_core/py/lang_core/gc_engine_func.py + +import re + +from . import gc_options +from ..graphspell.echo import echo + + +_sAppContext = "Python" # what software is running +_oSpellChecker = None + + +def load (sContext, oSpellChecker): + global _sAppContext + global _oSpellChecker + _sAppContext = sContext + _oSpellChecker = oSpellChecker + + +#### common functions + +def option (sOpt): + "return True if option is active" + return gc_options.dOptions.get(sOpt, False) + + +#### Functions to get text outside pattern scope + +# warning: check compile_rules.py to understand how it works + +_zNextWord = re.compile(r" +(\w[\w-]*)") +_zPrevWord = re.compile(r"(\w[\w-]*) +$") + +def nextword (s, iStart, n): + "get the nth word of the input string or empty string" + m = re.match("(?: +[\\w%-]+){" + str(n-1) + "} +([\\w%-]+)", s[iStart:]) + if not m: + return None + return (iStart+m.start(1), m.group(1)) + + +def prevword (s, iEnd, n): + "get the (-)nth word of the input string or empty string" + m = re.search("([\\w%-]+) +(?:[\\w%-]+ +){" + str(n-1) + "}$", s[:iEnd]) + if not m: + return None + return (m.start(1), m.group(1)) + + +def nextword1 (s, iStart): + "get next word (optimization)" + m = _zNextWord.match(s[iStart:]) + if not m: + return None + return (iStart+m.start(1), m.group(1)) + + +def prevword1 (s, iEnd): + "get previous word (optimization)" + m = _zPrevWord.search(s[:iEnd]) + if not m: + return None + return (m.start(1), m.group(1)) + + +def look (s, sPattern, sNegPattern=None): + "seek sPattern in s (before/after/fulltext), if sNegPattern not in s" + if sNegPattern and re.search(sNegPattern, s): + return False + if re.search(sPattern, s): + return True + return False + + +def look_chk1 (dTokenPos, s, nOffset, sPattern, sPatternGroup1, sNegPatternGroup1=""): + "returns True if s has pattern sPattern and m.group(1) has pattern sPatternGroup1" + m = re.search(sPattern, s) + if not m: + return False + try: + sWord = m.group(1) + nPos = m.start(1) + nOffset + except IndexError: + return False + return morph(dTokenPos, (nPos, sWord), sPatternGroup1, sNegPatternGroup1) + + + +#### Analyse groups for regex rules + +def displayInfo (dTokenPos, tWord): + "for debugging: retrieve info of word" + if not tWord: + print("> nothing to find") + return True + lMorph = _oSpellChecker.getMorph(tWord[1]) + if not lMorph: + print("> not in dictionary") + return True + print("TOKENS:", dTokenPos) + if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]]: + print("DA: " + str(dTokenPos[tWord[0]]["lMorph"])) + print("FSA: " + str(lMorph)) + return True + + +def morph (dTokenPos, tWord, sPattern, sNegPattern="", bNoWord=False): + "analyse a tuple (position, word), returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on)" + if not tWord: + return bNoWord + lMorph = dTokenPos[tWord[0]]["lMorph"] if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]] else _oSpellChecker.getMorph(tWord[1]) + if not lMorph: + return False + # check negative condition + if sNegPattern: + if sNegPattern == "*": + # all morph must match sPattern + zPattern = re.compile(sPattern) + return all(zPattern.search(sMorph) for sMorph in lMorph) + zNegPattern = re.compile(sNegPattern) + if any(zNegPattern.search(sMorph) for sMorph in lMorph): + return False + # search sPattern + zPattern = re.compile(sPattern) + return any(zPattern.search(sMorph) for sMorph in lMorph) + + +def analyse (sWord, sPattern, sNegPattern=""): + "analyse a word, returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation off)" + lMorph = _oSpellChecker.getMorph(sWord) + if not lMorph: + return False + # check negative condition + if sNegPattern: + if sNegPattern == "*": + zPattern = re.compile(sPattern) + return all(zPattern.search(sMorph) for sMorph in lMorph) + zNegPattern = re.compile(sNegPattern) + if any(zNegPattern.search(sMorph) for sMorph in lMorph): + return False + # search sPattern + zPattern = re.compile(sPattern) + return any(zPattern.search(sMorph) for sMorph in lMorph) + + +#### Analyse tokens for graph rules + +def g_value (dToken, sValues, nLeft=None, nRight=None): + "test if is in sValues (each value should be separated with |)" + sValue = "|"+dToken["sValue"]+"|" if nLeft is None else "|"+dToken["sValue"][slice(nLeft, nRight)]+"|" + if sValue in sValues: + return True + if dToken["sValue"][0:2].istitle(): # we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout". + if sValue.lower() in sValues: + return True + elif dToken["sValue"].isupper(): + #if sValue.lower() in sValues: + # return True + sValue = "|"+sValue[1:].capitalize() + if sValue in sValues: + return True + sValue = sValue.lower() + if sValue in sValues: + return True + return False + + +def g_morph (dToken, sPattern, sNegPattern="", nLeft=None, nRight=None, bMemorizeMorph=True): + "analyse a token, return True if not in morphologies and in morphologies" + if "lMorph" in dToken: + lMorph = dToken["lMorph"] + else: + if nLeft is not None: + lMorph = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)]) + if bMemorizeMorph: + dToken["lMorph"] = lMorph + else: + lMorph = _oSpellChecker.getMorph(dToken["sValue"]) + if not lMorph: + return False + # check negative condition + if sNegPattern: + if sNegPattern == "*": + # all morph must match sPattern + zPattern = re.compile(sPattern) + return all(zPattern.search(sMorph) for sMorph in lMorph) + zNegPattern = re.compile(sNegPattern) + if any(zNegPattern.search(sMorph) for sMorph in lMorph): + return False + # search sPattern + zPattern = re.compile(sPattern) + return any(zPattern.search(sMorph) for sMorph in lMorph) + + +def g_analyse (dToken, sPattern, sNegPattern="", nLeft=None, nRight=None, bMemorizeMorph=True): + "analyse a token, return True if not in morphologies and in morphologies (disambiguation off)" + if nLeft is not None: + lMorph = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)]) + if bMemorizeMorph: + dToken["lMorph"] = lMorph + else: + lMorph = _oSpellChecker.getMorph(dToken["sValue"]) + if not lMorph: + return False + # check negative condition + if sNegPattern: + if sNegPattern == "*": + # all morph must match sPattern + zPattern = re.compile(sPattern) + return all(zPattern.search(sMorph) for sMorph in lMorph) + zNegPattern = re.compile(sNegPattern) + if any(zNegPattern.search(sMorph) for sMorph in lMorph): + return False + # search sPattern + zPattern = re.compile(sPattern) + return any(zPattern.search(sMorph) for sMorph in lMorph) + + +def g_merged_analyse (dToken1, dToken2, cMerger, sPattern, sNegPattern="", bSetMorph=True): + "merge two token values, return True if not in morphologies and in morphologies (disambiguation off)" + lMorph = _oSpellChecker.getMorph(dToken1["sValue"] + cMerger + dToken2["sValue"]) + if not lMorph: + return False + # check negative condition + if sNegPattern: + if sNegPattern == "*": + # all morph must match sPattern + zPattern = re.compile(sPattern) + bResult = all(zPattern.search(sMorph) for sMorph in lMorph) + if bResult and bSetMorph: + dToken1["lMorph"] = lMorph + return bResult + zNegPattern = re.compile(sNegPattern) + if any(zNegPattern.search(sMorph) for sMorph in lMorph): + return False + # search sPattern + zPattern = re.compile(sPattern) + bResult = any(zPattern.search(sMorph) for sMorph in lMorph) + if bResult and bSetMorph: + dToken1["lMorph"] = lMorph + return bResult + + +def g_tag_before (dToken, dTags, sTag): + "returns True if is present on tokens before " + if sTag not in dTags: + return False + if dToken["i"] > dTags[sTag][0]: + return True + return False + + +def g_tag_after (dToken, dTags, sTag): + "returns True if is present on tokens after " + if sTag not in dTags: + return False + if dToken["i"] < dTags[sTag][1]: + return True + return False + + +def g_tag (dToken, sTag): + "returns True if is present on token " + return "aTags" in dToken and sTag in dToken["aTags"] + + +def g_meta (dToken, sType): + "returns True if is equal to the token type" + return dToken["sType"] == sType + + +def g_space_between_tokens (dToken1, dToken2, nMin, nMax=None): + "checks if spaces between tokens is >= and <= " + nSpace = dToken2["nStart"] - dToken1["nEnd"] + if nSpace < nMin: + return False + if nMax is not None and nSpace > nMax: + return False + return True + + +def g_token (lToken, i): + "return token at index in lToken (or the closest one)" + if i < 0: + return lToken[0] + if i >= len(lToken): + return lToken[-1] + return lToken[i] + + + +#### Disambiguator for regex rules + +def select (dTokenPos, nPos, sWord, sPattern, lDefault=None): + "Disambiguation: select morphologies of matching " + if not sWord: + return True + if nPos not in dTokenPos: + print("Error. There should be a token at this position: ", nPos) + return True + lMorph = _oSpellChecker.getMorph(sWord) + if not lMorph or len(lMorph) == 1: + return True + lSelect = [ sMorph for sMorph in lMorph if re.search(sPattern, sMorph) ] + if lSelect: + if len(lSelect) != len(lMorph): + dTokenPos[nPos]["lMorph"] = lSelect + elif lDefault: + dTokenPos[nPos]["lMorph"] = lDefault + return True + + +def exclude (dTokenPos, nPos, sWord, sPattern, lDefault=None): + "Disambiguation: exclude morphologies of matching " + if not sWord: + return True + if nPos not in dTokenPos: + print("Error. There should be a token at this position: ", nPos) + return True + lMorph = _oSpellChecker.getMorph(sWord) + if not lMorph or len(lMorph) == 1: + return True + lSelect = [ sMorph for sMorph in lMorph if not re.search(sPattern, sMorph) ] + if lSelect: + if len(lSelect) != len(lMorph): + dTokenPos[nPos]["lMorph"] = lSelect + elif lDefault: + dTokenPos[nPos]["lMorph"] = lDefault + return True + + +def define (dTokenPos, nPos, lMorph): + "Disambiguation: set morphologies of token at with " + if nPos not in dTokenPos: + print("Error. There should be a token at this position: ", nPos) + return True + dTokenPos[nPos]["lMorph"] = lMorph + return True + + +#### Disambiguation for graph rules + +def g_select (dToken, sPattern, lDefault=None): + "Disambiguation: select morphologies for according to , always return True" + lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"]) + if not lMorph or len(lMorph) == 1: + if lDefault: + dToken["lMorph"] = lDefault + #print("DA:", dToken["sValue"], dToken["lMorph"]) + return True + lSelect = [ sMorph for sMorph in lMorph if re.search(sPattern, sMorph) ] + if lSelect: + if len(lSelect) != len(lMorph): + dToken["lMorph"] = lSelect + elif lDefault: + dToken["lMorph"] = lDefault + #print("DA:", dToken["sValue"], dToken["lMorph"]) + return True + + +def g_exclude (dToken, sPattern, lDefault=None): + "Disambiguation: select morphologies for according to , always return True" + lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"]) + if not lMorph or len(lMorph) == 1: + if lDefault: + dToken["lMorph"] = lDefault + #print("DA:", dToken["sValue"], dToken["lMorph"]) + return True + lSelect = [ sMorph for sMorph in lMorph if not re.search(sPattern, sMorph) ] + if lSelect: + if len(lSelect) != len(lMorph): + dToken["lMorph"] = lSelect + elif lDefault: + dToken["lMorph"] = lDefault + #print("DA:", dToken["sValue"], dToken["lMorph"]) + return True + + +def g_add_morph (dToken, lNewMorph): + "Disambiguation: add a morphology to a token" + lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"]) + lMorph.extend(lNewMorph) + dToken["lMorph"] = lMorph + return True + + +def g_define (dToken, lMorph): + "Disambiguation: set morphologies of , always return True" + dToken["lMorph"] = lMorph + #print("DA:", dToken["sValue"], lMorph) + return True + + +def g_define_from (dToken, nLeft=None, nRight=None): + "Disambiguation: set morphologies of with slicing its value with and " + if nLeft is not None: + dToken["lMorph"] = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)]) + else: + dToken["lMorph"] = _oSpellChecker.getMorph(dToken["sValue"]) + return True + + +def g_change_meta (dToken, sType): + "Disambiguation: change type of token" + dToken["sType"] = sType + return True + + + +#### GRAMMAR CHECKER PLUGINS + +${plugins} + + +#### CALLABLES FOR REGEX RULES (generated code) + +${callables} + + +#### CALLABLES FOR GRAPH RULES (generated code) + +${graph_callables} Index: gc_core/py/lang_core/gc_options.py ================================================================== --- gc_core/py/lang_core/gc_options.py +++ gc_core/py/lang_core/gc_options.py @@ -1,42 +1,89 @@ """ Grammar checker default options """ # generated code, do not edit +# source: gc_core/py/lang_core/gc_options.py import traceback -def getUI (sLang): + +dOptions = {} + +_sAppContext = "Python" + + +def load (sContext="Python"): + global dOptions + global _sAppContext + _sAppContext = sContext + dOptions = getDefaultOptions(sContext) + + +def setOption (sOpt, bVal): + "set option with if it exists" + if sOpt in dOptions: + dOptions[sOpt] = bVal + + +def setOptions (dOpt): + "update the dictionary of options with , only known options are updated" + for sKey, bVal in dOpt.items(): + if sKey in dOptions: + dOptions[sKey] = bVal + + +def getOptions (): + "return a copy of options as dictionary" + return dOptions.copy() + + +def resetOptions (): + "set options to default values" + global dOptions + dOptions = getDefaultOptions() + + +def displayOptions (sLang="${lang}"): + "display the list of grammar checking options" + print("Options:") + print("\n".join( [ k+":\t"+str(v)+"\t"+getOptionLabels(sLang).get(k, ("?", ""))[0] for k, v in sorted(dOptions.items()) ] )) + print("") + + +def getOptionLabels (sLang="${sLang}"): "returns dictionary of UI labels" if sLang in _dOptLabel: return _dOptLabel[sLang] - return _dOptLabel["fr"] + return _dOptLabel["${sLang}"] -def getOptions (sContext="Python"): +def getDefaultOptions (sContext=""): "returns dictionary of options" - if sContext in _dOpt: - return _dOpt[sContext] - return _dOpt["Python"] + if not sContext: + sContext = _sAppContext + if sContext in _dDefaultOpt: + return _dDefaultOpt[sContext].copy() # duplication necessary, to be able to reset to default + return _dDefaultOpt["Python"].copy() # duplication necessary, to be able to reset to default def getOptionsColors (sTheme="Default", sColorType="aRGB"): "returns dictionary of options colors" dOptColor = _dOptColor[sTheme] if sTheme in _dOptColor else _dOptColor["Default"] dColorType = _dColorType[sColorType] if sColorType in _dColorType else _dColorType["aRGB"] try: - return { sOpt: dColorType[sColor] for sOpt, sColor in dOptColor.items() } + return { sOpt: dColorType[sColor] for sOpt, sColor in dOptColor.items() } except KeyError: traceback.print_exc() return {} lStructOpt = ${lStructOpt} -_dOpt = { +_dDefaultOpt = { "Python": ${dOptPython}, "Server": ${dOptServer}, "Writer": ${dOptWriter} } Index: gc_core/py/oxt/Grammalecte.py ================================================================== --- gc_core/py/oxt/Grammalecte.py +++ gc_core/py/oxt/Grammalecte.py @@ -13,36 +13,36 @@ from com.sun.star.linguistic2 import ProofreadingResult from com.sun.star.lang import XServiceInfo, XServiceName, XServiceDisplayName from com.sun.star.lang import Locale import helpers -import grammalecte.${lang} as gce +import grammalecte.${lang} as gc_engine #import lightproof_handler_${implname} as opt_handler import Options class Grammalecte (unohelper.Base, XProofreader, XServiceInfo, XServiceName, XServiceDisplayName, XSupportedLocales): def __init__ (self, ctx, *args): self.ctx = ctx self.ServiceName = "com.sun.star.linguistic2.Proofreader" - self.ImplementationName = "org.openoffice.comp.pyuno.Lightproof." + gce.pkg + self.ImplementationName = "org.openoffice.comp.pyuno.Lightproof." + gc_engine.pkg self.SupportedServiceNames = (self.ServiceName, ) self.locales = [] - for i in gce.locales: - l = gce.locales[i] + for i in gc_engine.locales: + l = gc_engine.locales[i] self.locales.append(Locale(l[0], l[1], l[2])) self.locales = tuple(self.locales) # debug #helpers.startConsole() # init - gce.load("Writer", "nInt") + gc_engine.load("Writer", "nInt") # GC options #xContext = uno.getComponentContext() #opt_handler.load(xContext) dOpt = Options.loadOptions("${lang}") - gce.setOptions(dOpt) + gc_engine.gc_options.setOptions(dOpt) # dictionaries options self.loadUserDictionaries() # underlining options self.setWriterUnderliningStyle() # store for results of big paragraphs @@ -109,11 +109,11 @@ # WORKAROUND ->>> xRes.nBehindEndOfSentencePosition = xRes.nStartOfNextSentencePosition try: - xRes.aErrors = tuple(gce.parse(rText, rLocale.Country)) + xRes.aErrors = tuple(gc_engine.parse(rText, rLocale.Country)) # ->>> WORKAROUND if xRes.nStartOfNextSentencePosition > 3000: self.dResult[nHashedVal] = xRes self.nRes += 1 if self.nRes > self.nMaxRes: @@ -124,31 +124,31 @@ except: traceback.print_exc() return xRes def ignoreRule (self, rid, aLocale): - gce.ignoreRule(rid) + gc_engine.ignoreRule(rid) def resetIgnoreRules (self): - gce.resetIgnoreRules() + gc_engine.resetIgnoreRules() # XServiceDisplayName def getServiceDisplayName (self, aLocale): - return gce.name + return gc_engine.name # Grammalecte def getSpellChecker (self): - return gce.getSpellChecker() + return gc_engine.getSpellChecker() def loadUserDictionaries (self): try: xSettingNode = helpers.getConfigSetting("/org.openoffice.Lightproof_${implname}/Other/", False) xChild = xSettingNode.getByName("o_${lang}") if xChild.getPropertyValue("use_personal_dic"): sJSON = xChild.getPropertyValue("personal_dic") if sJSON: - oSpellChecker = gce.getSpellChecker(); + oSpellChecker = gc_engine.getSpellChecker(); oSpellChecker.setPersonalDictionary(json.loads(sJSON)) except: traceback.print_exc() def setWriterUnderliningStyle (self): @@ -155,15 +155,15 @@ try: xSettingNode = helpers.getConfigSetting("/org.openoffice.Lightproof_${implname}/Other/", False) xChild = xSettingNode.getByName("o_${lang}") sLineType = xChild.getPropertyValue("line_type") bMulticolor = bool(xChild.getPropertyValue("line_multicolor")) - gce.setWriterUnderliningStyle(sLineType, bMulticolor) + gc_engine.setWriterUnderliningStyle(sLineType, bMulticolor) except: traceback.print_exc() g_ImplementationHelper = unohelper.ImplementationHelper() -g_ImplementationHelper.addImplementation(Grammalecte, "org.openoffice.comp.pyuno.Lightproof."+gce.pkg, ("com.sun.star.linguistic2.Proofreader",),) +g_ImplementationHelper.addImplementation(Grammalecte, "org.openoffice.comp.pyuno.Lightproof."+gc_engine.pkg, ("com.sun.star.linguistic2.Proofreader",),) # g_ImplementationHelper.addImplementation( opt_handler.LightproofOptionsEventHandler, \ -# "org.openoffice.comp.pyuno.LightproofOptionsEventHandler." + gce.pkg, ("com.sun.star.awt.XContainerWindowEventHandler",),) +# "org.openoffice.comp.pyuno.LightproofOptionsEventHandler." + gc_engine.pkg, ("com.sun.star.awt.XContainerWindowEventHandler",),) Index: gc_core/py/oxt/Options.py ================================================================== --- gc_core/py/oxt/Options.py +++ gc_core/py/oxt/Options.py @@ -11,30 +11,30 @@ import helpers import op_strings try: - import grammalecte.${lang} as gce + import grammalecte.${lang} as gc_engine except: traceback.print_exc() def loadOptions (sLang): "load options from Grammalecte and change them according to LibreOffice settings, returns a dictionary {option_name: boolean}" try: xNode = helpers.getConfigSetting("/org.openoffice.Lightproof_${implname}/Leaves", False) xChild = xNode.getByName(sLang) - dOpt = gce.gc_options.getOptions("Writer") + dOpt = gc_engine.gc_options.getDefaultOptions("Writer") for sKey in dOpt: sValue = xChild.getPropertyValue(sKey) if sValue != '': dOpt[sKey] = bool(int(sValue)) return dOpt except: print("# Error. Unable to load options of language:", sLang) traceback.print_exc() - return gce.gc_options.getOptions("Writer") + return gc_engine.gc_options.getDefaultOptions("Writer") def saveOptions (sLang, dOpt): "save options in LibreOffice profile" try: @@ -70,11 +70,11 @@ return xWidget def run (self, sUI): try: dUI = op_strings.getUI(sUI) - dOptionUI = gce.gc_options.getUI(sUI) + dOptionUI = gc_engine.gc_options.getOptionLabels(sUI) # fonts xFDTitle = uno.createUnoStruct("com.sun.star.awt.FontDescriptor") xFDTitle.Height = 9 xFDTitle.Weight = uno.getConstantByName("com.sun.star.awt.FontWeight.BOLD") @@ -99,11 +99,11 @@ self.lOptionWidgets = [] sProdName, sVersion = helpers.getProductNameAndVersion() if True: # no tab available (bug) - for sOptionType, lOptions in gce.gc_options.lStructOpt: + for sOptionType, lOptions in gc_engine.gc_options.lStructOpt: x = 10 y += 10 self._addWidget(sOptionType, 'FixedLine', x, y, nWidth, nHeight, Label = dOptionUI.get(sOptionType, "#err")[0], FontDescriptor= xFDTitle) y += 3 for lOptLine in lOptions: @@ -162,11 +162,11 @@ # XActionListener def actionPerformed (self, xActionEvent): try: if xActionEvent.ActionCommand == 'Default': - self._setWidgets(gce.gc_options.getOptions("Writer")) + self._setWidgets(gc_engine.gc_options.getDefaultOptions("Writer")) elif xActionEvent.ActionCommand == 'Apply': self._save("${lang}") self.xContainer.endExecute() elif xActionEvent.ActionCommand == 'Cancel': self.xContainer.endExecute() @@ -181,8 +181,8 @@ w.State = dOpt.get(w.Name, False) def _save (self, sLang): try: saveOptions(sLang, { w.Name: str(w.State) for w in self.lOptionWidgets }) - gce.setOptions({ w.Name: bool(w.State) for w in self.lOptionWidgets }) + gc_engine.gc_options.setOptions({ w.Name: bool(w.State) for w in self.lOptionWidgets }) except: traceback.print_exc() Index: gc_lang/fr/config.ini ================================================================== --- gc_lang/fr/config.ini +++ gc_lang/fr/config.ini @@ -4,11 +4,11 @@ locales = fr_FR fr_BE fr_CA fr_CH fr_LU fr_BF fr_BJ fr_CD fr_CI fr_CM fr_MA fr_ML fr_MU fr_NE fr_RE fr_SN fr_TG country_default = FR name = Grammalecte implname = grammalecte # always use 3 numbers for version: x.y.z -version = 1.8.2 +version = 2.0.0 author = Olivier R. provider = Grammalecte.net link = https://grammalecte.net description = Correcteur grammatical, orthographique et typographique pour le français. extras = README_fr.txt DELETED gc_lang/fr/modules/lexicographe.py Index: gc_lang/fr/modules/lexicographe.py ================================================================== --- gc_lang/fr/modules/lexicographe.py +++ /dev/null @@ -1,234 +0,0 @@ -""" -Grammalecte - Lexicographe -""" - -# License: MPL 2 - - -import re -import traceback - - -_dTAGS = { - ':N': (" nom,", "Nom"), - ':A': (" adjectif,", "Adjectif"), - ':M1': (" prénom,", "Prénom"), - ':M2': (" patronyme,", "Patronyme, matronyme, nom de famille…"), - ':MP': (" nom propre,", "Nom propre"), - ':W': (" adverbe,", "Adverbe"), - ':J': (" interjection,", "Interjection"), - ':B': (" nombre,", "Nombre"), - ':T': (" titre,", "Titre de civilité"), - - ':e': (" épicène", "épicène"), - ':m': (" masculin", "masculin"), - ':f': (" féminin", "féminin"), - ':s': (" singulier", "singulier"), - ':p': (" pluriel", "pluriel"), - ':i': (" invariable", "invariable"), - - ':V1': (" verbe (1ᵉʳ gr.),", "Verbe du 1ᵉʳ groupe"), - ':V2': (" verbe (2ᵉ gr.),", "Verbe du 2ᵉ groupe"), - ':V3': (" verbe (3ᵉ gr.),", "Verbe du 3ᵉ groupe"), - ':V0e': (" verbe,", "Verbe auxiliaire être"), - ':V0a': (" verbe,", "Verbe auxiliaire avoir"), - - ':Y': (" infinitif,", "infinitif"), - ':P': (" participe présent,", "participe présent"), - ':Q': (" participe passé,", "participe passé"), - ':Ip': (" présent,", "indicatif présent"), - ':Iq': (" imparfait,", "indicatif imparfait"), - ':Is': (" passé simple,", "indicatif passé simple"), - ':If': (" futur,", "indicatif futur"), - ':K': (" conditionnel présent,", "conditionnel présent"), - ':Sp': (" subjonctif présent,", "subjonctif présent"), - ':Sq': (" subjonctif imparfait,", "subjonctif imparfait"), - ':E': (" impératif,", "impératif"), - - ':1s': (" 1ʳᵉ p. sg.,", "verbe : 1ʳᵉ personne du singulier"), - ':1ŝ': (" présent interr. 1ʳᵉ p. sg.,", "verbe : 1ʳᵉ personne du singulier (présent interrogatif)"), - ':1ś': (" présent interr. 1ʳᵉ p. sg.,", "verbe : 1ʳᵉ personne du singulier (présent interrogatif)"), - ':2s': (" 2ᵉ p. sg.,", "verbe : 2ᵉ personne du singulier"), - ':3s': (" 3ᵉ p. sg.,", "verbe : 3ᵉ personne du singulier"), - ':1p': (" 1ʳᵉ p. pl.,", "verbe : 1ʳᵉ personne du pluriel"), - ':2p': (" 2ᵉ p. pl.,", "verbe : 2ᵉ personne du pluriel"), - ':3p': (" 3ᵉ p. pl.,", "verbe : 3ᵉ personne du pluriel"), - ':3p!': (" 3ᵉ p. pl.,", "verbe : 3ᵉ personne du pluriel (prononciation distinctive)"), - - ':G': ("", "Mot grammatical"), - ':X': (" adverbe de négation,", "Adverbe de négation"), - ':U': (" adverbe interrogatif,", "Adverbe interrogatif"), - ':R': (" préposition,", "Préposition"), - ':Rv': (" préposition verbale,", "Préposition verbale"), - ':D': (" déterminant,", "Déterminant"), - ':Dd': (" déterminant démonstratif,", "Déterminant démonstratif"), - ':De': (" déterminant exclamatif,", "Déterminant exclamatif"), - ':Dp': (" déterminant possessif,", "Déterminant possessif"), - ':Di': (" déterminant indéfini,", "Déterminant indéfini"), - ':Dn': (" déterminant négatif,", "Déterminant négatif"), - ':Od': (" pronom démonstratif,", "Pronom démonstratif"), - ':Oi': (" pronom indéfini,", "Pronom indéfini"), - ':On': (" pronom indéfini négatif,", "Pronom indéfini négatif"), - ':Ot': (" pronom interrogatif,", "Pronom interrogatif"), - ':Or': (" pronom relatif,", "Pronom relatif"), - ':Ow': (" pronom adverbial,", "Pronom adverbial"), - ':Os': (" pronom personnel sujet,", "Pronom personnel sujet"), - ':Oo': (" pronom personnel objet,", "Pronom personnel objet"), - ':Ov': (" préverbe,", "Préverbe (pronom personnel objet, +ne)"), - ':O1': (" 1ʳᵉ pers.,", "Pronom : 1ʳᵉ personne"), - ':O2': (" 2ᵉ pers.,", "Pronom : 2ᵉ personne"), - ':O3': (" 3ᵉ pers.,", "Pronom : 3ᵉ personne"), - ':C': (" conjonction,", "Conjonction"), - ':Ĉ': (" conjonction (él.),", "Conjonction (élément)"), - ':Cc': (" conjonction de coordination,", "Conjonction de coordination"), - ':Cs': (" conjonction de subordination,", "Conjonction de subordination"), - ':Ĉs': (" conjonction de subordination (él.),", "Conjonction de subordination (élément)"), - - ':Ñ': (" locution nominale (él.),", "Locution nominale (élément)"), - ':Â': (" locution adjectivale (él.),", "Locution adjectivale (élément)"), - ':Ṽ': (" locution verbale (él.),", "Locution verbale (élément)"), - ':Ŵ': (" locution adverbiale (él.),", "Locution adverbiale (élément)"), - ':Ŕ': (" locution prépositive (él.),", "Locution prépositive (élément)"), - ':Ĵ': (" locution interjective (él.),", "Locution interjective (élément)"), - - ':Zp': (" préfixe,", "Préfixe"), - ':Zs': (" suffixe,", "Suffixe"), - - ':H': ("", ""), - - ':@': ("", ""), - ':@p': ("signe de ponctuation", "Signe de ponctuation"), - ':@s': ("signe", "Signe divers"), - - ';S': (" : symbole (unité de mesure)", "Symbole (unité de mesure)"), - - '/*': ("", "Sous-dictionnaire "), - '/C': (" ", "Sous-dictionnaire "), - '/M': ("", "Sous-dictionnaire "), - '/R': (" ", "Sous-dictionnaire "), - '/A': ("", "Sous-dictionnaire "), - '/X': ("", "Sous-dictionnaire ") -} - -_dPFX = { - 'd': "(de), déterminant épicène invariable", - 'l': "(le/la), déterminant masculin/féminin singulier", - 'j': "(je), pronom personnel sujet, 1ʳᵉ pers., épicène singulier", - 'm': "(me), pronom personnel objet, 1ʳᵉ pers., épicène singulier", - 't': "(te), pronom personnel objet, 2ᵉ pers., épicène singulier", - 's': "(se), pronom personnel objet, 3ᵉ pers., épicène singulier/pluriel", - 'n': "(ne), adverbe de négation", - 'c': "(ce), pronom démonstratif, masculin singulier/pluriel", - 'ç': "(ça), pronom démonstratif, masculin singulier", - 'qu': "(que), conjonction de subordination", - 'lorsqu': "(lorsque), conjonction de subordination", - 'puisqu': "(puisque), conjonction de subordination", - 'quoiqu': "(quoique), conjonction de subordination", - 'jusqu': "(jusque), préposition", -} - -_dAD = { - 'je': " pronom personnel sujet, 1ʳᵉ pers. sing.", - 'tu': " pronom personnel sujet, 2ᵉ pers. sing.", - 'il': " pronom personnel sujet, 3ᵉ pers. masc. sing.", - 'on': " pronom personnel sujet, 3ᵉ pers. sing. ou plur.", - 'elle': " pronom personnel sujet, 3ᵉ pers. fém. sing.", - 'nous': " pronom personnel sujet/objet, 1ʳᵉ pers. plur.", - 'vous': " pronom personnel sujet/objet, 2ᵉ pers. plur.", - 'ils': " pronom personnel sujet, 3ᵉ pers. masc. plur.", - 'elles': " pronom personnel sujet, 3ᵉ pers. masc. plur.", - - "là": " particule démonstrative", - "ci": " particule démonstrative", - - 'le': " COD, masc. sing.", - 'la': " COD, fém. sing.", - 'les': " COD, plur.", - - 'moi': " COI (à moi), sing.", - 'toi': " COI (à toi), sing.", - 'lui': " COI (à lui ou à elle), sing.", - 'nous2': " COI (à nous), plur.", - 'vous2': " COI (à vous), plur.", - 'leur': " COI (à eux ou à elles), plur.", - - 'y': " pronom adverbial", - "m'y": " (me) pronom personnel objet + (y) pronom adverbial", - "t'y": " (te) pronom personnel objet + (y) pronom adverbial", - "s'y": " (se) pronom personnel objet + (y) pronom adverbial", - - 'en': " pronom adverbial", - "m'en": " (me) pronom personnel objet + (en) pronom adverbial", - "t'en": " (te) pronom personnel objet + (en) pronom adverbial", - "s'en": " (se) pronom personnel objet + (en) pronom adverbial", -} - - -class Lexicographe: - "Lexicographer - word analyzer" - - def __init__ (self, oSpellChecker): - self.oSpellChecker = oSpellChecker - self._zElidedPrefix = re.compile("(?i)^([dljmtsncç]|quoiqu|lorsqu|jusqu|puisqu|qu)['’](.+)") - self._zCompoundWord = re.compile("(?i)(\\w+)-((?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts][’'](?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous)$") - self._zTag = re.compile("[:;/][\\w*][^:;/]*") - - def analyzeWord (self, sWord): - "returns a tuple (a list of morphologies, a set of verb at infinitive form)" - try: - if not sWord: - return (None, None) - if sWord.count("-") > 4: - return (["élément complexe indéterminé"], None) - if sWord.isdigit(): - return (["nombre"], None) - - aMorph = [] - # préfixes élidés - m = self._zElidedPrefix.match(sWord) - if m: - sWord = m.group(2) - aMorph.append( "{}’ : {}".format(m.group(1), _dPFX.get(m.group(1).lower(), "[?]")) ) - # mots composés - m2 = self._zCompoundWord.match(sWord) - if m2: - sWord = m2.group(1) - # Morphologies - lMorph = self.oSpellChecker.getMorph(sWord) - if len(lMorph) > 1: - # sublist - aMorph.append( (sWord, [ self.formatTags(s) for s in lMorph if ":" in s ]) ) - elif len(lMorph) == 1: - aMorph.append( "{} : {}".format(sWord, self.formatTags(lMorph[0])) ) - else: - aMorph.append( "{} : inconnu du dictionnaire".format(sWord) ) - # suffixe d’un mot composé - if m2: - aMorph.append( "-{} : {}".format(m2.group(2), self._formatSuffix(m2.group(2).lower())) ) - # Verbes - aVerb = { s[1:s.find("/")] for s in lMorph if ":V" in s } - return (aMorph, aVerb) - except (IndexError, TypeError): - traceback.print_exc() - return (["#erreur"], None) - - def formatTags (self, sTags): - "returns string: readable tags" - sRes = "" - sTags = re.sub("(?<=V[1-3])[itpqnmr_eaxz]+", "", sTags) - sTags = re.sub("(?<=V0[ea])[itpqnmr_eaxz]+", "", sTags) - for m in self._zTag.finditer(sTags): - sRes += _dTAGS.get(m.group(0), " [{}]".format(m.group(0)))[0] - if sRes.startswith(" verbe") and not sRes.endswith("infinitif"): - sRes += " [{}]".format(sTags[1:sTags.find("/")]) - return sRes.rstrip(",") - - def _formatSuffix (self, s): - if s.startswith("t-"): - return "“t” euphonique +" + _dAD.get(s[2:], "[?]") - if not "-" in s: - return _dAD.get(s.replace("’", "'"), "[?]") - if s.endswith("ous"): - s += '2' - nPos = s.find("-") - return "%s +%s" % (_dAD.get(s[:nPos], "[?]"), _dAD.get(s[nPos+1:], "[?]")) Index: gc_lang/fr/modules/tests.py ================================================================== --- gc_lang/fr/modules/tests.py +++ gc_lang/fr/modules/tests.py @@ -10,11 +10,11 @@ import time from contextlib import contextmanager from ..graphspell.ibdawg import IBDAWG from ..graphspell.echo import echo -from . import gc_engine as gce +from . import gc_engine from . import conj from . import phonet from . import mfsp @@ -32,20 +32,20 @@ def perf (sVersion, hDst=None): "performance tests" print("\nPerformance tests") - gce.load() - gce.parse("Texte sans importance… utile pour la compilation des règles avant le calcul des perfs.") + gc_engine.load() + gc_engine.parse("Texte sans importance… utile pour la compilation des règles avant le calcul des perfs.") spHere, _ = os.path.split(__file__) with open(os.path.join(spHere, "perf.txt"), "r", encoding="utf-8") as hSrc: if hDst: hDst.write("{:<12}{:<20}".format(sVersion, time.strftime("%Y.%m.%d %H:%M"))) for sText in ( s.strip() for s in hSrc if not s.startswith("#") and s.strip() ): with timeblock(sText[:sText.find(".")], hDst): - gce.parse(sText) + gc_engine.parse(sText) if hDst: hDst.write("\n") def _fuckBackslashUTF8 (s): @@ -157,11 +157,11 @@ class TestGrammarChecking (unittest.TestCase): "Tests du correcteur grammatical" @classmethod def setUpClass (cls): - gce.load() + gc_engine.load() cls._zError = re.compile(r"\{\{.*?\}\}") cls._aTestedRules = set() def test_parse (self): zOption = re.compile("^__([a-zA-Z0-9]+)__ ") @@ -205,11 +205,11 @@ if nError: print("Unexpected errors:", nError) # untested rules i = 0 echo("Untested rules:") - for _, sOpt, sLineId, sRuleId in gce.listRules(): + for _, sOpt, sLineId, sRuleId in gc_engine.listRules(): if sOpt != "@@@@" and sRuleId not in self._aTestedRules and not re.search("^[0-9]+[sp]$|^[pd]_", sRuleId): echo(sLineId + "/" + sRuleId) i += 1 echo("[{} untested rules]".format(i)) @@ -217,15 +217,15 @@ sText, sSugg = sLine.split("->>") return (sText.strip(), sSugg.strip()) def _getFoundErrors (self, sLine, sOption): if sOption: - gce.setOption(sOption, True) - aErrs = gce.parse(sLine) - gce.setOption(sOption, False) + gc_engine.gc_options.setOption(sOption, True) + aErrs = gc_engine.parse(sLine) + gc_engine.gc_options.setOption(sOption, False) else: - aErrs = gce.parse(sLine) + aErrs = gc_engine.parse(sLine) sRes = " " * len(sLine) sListErr = "" lAllSugg = [] for dErr in aErrs: sRes = sRes[:dErr["nStart"]] + "~" * (dErr["nEnd"] - dErr["nStart"]) + sRes[dErr["nEnd"]:] Index: gc_lang/fr/modules/textformatter.py ================================================================== --- gc_lang/fr/modules/textformatter.py +++ gc_lang/fr/modules/textformatter.py @@ -242,24 +242,32 @@ "ma_1letter_lowercase": False, "ma_1letter_uppercase": False } -class TextFormatter: - "Text Formatter: purge typographic mistakes from text" - - def __init__ (self): - for _, lTup in dReplTable.items(): - for i, t in enumerate(lTup): - lTup[i] = (re.compile(t[0]), t[1]) - - def formatText (self, sText): - "returns formatted text" - for sOptName, bVal in dDefaultOptions.items(): - if bVal: - for zRgx, sRep in dReplTable[sOptName]: - sText = zRgx.sub(sRep, sText) - return sText - - def getDefaultOptions (self): - "returns default options" - return dDefaultOptions.copy() +_bCompiled = False + +def _compileRegex(): + global _bCompiled + for _, lTup in dReplTable.items(): + for i, t in enumerate(lTup): + lTup[i] = (re.compile(t[0]), t[1]) + _bCompiled = True + + +def formatText (sText, dOpt=None): + "returns formatted text" + if not _bCompiled: + _compileRegex() + dOptions = getDefaultOptions() + if dOpt: + dOptions.update(dOpt) + for sOptName, bVal in dOptions.items(): + if bVal: + for zRgx, sRep in dReplTable[sOptName]: + sText = zRgx.sub(sRep, sText) + return sText + + +def getDefaultOptions (): + "returns default options" + return dDefaultOptions.copy() Index: gc_lang/fr/oxt/ContextMenu/ContextMenu.py ================================================================== --- gc_lang/fr/oxt/ContextMenu/ContextMenu.py +++ gc_lang/fr/oxt/ContextMenu/ContextMenu.py @@ -8,30 +8,28 @@ from com.sun.star.task import XJob from com.sun.star.ui import XContextMenuInterceptor #from com.sun.star.ui.ContextMenuInterceptorAction import IGNORED #from com.sun.star.ui.ContextMenuInterceptorAction import EXECUTE_MODIFIED -import grammalecte.fr.lexicographe as lxg from grammalecte.graphspell.spellchecker import SpellChecker from grammalecte.graphspell.echo import echo import helpers xDesktop = None oSpellChecker = None -oLexicographe = None class MyContextMenuInterceptor (XContextMenuInterceptor, unohelper.Base): def __init__ (self, ctx): self.ctx = ctx def notifyContextMenuExecute (self, xEvent): sWord = self._getWord() try: - aItem, aVerb = oLexicographe.analyzeWord(sWord) - if not aItem: + lWordAndMorph = oSpellChecker.analyze(sWord) + if not lWordAndMorph: return uno.Enum("com.sun.star.ui.ContextMenuInterceptorAction", "IGNORED") # don’t work on AOO, have to import the value #return IGNORED xContextMenu = xEvent.ActionTriggerContainer if xContextMenu: # entries index @@ -38,30 +36,31 @@ i = xContextMenu.Count nUnoConstantLine = uno.getConstantByName("com.sun.star.ui.ActionTriggerSeparatorType.LINE") # word analysis i = self._addItemToContextMenu(xContextMenu, i, "ActionTriggerSeparator", SeparatorType=nUnoConstantLine) - for item in aItem: - if isinstance(item, str): - i = self._addItemToContextMenu(xContextMenu, i, "ActionTrigger", Text=item, CommandURL="service:net.grammalecte.AppLauncher?None") - elif isinstance(item, tuple): - sRoot, lMorph = item + for sWord, lMorph in lWordAndMorph: + if len(lMorph) == 1: + sMorph, sReadableMorph = lMorph[0] + i = self._addItemToContextMenu(xContextMenu, i, "ActionTrigger", Text=sWord + " : " + sReadableMorph, CommandURL="service:net.grammalecte.AppLauncher?None") + elif len(lMorph) >= 1: # submenu xSubMenuContainer = xContextMenu.createInstance("com.sun.star.ui.ActionTriggerContainer") - for j, s in enumerate(lMorph): - self._addItemToContextMenu(xSubMenuContainer, j, "ActionTrigger", Text=s, CommandURL="service:net.grammalecte.AppLauncher?None") + for j, (sMorph, sReadableMorph) in enumerate(lMorph): + self._addItemToContextMenu(xSubMenuContainer, j, "ActionTrigger", Text=sReadableMorph, CommandURL="service:net.grammalecte.AppLauncher?None") # create root menu entry - i = self._addItemToContextMenu(xContextMenu, i, "ActionTrigger", Text=sRoot, SubContainer=xSubMenuContainer) + i = self._addItemToContextMenu(xContextMenu, i, "ActionTrigger", Text=sWord, SubContainer=xSubMenuContainer) else: - i = self._addItemToContextMenu(xContextMenu, i, "ActionTrigger", Text="# erreur : {}".format(item)) + i = self._addItemToContextMenu(xContextMenu, i, "ActionTrigger", Text=sWord + " : [erreur] aucun résultat trouvé.") # Links to Conjugueur + aVerb = { sMorph[1:sMorph.find("/")] for sMorph in oSpellChecker.getMorph(sWord) if ":V" in sMorph } if aVerb: i = self._addItemToContextMenu(xContextMenu, i, "ActionTriggerSeparator", SeparatorType=nUnoConstantLine) for sVerb in aVerb: - i = self._addItemToContextMenu(xContextMenu, i, "ActionTrigger", Text="Conjuguer “{}”…".format(sVerb), - CommandURL="service:net.grammalecte.AppLauncher?CJ/"+sVerb) + i = self._addItemToContextMenu(xContextMenu, i, "ActionTrigger", Text="Conjuguer “{}”…".format(sVerb), \ + CommandURL="service:net.grammalecte.AppLauncher?CJ/"+sVerb) # Search xDoc = xDesktop.getCurrentComponent() xViewCursor = xDoc.CurrentController.ViewCursor if not xViewCursor.isCollapsed(): @@ -116,11 +115,10 @@ class JobExecutor (XJob, unohelper.Base): def __init__ (self, ctx): self.ctx = ctx global xDesktop global oSpellChecker - global oLexicographe try: if not xDesktop: xDesktop = self.ctx.getServiceManager().createInstanceWithContext('com.sun.star.frame.Desktop', self.ctx) if not oSpellChecker: xCurCtx = uno.getComponentContext() @@ -128,12 +126,10 @@ if hasattr(oGC, "getSpellChecker"): # https://bugs.documentfoundation.org/show_bug.cgi?id=97790 oSpellChecker = oGC.getSpellChecker() else: oSpellChecker = SpellChecker("${lang}", "fr-allvars.bdic") - if not oLexicographe: - oLexicographe = lxg.Lexicographe(oSpellChecker) except: traceback.print_exc() def execute (self, args): if not args: Index: gc_lang/fr/webext/gce_worker.js ================================================================== --- gc_lang/fr/webext/gce_worker.js +++ gc_lang/fr/webext/gce_worker.js @@ -45,10 +45,11 @@ importScripts("grammalecte/fr/phonet.js"); importScripts("grammalecte/fr/cregex.js"); importScripts("grammalecte/fr/gc_options.js"); importScripts("grammalecte/fr/gc_rules.js"); importScripts("grammalecte/fr/gc_rules_graph.js"); +importScripts("grammalecte/fr/gc_engine_func.js"); importScripts("grammalecte/fr/gc_engine.js"); importScripts("grammalecte/fr/lexicographe.js"); importScripts("grammalecte/tests.js"); /* Warning. @@ -184,19 +185,19 @@ oLxg = new Lexicographe(oSpellChecker, oTokenizer, oLocution); if (dOptions !== null) { if (!(dOptions instanceof Map)) { dOptions = helpers.objectToMap(dOptions); } - gc_engine.setOptions(dOptions); + gc_options.setOptions(dOptions); } //tests(); bInitDone = true; } else { console.log("[Worker] Already initialized…") } - // we always retrieve options from the gc_engine, for setOptions filters obsolete options - dOptions = helpers.mapToObject(gc_engine.getOptions()); + // we always retrieve options from the gc_options, for setOptions filters obsolete options + dOptions = helpers.mapToObject(gc_options.getOptions()); postMessage(createResponse("init", dOptions, oInfo, true)); } catch (e) { console.error(e); postMessage(createResponse("init", createErrorResult(e, "init failed"), oInfo, true, true)); Index: grammalecte-cli.py ================================================================== --- grammalecte-cli.py +++ grammalecte-cli.py @@ -11,10 +11,11 @@ import re import traceback import grammalecte import grammalecte.text as txt +import grammalecte.fr.textformatter as tf from grammalecte.graphspell.echo import echo _EXAMPLE = "Quoi ? Racontes ! Racontes-moi ! Bon sangg, parles ! Oui. Il y a des menteur partout. " \ "Je suit sidéré par la brutales arrogance de cette homme-là. Quelle salopard ! Un escrocs de la pire espece. " \ @@ -149,29 +150,27 @@ xParser.add_argument("-off", "--opt_off", nargs="+", help="deactivate options") xParser.add_argument("-roff", "--rule_off", nargs="+", help="deactivate rules") xParser.add_argument("-d", "--debug", help="debugging mode (only in interactive mode)", action="store_true") xArgs = xParser.parse_args() - oGrammarChecker = grammalecte.GrammarChecker("fr") - oSpellChecker = oGrammarChecker.getSpellChecker() - oLexicographer = oGrammarChecker.getLexicographer() - oTextFormatter = oGrammarChecker.getTextFormatter() + grammalecte.load() + oSpellChecker = grammalecte.getSpellChecker() if xArgs.personal_dict: oJSON = loadDictionary(xArgs.personal_dict) if oJSON: oSpellChecker.setPersonalDictionary(oJSON) if not xArgs.json: echo("Python v" + sys.version) - echo("Grammalecte v{}".format(oGrammarChecker.gce.version)) + echo("Grammalecte v{}".format(grammalecte.version)) # list options or rules if xArgs.list_options or xArgs.list_rules: if xArgs.list_options: - oGrammarChecker.gce.displayOptions("fr") + grammalecte.gc_options.displayOptions() if xArgs.list_rules: - oGrammarChecker.gce.displayRules(None if xArgs.list_rules == "*" else xArgs.list_rules) + grammalecte.displayRules(None if xArgs.list_rules == "*" else xArgs.list_rules) exit() # spell suggestions if xArgs.suggest: for lSugg in oSpellChecker.suggest(xArgs.suggest): @@ -187,40 +186,40 @@ xArgs.context = False if xArgs.concat_lines: xArgs.textformatter = False # grammar options - oGrammarChecker.gce.setOptions({"html": True, "latex": True}) + grammalecte.gc_options.setOptions({"html": True, "latex": True}) if xArgs.opt_on: - oGrammarChecker.gce.setOptions({ opt:True for opt in xArgs.opt_on }) + grammalecte.gc_options.setOptions({ opt:True for opt in xArgs.opt_on }) if xArgs.opt_off: - oGrammarChecker.gce.setOptions({ opt:False for opt in xArgs.opt_off }) + grammalecte.gc_options.setOptions({ opt:False for opt in xArgs.opt_off }) # disable grammar rules if xArgs.rule_off: for sRule in xArgs.rule_off: - oGrammarChecker.gce.ignoreRule(sRule) + grammalecte.ignoreRule(sRule) if xArgs.file or xArgs.file_to_file: # file processing sFile = xArgs.file or xArgs.file_to_file hDst = open(sFile[:sFile.rfind(".")]+".res.txt", "w", encoding="utf-8", newline="\n") if xArgs.file_to_file or sys.platform == "win32" else None bComma = False if xArgs.json: - output('{ "grammalecte": "'+oGrammarChecker.gce.version+'", "lang": "'+oGrammarChecker.gce.lang+'", "data" : [\n', hDst) + output('{ "grammalecte": "'+grammalecte.version+'", "lang": "'+grammalecte.lang+'", "data" : [\n', hDst) for i, sText, lLineSet in generateParagraphFromFile(sFile, xArgs.concat_lines): if xArgs.textformatter or xArgs.textformatteronly: - sText = oTextFormatter.formatText(sText) + sText = tf.formatText(sText) if xArgs.textformatteronly: output(sText, hDst) continue if xArgs.json: - sText = oGrammarChecker.getParagraphErrorsAsJSON(i, sText, bContext=xArgs.context, bEmptyIfNoErrors=xArgs.only_when_errors, \ + sText = grammalecte.getParagraphErrorsAsJSON(i, sText, bContext=xArgs.context, bEmptyIfNoErrors=xArgs.only_when_errors, \ bSpellSugg=xArgs.with_spell_sugg, bReturnText=xArgs.textformatter, lLineSet=lLineSet) else: - sText, _ = oGrammarChecker.getParagraphWithErrors(sText, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSugg=xArgs.with_spell_sugg, nWidth=xArgs.width) + sText, _ = grammalecte.getParagraphWithErrors(sText, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSugg=xArgs.with_spell_sugg, nWidth=xArgs.width) if sText: if xArgs.json and bComma: output(",\n", hDst) output(sText, hDst) bComma = True @@ -232,13 +231,13 @@ # file processing: interactive mode sFile = xArgs.interactive_file_to_file hDst = open(sFile[:sFile.rfind(".")]+".res.txt", "w", encoding="utf-8", newline="\n") for i, sText, lLineSet in generateParagraphFromFile(sFile, xArgs.concat_lines): if xArgs.textformatter: - sText = oTextFormatter.formatText(sText) + sText = tf.formatText(sText) while True: - sResult, lErrors = oGrammarChecker.getParagraphWithErrors(sText, bEmptyIfNoErrors=False, bSpellSugg=True, nWidth=xArgs.width) + sResult, lErrors = grammalecte.getParagraphWithErrors(sText, bEmptyIfNoErrors=False, bSpellSugg=True, nWidth=xArgs.width) print("\n\n============================== Paragraph " + str(i) + " ==============================\n") echo(sResult) print("\n") vCommand = getCommand() if vCommand == "q": @@ -269,12 +268,14 @@ while True: if sText.startswith("?"): for sWord in sText[1:].strip().split(): if sWord: echo("* " + sWord) - for sMorph in oSpellChecker.getMorph(sWord): - echo(" {:<32} {}".format(sMorph, oLexicographer.formatTags(sMorph))) + for sElem, aRes in oSpellChecker.analyze(sWord): + echo(" - " + sElem) + for sMorph, sMeaning in aRes: + echo(" {:<40} {}".format(sMorph, sMeaning)) elif sText.startswith("!"): for sWord in sText[1:].strip().split(): if sWord: for lSugg in oSpellChecker.suggest(sWord): echo(" | ".join(lSugg)) @@ -290,47 +291,47 @@ sFlexPattern = sSearch sTagsPattern = "" for aRes in oSpellChecker.select(sFlexPattern, sTagsPattern): echo("{:<30} {:<30} {}".format(*aRes)) elif sText.startswith("/o+ "): - oGrammarChecker.gce.setOptions({ opt:True for opt in sText[3:].strip().split() if opt in oGrammarChecker.gce.getOptions() }) + grammalecte.gc_options.setOptions({ opt:True for opt in sText[3:].strip().split() if opt in grammalecte.gc_options.dOptions }) echo("done") elif sText.startswith("/o- "): - oGrammarChecker.gce.setOptions({ opt:False for opt in sText[3:].strip().split() if opt in oGrammarChecker.gce.getOptions() }) + grammalecte.gc_options.setOptions({ opt:False for opt in sText[3:].strip().split() if opt in grammalecte.gc_options.dOptions }) echo("done") elif sText.startswith("/r- "): for sRule in sText[3:].strip().split(): - oGrammarChecker.gce.ignoreRule(sRule) + grammalecte.ignoreRule(sRule) echo("done") elif sText.startswith("/r+ "): for sRule in sText[3:].strip().split(): - oGrammarChecker.gce.reactivateRule(sRule) + grammalecte.reactivateRule(sRule) echo("done") elif sText in ("/debug", "/d"): xArgs.debug = not xArgs.debug echo("debug mode on" if xArgs.debug else "debug mode off") elif sText in ("/textformatter", "/tf"): xArgs.textformatter = not xArgs.textformatter - echo("textformatter on" if xArgs.debug else "textformatter off") + echo("textformatter on" if xArgs.textformatter else "textformatter off") elif sText in ("/help", "/h"): echo(_HELP) elif sText in ("/lopt", "/lo"): - oGrammarChecker.gce.displayOptions("fr") + grammalecte.gc_options.displayOptions() elif sText.startswith("/lr"): sText = sText.strip() sFilter = sText[sText.find(" "):].strip() if " " in sText else None - oGrammarChecker.gce.displayRules(sFilter) + grammalecte.displayRules(sFilter) elif sText in ("/quit", "/q"): break elif sText.startswith("/rl"): # reload (todo) pass elif sText.startswith("$"): for sParagraph in txt.getParagraph(sText[1:]): if xArgs.textformatter: - sParagraph = oTextFormatter.formatText(sParagraph) - lParagraphErrors, lSentences = oGrammarChecker.gce.parse(sParagraph, bDebug=xArgs.debug, bFullInfo=True) + sParagraph = tf.formatText(sParagraph) + lParagraphErrors, lSentences = grammalecte.parse(sParagraph, bDebug=xArgs.debug, bFullInfo=True) echo(txt.getReadableErrors(lParagraphErrors, xArgs.width)) for dSentence in lSentences: echo("{nStart}:{nEnd}".format(**dSentence)) echo(" <" + dSentence["sSentence"]+">") for dToken in dSentence["lToken"]: @@ -341,16 +342,16 @@ "·".join(dToken.get("aTags", "")) ) ) echo(txt.getReadableErrors(dSentence["lGrammarErrors"], xArgs.width)) else: for sParagraph in txt.getParagraph(sText): if xArgs.textformatter: - sParagraph = oTextFormatter.formatText(sParagraph) - sRes, _ = oGrammarChecker.getParagraphWithErrors(sParagraph, bEmptyIfNoErrors=xArgs.only_when_errors, nWidth=xArgs.width, bDebug=xArgs.debug) + sParagraph = tf.formatText(sParagraph) + sRes, _ = grammalecte.getParagraphWithErrors(sParagraph, bEmptyIfNoErrors=xArgs.only_when_errors, nWidth=xArgs.width, bDebug=xArgs.debug) if sRes: echo("\n" + sRes) else: echo("\nNo error found.") sText = _getText(sInputText) if __name__ == '__main__': main() Index: grammalecte-server.py ================================================================== --- grammalecte-server.py +++ grammalecte-server.py @@ -14,29 +14,28 @@ from grammalecte.bottle import Bottle, run, request, response #, template, static_file import grammalecte import grammalecte.text as txt +import grammalecte.fr.textformatter as tf from grammalecte.graphspell.echo import echo #### GRAMMAR CHECKER #### -oGrammarChecker = grammalecte.GrammarChecker("fr", "Server") -oSpellChecker = oGrammarChecker.getSpellChecker() -oTextFormatter = oGrammarChecker.getTextFormatter() -oGCE = oGrammarChecker.getGCEngine() +grammalecte.load("Server") +oSpellChecker = grammalecte.getSpellChecker() def parseText (sText, dOptions=None, bFormatText=False, sError=""): "parse and return errors in a JSON format" - sJSON = '{ "program": "grammalecte-fr", "version": "'+oGCE.version+'", "lang": "'+oGCE.lang+'", "error": "'+sError+'", "data" : [\n' + sJSON = '{ "program": "grammalecte-fr", "version": "'+grammalecte.version+'", "lang": "'+grammalecte.lang+'", "error": "'+sError+'", "data" : [\n' sDataJSON = "" for i, sParagraph in enumerate(txt.getParagraph(sText), 1): if bFormatText: - sParagraph = oTextFormatter.formatText(sParagraph) - sResult = oGrammarChecker.getParagraphErrorsAsJSON(i, sParagraph, dOptions=dOptions, bEmptyIfNoErrors=True, bReturnText=bFormatText) + sParagraph = tf.formatText(sParagraph) + sResult = grammalecte.getParagraphErrorsAsJSON(i, sParagraph, dOptions=dOptions, bEmptyIfNoErrors=True, bReturnText=bFormatText) if sResult: if sDataJSON: sDataJSON += ",\n" sDataJSON += sResult sJSON += sDataJSON + "\n]}\n" @@ -185,13 +184,13 @@ @app.route("/get_options/fr") def listOptions (): "returns grammar options in a text JSON format" sUserId = request.cookies.user_id - dOptions = dUser[sUserId]["gc_options"] if sUserId and sUserId in dUser else oGCE.getOptions() + dOptions = dUser[sUserId]["gc_options"] if sUserId and sUserId in dUser else grammalecte.gc_options.getOptions() response.set_header("Content-Type", "application/json; charset=UTF-8") - return '{ "values": ' + json.dumps(dOptions, ensure_ascii=False) + ', "labels": ' + json.dumps(oGCE.getOptionsLabels("fr"), ensure_ascii=False) + ' }' + return '{ "values": ' + json.dumps(dOptions, ensure_ascii=False) + ', "labels": ' + json.dumps(grammalecte.gc_options.getOptionsLabels("fr"), ensure_ascii=False) + ' }' @app.route("/suggest/fr/") def suggestGet (token): response.set_header("Content-Type", "application/json; charset=UTF-8") try: @@ -217,11 +216,11 @@ response.set_cookie("user_id", request.cookies.user_id, path="/", max_age=86400) # we renew cookie for 24h else: response.delete_cookie("user_id", path="/") if request.forms.options: try: - dUserOptions = dict(oGCE.getOptions()) if not dUserOptions else dict(dUserOptions) + dUserOptions = grammalecte.gc_options.getOptions() if not dUserOptions else dict(dUserOptions) dUserOptions.update(json.loads(request.forms.options)) except (TypeError, json.JSONDecodeError): sError = "Request options not used." response.set_header("Content-Type", "application/json; charset=UTF-8") try: @@ -237,11 +236,11 @@ def setOptions (): "set grammar options for current user" response.set_header("Content-Type", "application/json; charset=UTF-8") if request.forms.options: sUserId = request.cookies.user_id if request.cookies.user_id else next(userGenerator) - dOptions = dUser[sUserId]["gc_options"] if sUserId in dUser else dict(oGCE.getOptions()) + dOptions = dUser[sUserId]["gc_options"] if sUserId in dUser else grammalecte.gc_options.getOptions() try: dOptions.update(json.loads(request.forms.options)) dUser[sUserId] = { "time": int(time.time()), "gc_options": dOptions } response.set_cookie("user_id", sUserId, path="/", max_age=86400) # 24h return json.dumps(dUser[sUserId]["gc_options"], ensure_ascii=False) @@ -262,11 +261,11 @@ return '{"message" : "Done."}' @app.route("/format_text/fr", method="POST") def formatText (): "apply the text formatter and returns text" - return oTextFormatter.formatText(request.forms.text) + return tf.formatText(request.forms.text) #@app.route('/static/') #def server_static (filepath): # return static_file(filepath, root='./views/static') @@ -314,20 +313,20 @@ if bTestPage: TESTPAGE = True HOMEPAGE = HOMEPAGE.replace("{SERVER_PORT}", str(nPort)) if dOptions: - oGCE.setOptions(dOptions) + grammalecte.gc_options.setOptions(dOptions) # Python version print("Python: " + sys.version) if sys.version < "3.7": print("Python 3.7+ required") return # Grammalecte - echo("Grammalecte v{}".format(oGCE.version)) - oGCE.displayOptions() + echo("Grammalecte v{}".format(grammalecte.version)) + grammalecte.gc_options.displayOptions() # Process Pool Executor initExecutor(nMultiCPU) # Server (Bottle) run(app, host=sHost, port=nPort) Index: graphspell/fr.py ================================================================== --- graphspell/fr.py +++ graphspell/fr.py @@ -1,9 +1,23 @@ """ -Default suggestion for French language +Lexicographer for the French language """ +# Note: +# This mode must contains at least: +# : a dictionary for default suggestions. +# : a boolean False +# if the boolean is True, 3 functions are required: +# split(sWord) -> returns a list of string (that will be analyzed) +# analyze(sWord) -> returns a string with the meaning of word +# formatTags(sTags) -> returns a string with the meaning of tags + + +import re + +#### Suggestions + dSugg = { "bcp": "beaucoup", "ca": "ça", "cad": "c’est-à-dire", "cb": "combien|CB", @@ -82,10 +96,11 @@ "XXVIième": "XXVIᵉ", "XXVIIième": "XXVIIᵉ", "XXVIIIième": "XXVIIIᵉ", "XXIXième": "XXIXᵉ", "XXXième": "XXXᵉ", + "Ier": "Iᵉʳ", "Ière": "Iʳᵉ", "IIème": "IIᵉ", "IIIème": "IIIᵉ", "IVème": "IVᵉ", @@ -114,5 +129,226 @@ "XXVIIème": "XXVIIᵉ", "XXVIIIème": "XXVIIIᵉ", "XXIXème": "XXIXᵉ", "XXXème": "XXXᵉ" } + + +#### Lexicographer + +bLexicographer = True + +_dTAGS = { + ':N': (" nom,", "Nom"), + ':A': (" adjectif,", "Adjectif"), + ':M1': (" prénom,", "Prénom"), + ':M2': (" patronyme,", "Patronyme, matronyme, nom de famille…"), + ':MP': (" nom propre,", "Nom propre"), + ':W': (" adverbe,", "Adverbe"), + ':J': (" interjection,", "Interjection"), + ':B': (" nombre,", "Nombre"), + ':T': (" titre,", "Titre de civilité"), + + ':e': (" épicène", "épicène"), + ':m': (" masculin", "masculin"), + ':f': (" féminin", "féminin"), + ':s': (" singulier", "singulier"), + ':p': (" pluriel", "pluriel"), + ':i': (" invariable", "invariable"), + + ':V1': (" verbe (1ᵉʳ gr.),", "Verbe du 1ᵉʳ groupe"), + ':V2': (" verbe (2ᵉ gr.),", "Verbe du 2ᵉ groupe"), + ':V3': (" verbe (3ᵉ gr.),", "Verbe du 3ᵉ groupe"), + ':V0e': (" verbe,", "Verbe auxiliaire être"), + ':V0a': (" verbe,", "Verbe auxiliaire avoir"), + + ':Y': (" infinitif,", "infinitif"), + ':P': (" participe présent,", "participe présent"), + ':Q': (" participe passé,", "participe passé"), + ':Ip': (" présent,", "indicatif présent"), + ':Iq': (" imparfait,", "indicatif imparfait"), + ':Is': (" passé simple,", "indicatif passé simple"), + ':If': (" futur,", "indicatif futur"), + ':K': (" conditionnel présent,", "conditionnel présent"), + ':Sp': (" subjonctif présent,", "subjonctif présent"), + ':Sq': (" subjonctif imparfait,", "subjonctif imparfait"), + ':E': (" impératif,", "impératif"), + + ':1s': (" 1ʳᵉ p. sg.,", "verbe : 1ʳᵉ personne du singulier"), + ':1ŝ': (" présent interr. 1ʳᵉ p. sg.,", "verbe : 1ʳᵉ personne du singulier (présent interrogatif)"), + ':1ś': (" présent interr. 1ʳᵉ p. sg.,", "verbe : 1ʳᵉ personne du singulier (présent interrogatif)"), + ':2s': (" 2ᵉ p. sg.,", "verbe : 2ᵉ personne du singulier"), + ':3s': (" 3ᵉ p. sg.,", "verbe : 3ᵉ personne du singulier"), + ':1p': (" 1ʳᵉ p. pl.,", "verbe : 1ʳᵉ personne du pluriel"), + ':2p': (" 2ᵉ p. pl.,", "verbe : 2ᵉ personne du pluriel"), + ':3p': (" 3ᵉ p. pl.,", "verbe : 3ᵉ personne du pluriel"), + ':3p!': (" 3ᵉ p. pl.,", "verbe : 3ᵉ personne du pluriel (prononciation distinctive)"), + + ':G': ("", "Mot grammatical"), + ':X': (" adverbe de négation,", "Adverbe de négation"), + ':U': (" adverbe interrogatif,", "Adverbe interrogatif"), + ':R': (" préposition,", "Préposition"), + ':Rv': (" préposition verbale,", "Préposition verbale"), + ':D': (" déterminant,", "Déterminant"), + ':Dd': (" déterminant démonstratif,", "Déterminant démonstratif"), + ':De': (" déterminant exclamatif,", "Déterminant exclamatif"), + ':Dp': (" déterminant possessif,", "Déterminant possessif"), + ':Di': (" déterminant indéfini,", "Déterminant indéfini"), + ':Dn': (" déterminant négatif,", "Déterminant négatif"), + ':Od': (" pronom démonstratif,", "Pronom démonstratif"), + ':Oi': (" pronom indéfini,", "Pronom indéfini"), + ':On': (" pronom indéfini négatif,", "Pronom indéfini négatif"), + ':Ot': (" pronom interrogatif,", "Pronom interrogatif"), + ':Or': (" pronom relatif,", "Pronom relatif"), + ':Ow': (" pronom adverbial,", "Pronom adverbial"), + ':Os': (" pronom personnel sujet,", "Pronom personnel sujet"), + ':Oo': (" pronom personnel objet,", "Pronom personnel objet"), + ':Ov': (" préverbe,", "Préverbe (pronom personnel objet, +ne)"), + ':O1': (" 1ʳᵉ pers.,", "Pronom : 1ʳᵉ personne"), + ':O2': (" 2ᵉ pers.,", "Pronom : 2ᵉ personne"), + ':O3': (" 3ᵉ pers.,", "Pronom : 3ᵉ personne"), + ':C': (" conjonction,", "Conjonction"), + ':Ĉ': (" conjonction (él.),", "Conjonction (élément)"), + ':Cc': (" conjonction de coordination,", "Conjonction de coordination"), + ':Cs': (" conjonction de subordination,", "Conjonction de subordination"), + ':Ĉs': (" conjonction de subordination (él.),", "Conjonction de subordination (élément)"), + + ':Ñ': (" locution nominale (él.),", "Locution nominale (élément)"), + ':Â': (" locution adjectivale (él.),", "Locution adjectivale (élément)"), + ':Ṽ': (" locution verbale (él.),", "Locution verbale (élément)"), + ':Ŵ': (" locution adverbiale (él.),", "Locution adverbiale (élément)"), + ':Ŕ': (" locution prépositive (él.),", "Locution prépositive (élément)"), + ':Ĵ': (" locution interjective (él.),", "Locution interjective (élément)"), + + ':Zp': (" préfixe,", "Préfixe"), + ':Zs': (" suffixe,", "Suffixe"), + + ':H': ("", ""), + + ':@': ("", ""), + ':@p': ("signe de ponctuation", "Signe de ponctuation"), + ':@s': ("signe", "Signe divers"), + + ';S': (" : symbole (unité de mesure)", "Symbole (unité de mesure)"), + + '/*': ("", "Sous-dictionnaire "), + '/C': (" ", "Sous-dictionnaire "), + '/M': ("", "Sous-dictionnaire "), + '/R': (" ", "Sous-dictionnaire "), + '/A': ("", "Sous-dictionnaire "), + '/X': ("", "Sous-dictionnaire ") +} + +_dValues = { + 'd’': "(de), préposition ou déterminant épicène invariable", + 'l’': "(le/la), déterminant ou pronom personnel objet, masculin/féminin singulier", + 'j’': "(je), pronom personnel sujet, 1ʳᵉ pers., épicène singulier", + 'm’': "(me), pronom personnel objet, 1ʳᵉ pers., épicène singulier", + 't’': "(te), pronom personnel objet, 2ᵉ pers., épicène singulier", + 's’': "(se), pronom personnel objet, 3ᵉ pers., épicène singulier/pluriel", + 'n’': "(ne), adverbe de négation", + 'c’': "(ce), pronom démonstratif, masculin singulier/pluriel", + 'ç’': "(ça), pronom démonstratif, masculin singulier", + 'qu’': "(que), conjonction de subordination", + 'lorsqu’': "(lorsque), conjonction de subordination", + 'puisqu’': "(puisque), conjonction de subordination", + 'quoiqu’': "(quoique), conjonction de subordination", + 'jusqu’': "(jusque), préposition", + + '-je': " pronom personnel sujet, 1ʳᵉ pers. sing.", + '-tu': " pronom personnel sujet, 2ᵉ pers. sing.", + '-il': " pronom personnel sujet, 3ᵉ pers. masc. sing.", + '-on': " pronom personnel sujet, 3ᵉ pers. sing. ou plur.", + '-elle': " pronom personnel sujet, 3ᵉ pers. fém. sing.", + '-t-il': " “t” euphonique + pronom personnel sujet, 3ᵉ pers. masc. sing.", + '-t-on': " “t” euphonique + pronom personnel sujet, 3ᵉ pers. sing. ou plur.", + '-t-elle': " “t” euphonique + pronom personnel sujet, 3ᵉ pers. fém. sing.", + '-nous': " pronom personnel sujet/objet, 1ʳᵉ pers. plur. ou COI (à nous), plur.", + '-vous': " pronom personnel sujet/objet, 2ᵉ pers. plur. ou COI (à vous), plur.", + '-ils': " pronom personnel sujet, 3ᵉ pers. masc. plur.", + '-elles': " pronom personnel sujet, 3ᵉ pers. masc. plur.", + + "-là": " particule démonstrative", + "-ci": " particule démonstrative", + + '-le': " COD, masc. sing.", + '-la': " COD, fém. sing.", + '-les': " COD, plur.", + + '-moi': " COI (à moi), sing.", + '-toi': " COI (à toi), sing.", + '-lui': " COI (à lui ou à elle), sing.", + '-leur': " COI (à eux ou à elles), plur.", + + '-le-moi': " COD, masc. sing. + COI (à moi), sing.", + '-le-toi': " COD, masc. sing. + COI (à toi), sing.", + '-le-lui': " COD, masc. sing. + COI (à lui ou à elle), sing.", + '-le-nous': " COD, masc. sing. + COI (à nous), plur.", + '-le-vous': " COD, masc. sing. + COI (à vous), plur.", + '-le-leur': " COD, masc. sing. + COI (à eux ou à elles), plur.", + + '-la-moi': " COD, fém. sing. + COI (à moi), sing.", + '-la-toi': " COD, fém. sing. + COI (à toi), sing.", + '-la-lui': " COD, fém. sing. + COI (à lui ou à elle), sing.", + '-la-nous': " COD, fém. sing. + COI (à nous), plur.", + '-la-vous': " COD, fém. sing. + COI (à vous), plur.", + '-la-leur': " COD, fém. sing. + COI (à eux ou à elles), plur.", + + '-les-moi': " COD, plur. + COI (à moi), sing.", + '-les-toi': " COD, plur. + COI (à toi), sing.", + '-les-lui': " COD, plur. + COI (à lui ou à elle), sing.", + '-les-nous': " COD, plur. + COI (à nous), plur.", + '-les-vous': " COD, plur. + COI (à vous), plur.", + '-les-leur': " COD, plur. + COI (à eux ou à elles), plur.", + + '-y': " pronom adverbial", + "-m’y": " (me) pronom personnel objet + (y) pronom adverbial", + "-t’y": " (te) pronom personnel objet + (y) pronom adverbial", + "-s’y": " (se) pronom personnel objet + (y) pronom adverbial", + + '-en': " pronom adverbial", + "-m’en": " (me) pronom personnel objet + (en) pronom adverbial", + "-t’en": " (te) pronom personnel objet + (en) pronom adverbial", + "-s’en": " (se) pronom personnel objet + (en) pronom adverbial", +} + + +_zElidedPrefix = re.compile("(?i)^((?:[dljmtsncç]|quoiqu|lorsqu|jusqu|puisqu|qu)’)(.+)") +_zCompoundWord = re.compile("(?i)(\\w+)(-(?:(?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts]’(?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous))$") +_zTag = re.compile("[:;/][\\w*][^:;/]*") + +def split (sWord): + "split word in 3 parts: prefix, root, suffix" + sWord = sWord.replace("'", "’") + sPrefix = "" + sSuffix = "" + # préfixe élidé + m = _zElidedPrefix.match(sWord) + if m: + sPrefix = m.group(1) + sWord = m.group(2) + # mots composés + m = _zCompoundWord.match(sWord) + if m: + sWord = m.group(1) + sSuffix = m.group(2) + return sPrefix, sWord, sSuffix + + +def analyze (sWord): + "return meaning of if found else an empty string" + sWord = sWord.lower() + if sWord in _dValues: + return _dValues[sWord] + return "" + + +def formatTags (sTags): + "returns string: readable tags" + sRes = "" + sTags = re.sub("(?<=V[1-3])[itpqnmr_eaxz]+", "", sTags) + sTags = re.sub("(?<=V0[ea])[itpqnmr_eaxz]+", "", sTags) + for m in _zTag.finditer(sTags): + sRes += _dTAGS.get(m.group(0), " [{}]".format(m.group(0)))[0] + if sRes.startswith(" verbe") and not sRes.endswith("infinitif"): + sRes += " [{}]".format(sTags[1:sTags.find("/")]) + return sRes.rstrip(",") Index: graphspell/spellchecker.py ================================================================== --- graphspell/spellchecker.py +++ graphspell/spellchecker.py @@ -34,12 +34,12 @@ self.oPersonalDic = self._loadDictionary(sfPersonalDic) self.bCommunityDic = bool(self.oCommunityDic) self.bPersonalDic = bool(self.oPersonalDic) self.oTokenizer = None # Default suggestions - self.dDefaultSugg = None - self.loadSuggestions(sLangCode) + self.lexicographer = None + self.loadLang(sLangCode) # storage self.bStorage = False self._dMorphologies = {} # key: flexion, value: list of morphologies self._dLemmas = {} # key: flexion, value: list of lemmas @@ -100,18 +100,34 @@ self.bPersonalDic = False # Default suggestions - def loadSuggestions (self, sLangCode): + def loadLang (self, sLangCode): "load default suggestion module for " try: - suggest = importlib.import_module("."+sLangCode, "grammalecte.graphspell") + self.lexicographer = importlib.import_module("."+sLangCode, "grammalecte.graphspell") except ImportError: print("No suggestion module for language <"+sLangCode+">") return - self.dDefaultSugg = suggest.dSugg + + def analyze (self, sWord): + "returns a list of words and their morphologies" + if not self.lexicographer: + return [] + lWordAndMorph = [] + for sElem in self.lexicographer.split(sWord): + if sElem: + lMorph = self.getMorph(sElem) + sLex = self.lexicographer.analyze(sElem) + if sLex: + aRes = [ (" | ".join(lMorph), sLex) ] + else: + aRes = [ (sMorph, self.lexicographer.formatTags(sMorph)) for sMorph in lMorph ] + if aRes: + lWordAndMorph.append((sElem, aRes)) + return lWordAndMorph # Storage def activateStorage (self): @@ -159,10 +175,11 @@ dWord[dToken['sValue']] = dWord.get(dToken['sValue'], 0) + 1 else: for sLemma in self.getLemma(dToken['sValue']): dWord[sLemma] = dWord.get(sLemma, 0) + 1 return dWord + # IBDAWG functions def isValidToken (self, sToken): "checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)" @@ -216,15 +233,15 @@ return self._dLemmas[sWord] return { s[1:s.find("/")] for s in self.getMorph(sWord) } def suggest (self, sWord, nSuggLimit=10): "generator: returns 1, 2 or 3 lists of suggestions" - if self.dDefaultSugg: - if sWord in self.dDefaultSugg: - yield self.dDefaultSugg[sWord].split("|") - elif sWord.istitle() and sWord.lower() in self.dDefaultSugg: - lRes = self.dDefaultSugg[sWord.lower()].split("|") + if self.lexicographer.dSugg: + if sWord in self.lexicographer.dSugg: + yield self.lexicographer.dSugg[sWord].split("|") + elif sWord.istitle() and sWord.lower() in self.lexicographer.dSugg: + lRes = self.lexicographer.dSugg[sWord.lower()].split("|") yield list(map(lambda sSugg: sSugg[0:1].upper()+sSugg[1:], lRes)) else: yield self.oMainDic.suggest(sWord, nSuggLimit, True) else: yield self.oMainDic.suggest(sWord, nSuggLimit, True)