Index: gc_core/js/lang_core/gc_engine.js ================================================================== --- gc_core/js/lang_core/gc_engine.js +++ gc_core/js/lang_core/gc_engine.js @@ -9,17 +9,20 @@ ${string} ${regex} ${map} -if(typeof(process) !== 'undefined') { +if (typeof(process) !== 'undefined') { + var gc_engine_func = require("./gc_engine_func.js"); var gc_options = require("./gc_options.js"); var gc_rules = require("./gc_rules.js"); var gc_rules_graph = require("./gc_rules_graph.js"); var cregex = require("./cregex.js"); var text = require("../text.js"); -} else if (typeof(require) !== 'undefined') { +} +else if (typeof(require) !== 'undefined') { + var gc_engine_func = require("resource://grammalecte/${lang}/gc_engine_func.js"); var gc_options = require("resource://grammalecte/${lang}/gc_options.js"); var gc_rules = require("resource://grammalecte/${lang}/gc_rules.js"); var gc_rules_graph = require("resource://grammalecte/${lang}/gc_rules_graph.js"); var cregex = require("resource://grammalecte/${lang}/cregex.js"); var text = require("resource://grammalecte/text.js"); @@ -33,25 +36,10 @@ aNew[i] = aArray[i].slice(0,1).toUpperCase() + aArray[i].slice(1); } return aNew; } - -// data -let _sAppContext = ""; // what software is running -let _dOptions = null; -let _dOptionsColors = null; -let _oSpellChecker = null; -let _oTokenizer = null; -let _aIgnoredRules = new Set(); - - -function echo (x) { - console.log(x); - return true; -} - var gc_engine = { //// Informations @@ -60,36 +48,46 @@ pkg: "${implname}", name: "${name}", version: "${version}", author: "${author}", + //// Tools + oSpellChecker: null, + oTokenizer: null, + + //// Data + aIgnoredRules: new Set(), + oOptionsColors: null, + //// Initialization load: function (sContext="JavaScript", sColorType="aRGB", sPath="") { try { - if(typeof(process) !== 'undefined') { + if (typeof(process) !== 'undefined') { var spellchecker = require("../graphspell/spellchecker.js"); - _oSpellChecker = new 
spellchecker.SpellChecker("${lang}", "", "${dic_main_filename_js}", "${dic_community_filename_js}", "${dic_personal_filename_js}"); - } else if (typeof(require) !== 'undefined') { + this.oSpellChecker = new spellchecker.SpellChecker("${lang}", "", "${dic_main_filename_js}", "${dic_community_filename_js}", "${dic_personal_filename_js}"); + } + else if (typeof(require) !== 'undefined') { var spellchecker = require("resource://grammalecte/graphspell/spellchecker.js"); - _oSpellChecker = new spellchecker.SpellChecker("${lang}", "", "${dic_main_filename_js}", "${dic_community_filename_js}", "${dic_personal_filename_js}"); - } else { - _oSpellChecker = new SpellChecker("${lang}", sPath, "${dic_main_filename_js}", "${dic_community_filename_js}", "${dic_personal_filename_js}"); - } - _sAppContext = sContext; - _dOptions = gc_options.getOptions(sContext).gl_shallowCopy(); // duplication necessary, to be able to reset to default - _dOptionsColors = gc_options.getOptionsColors(sContext, sColorType); - _oTokenizer = _oSpellChecker.getTokenizer(); - _oSpellChecker.activateStorage(); + this.oSpellChecker = new spellchecker.SpellChecker("${lang}", "", "${dic_main_filename_js}", "${dic_community_filename_js}", "${dic_personal_filename_js}"); + } + else { + this.oSpellChecker = new SpellChecker("${lang}", sPath, "${dic_main_filename_js}", "${dic_community_filename_js}", "${dic_personal_filename_js}"); + } + this.oOptionsColors = gc_options.getOptionsColors(sContext, sColorType); + this.oTokenizer = this.oSpellChecker.getTokenizer(); + this.oSpellChecker.activateStorage(); + gc_engine_func.load(sContext, this.oSpellChecker) + gc_options.load(sContext) } catch (e) { console.error(e); } }, getSpellChecker: function () { - return _oSpellChecker; + return this.oSpellChecker; }, //// Rules getRules: function (bParagraph) { @@ -98,19 +96,19 @@ } return gc_rules.lParagraphRules; }, ignoreRule: function (sRuleId) { - _aIgnoredRules.add(sRuleId); + this.aIgnoredRules.add(sRuleId); }, 
resetIgnoreRules: function () { - _aIgnoredRules.clear(); + this.aIgnoredRules.clear(); }, reactivateRule: function (sRuleId) { - _aIgnoredRules.delete(sRuleId); + this.aIgnoredRules.delete(sRuleId); }, listRules: function* (sFilter=null) { // generator: returns tuple (sOption, sLineId, sRuleId) try { @@ -132,34 +130,10 @@ catch (e) { console.error(e); } }, - //// Options - - setOption: function (sOpt, bVal) { - if (_dOptions.has(sOpt)) { - _dOptions.set(sOpt, bVal); - } - }, - - setOptions: function (dOpt) { - _dOptions.gl_updateOnlyExistingKeys(dOpt); - }, - - getOptions: function () { - return _dOptions; - }, - - getDefaultOptions: function () { - return gc_options.getOptions(_sAppContext).gl_shallowCopy(); - }, - - resetOptions: function () { - _dOptions = gc_options.getOptions(_sAppContext).gl_shallowCopy(); - }, - //// Parsing parse: function (sText, sCountry="${country_default}", bDebug=false, dOptions=null, bContext=false, bFullInfo=false) { // init point to analyse and returns an iterable of errors or (with option ) a list of sentences with tokens and errors let oText = new TextParser(sText); @@ -201,12 +175,12 @@ return s; } parse (sCountry="${country_default}", bDebug=false, dOptions=null, bContext=false, bFullInfo=false) { // analyses and returns an iterable of errors or (with option ) a list of sentences with tokens and errors - let dOpt = dOptions || _dOptions; - let bShowRuleId = option('idrule'); + let dOpt = dOptions || gc_options.dOptions; + let bShowRuleId = gc_options.dOptions.gl_get('idrule', false); // parse paragraph try { this.parseText(this.sText, this.sText0, true, 0, sCountry, dOpt, bShowRuleId, bDebug, bContext); } catch (e) { @@ -224,11 +198,11 @@ for (let [iStart, iEnd] of text.getSentenceBoundaries(sText)) { try { this.sSentence = sText.slice(iStart, iEnd); this.sSentence0 = this.sText0.slice(iStart, iEnd); this.nOffsetWithinParagraph = iStart; - this.lToken = Array.from(_oTokenizer.genTokens(this.sSentence, true)); + this.lToken = 
Array.from(gc_engine.oTokenizer.genTokens(this.sSentence, true)); this.dTokenPos.clear(); for (let dToken of this.lToken) { if (dToken["sType"] != "INFO") { this.dTokenPos.set(dToken["nStart"], dToken); } @@ -235,11 +209,11 @@ } if (bFullInfo) { oSentence = { "nStart": iStart, "nEnd": iEnd, "sSentence": this.sSentence, "lToken": Array.from(this.lToken) }; for (let oToken of oSentence["lToken"]) { if (oToken["sType"] == "WORD") { - oToken["bValidToken"] = _oSpellChecker.isValidToken(oToken["sValue"]); + oToken["bValidToken"] = gc_engine.oSpellChecker.isValidToken(oToken["sValue"]); } } // the list of tokens is duplicated, to keep all tokens from being deleted when analysis } this.parseText(this.sSentence, this.sSentence0, false, iStart, sCountry, dOpt, bShowRuleId, bDebug, bContext); @@ -300,19 +274,19 @@ } sText = this.parseGraph(gc_rules_graph.dAllGraph[sGraphName], sCountry, dOptions, bShowRuleId, bDebug, bContext); } } } - else if (!sOption || option(sOption)) { + else if (!sOption || gc_options.dOptions.gl_get(sOption, false)) { for (let [zRegex, bUppercase, sLineId, sRuleId, nPriority, lActions, lGroups, lNegLookBefore] of lRuleGroup) { - if (!_aIgnoredRules.has(sRuleId)) { + if (!gc_engine.aIgnoredRules.has(sRuleId)) { while ((m = zRegex.gl_exec2(sText, lGroups, lNegLookBefore)) !== null) { let bCondMemo = null; for (let [sFuncCond, cActionType, sWhat, ...eAct] of lActions) { // action in lActions: [ condition, action type, replacement/suggestion/action[, iGroup[, message, URL]] ] try { - bCondMemo = (!sFuncCond || oEvalFunc[sFuncCond](sText, sText0, m, this.dTokenPos, sCountry, bCondMemo)); + bCondMemo = (!sFuncCond || gc_engine_func[sFuncCond](sText, sText0, m, this.dTokenPos, sCountry, bCondMemo)); if (bCondMemo) { switch (cActionType) { case "-": // grammar error //console.log("-> error detected in " + sLineId + "\nzRegex: " + zRegex.source); @@ -333,11 +307,11 @@ } break; case "=": // disambiguation //console.log("-> disambiguation by " + sLineId + 
"\nzRegex: " + zRegex.source); - oEvalFunc[sWhat](sText, m, this.dTokenPos); + gc_engine_func[sWhat](sText, m, this.dTokenPos); if (bDebug) { console.log("= " + m[0] + " # " + sLineId, "\nDA:", this.dTokenPos); } break; case ">": @@ -373,11 +347,11 @@ } update (sSentence, bDebug=false) { // update and retokenize this.sSentence = sSentence; - let lNewToken = Array.from(_oTokenizer.genTokens(sSentence, true)); + let lNewToken = Array.from(gc_engine.oTokenizer.genTokens(sSentence, true)); for (let oToken of lNewToken) { if (this.dTokenPos.gl_get(oToken["nStart"], {}).hasOwnProperty("lMorph")) { oToken["lMorph"] = this.dTokenPos.get(oToken["nStart"])["lMorph"]; } if (this.dTokenPos.gl_get(oToken["nStart"], {}).hasOwnProperty("aTags")) { @@ -471,11 +445,11 @@ } // analysable tokens if (oToken["sType"].slice(0,4) == "WORD") { // token lemmas if (oNode.hasOwnProperty("")) { - for (let sLemma of _oSpellChecker.getLemma(oToken["sValue"])) { + for (let sLemma of gc_engine.oSpellChecker.getLemma(oToken["sValue"])) { if (oNode[""].hasOwnProperty(sLemma)) { if (bDebug) { console.log(" MATCH: >" + sLemma); } yield { "iToken1": iToken1, "iNode": oNode[""][sLemma] }; @@ -483,11 +457,11 @@ } } } // morph arcs if (oNode.hasOwnProperty("")) { - let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : _oSpellChecker.getMorph(oToken["sValue"]); + let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : gc_engine.oSpellChecker.getMorph(oToken["sValue"]); if (lMorph.length > 0) { for (let sSearch in oNode[""]) { if (!sSearch.includes("¬")) { // no anti-pattern if (lMorph.some(sMorph => (sMorph.includes(sSearch)))) { @@ -527,11 +501,11 @@ } } } // regex morph arcs if (oNode.hasOwnProperty("")) { - let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : _oSpellChecker.getMorph(oToken["sValue"]); + let lMorph = (oToken.hasOwnProperty("lMorph")) ? 
oToken["lMorph"] : gc_engine.oSpellChecker.getMorph(oToken["sValue"]); if (lMorph.length > 0) { for (let sRegex in oNode[""]) { if (!sRegex.includes("¬")) { // no anti-pattern if (lMorph.some(sMorph => (sMorph.search(sRegex) !== -1))) { @@ -676,11 +650,11 @@ // Disambiguator [ option, condition, "=", replacement/suggestion/action ] // Tag [ option, condition, "/", replacement/suggestion/action, iTokenStart, iTokenEnd ] // Immunity [ option, condition, "!", "", iTokenStart, iTokenEnd ] // Test [ option, condition, ">", "" ] if (!sOption || dOptions.gl_get(sOption, false)) { - bCondMemo = !sFuncCond || oEvalFunc[sFuncCond](this.lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, this.dTags, this.sSentence, this.sSentence0); + bCondMemo = !sFuncCond || gc_engine_func[sFuncCond](this.lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, this.dTags, this.sSentence, this.sSentence0); if (bCondMemo) { if (cActionType == "-") { // grammar error let [iTokenStart, iTokenEnd, cStartLimit, cEndLimit, bCaseSvty, nPriority, sMessage, sURL] = eAct; let nTokenErrorStart = (iTokenStart > 0) ? nTokenOffset + iTokenStart : nLastToken + iTokenStart; @@ -708,11 +682,11 @@ console.log(` TEXT_PROCESSOR: [${this.lToken[nTokenStart]["sValue"]}:${this.lToken[nTokenEnd]["sValue"]}] > ${sWhat}`); } } else if (cActionType == "=") { // disambiguation - oEvalFunc[sWhat](this.lToken, nTokenOffset, nLastToken); + gc_engine_func[sWhat](this.lToken, nTokenOffset, nLastToken); if (bDebug) { console.log(` DISAMBIGUATOR: (${sWhat}) [${this.lToken[nTokenOffset+1]["sValue"]}:${this.lToken[nLastToken]["sValue"]}]`); } } else if (cActionType == ">") { @@ -790,11 +764,11 @@ let nStart = nOffset + m.start[iGroup]; let nEnd = nOffset + m.end[iGroup]; // suggestions let lSugg = []; if (sSugg.startsWith("=")) { - sSugg = oEvalFunc[sSugg.slice(1)](sText, m); + sSugg = gc_engine_func[sSugg.slice(1)](sText, m); lSugg = (sSugg) ? 
sSugg.split("|") : []; } else if (sSugg == "_") { lSugg = []; } else { lSugg = sSugg.gl_expand(m).split("|"); @@ -801,11 +775,11 @@ } if (bUppercase && lSugg.length > 0 && m[iGroup].slice(0,1).gl_isUpperCase()) { lSugg = capitalizeArray(lSugg); } // Message - let sMessage = (sMsg.startsWith("=")) ? oEvalFunc[sMsg.slice(1)](sText, m) : sMsg.gl_expand(m); + let sMessage = (sMsg.startsWith("=")) ? gc_engine_func[sMsg.slice(1)](sText, m) : sMsg.gl_expand(m); if (bShowRuleId) { sMessage += " #" + sLineId + " / " + sRuleId; } // return this._createError(nStart, nEnd, sLineId, sRuleId, sOption, sMessage, lSugg, sURL, bContext); @@ -813,11 +787,11 @@ _createErrorFromTokens (sSugg, nTokenOffset, nLastToken, iFirstToken, nStart, nEnd, sLineId, sRuleId, bCaseSvty, sMsg, sURL, bShowRuleId, sOption, bContext) { // suggestions let lSugg = []; if (sSugg.startsWith("=")) { - sSugg = oEvalFunc[sSugg.slice(1)](this.lToken, nTokenOffset, nLastToken); + sSugg = gc_engine_func[sSugg.slice(1)](this.lToken, nTokenOffset, nLastToken); lSugg = (sSugg) ? sSugg.split("|") : []; } else if (sSugg == "_") { lSugg = []; } else { lSugg = this._expand(sSugg, nTokenOffset, nLastToken).split("|"); @@ -824,11 +798,11 @@ } if (bCaseSvty && lSugg.length > 0 && this.lToken[iFirstToken]["sValue"].slice(0,1).gl_isUpperCase()) { lSugg = capitalizeArray(lSugg); } // Message - let sMessage = (sMsg.startsWith("=")) ? oEvalFunc[sMsg.slice(1)](this.lToken, nTokenOffset, nLastToken) : this._expand(sMsg, nTokenOffset, nLastToken); + let sMessage = (sMsg.startsWith("=")) ? 
gc_engine_func[sMsg.slice(1)](this.lToken, nTokenOffset, nLastToken) : this._expand(sMsg, nTokenOffset, nLastToken); if (bShowRuleId) { sMessage += " #" + sLineId + " / " + sRuleId; } // return this._createError(nStart, nEnd, sLineId, sRuleId, sOption, sMessage, lSugg, sURL, bContext); @@ -839,11 +813,11 @@ "nStart": nStart, "nEnd": nEnd, "sLineId": sLineId, "sRuleId": sRuleId, "sType": sOption || "notype", - "aColor": _dOptionsColors[sOption], + "aColor": gc_engine.oOptionsColors[sOption], "sMessage": sMessage, "aSuggestions": lSugg, "URL": sURL } if (bContext) { @@ -878,11 +852,11 @@ } else if (sRepl === "@") { sNew = "@".repeat(ln); } else if (sRepl.slice(0,1) === "=") { - sNew = oEvalFunc[sRepl.slice(1)](sText, m); + sNew = gc_engine_func[sRepl.slice(1)](sText, m); sNew = sNew + " ".repeat(ln-sNew.length); if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) { sNew = sNew.gl_toCapitalize(); } } else { @@ -919,11 +893,11 @@ } } } else { if (sWhat.startsWith("=")) { - sWhat = oEvalFunc[sWhat.slice(1)](this.lToken, nTokenOffset, nLastToken); + sWhat = gc_engine_func[sWhat.slice(1)](this.lToken, nTokenOffset, nLastToken); } else { sWhat = this._expand(sWhat, nTokenOffset, nLastToken); } let bUppercase = bCaseSvty && this.lToken[nTokenRewriteStart]["sValue"].slice(0,1).gl_isUpperCase(); if (nTokenRewriteEnd - nTokenRewriteStart == 0) { @@ -1027,485 +1001,23 @@ this.lToken.length = 0; this.lToken = lNewToken; } }; - -//////// Common functions - -function option (sOpt) { - // return true if option sOpt is active - return _dOptions.get(sOpt); -} - -var re = { - search: function (sRegex, sText) { - if (sRegex.startsWith("(?i)")) { - return sText.search(new RegExp(sRegex.slice(4), "i")) !== -1; - } else { - return sText.search(sRegex) !== -1; - } - }, - - createRegExp: function (sRegex) { - if (sRegex.startsWith("(?i)")) { - return new RegExp(sRegex.slice(4), "i"); - } else { - return new RegExp(sRegex); - } - } -} - - -//////// functions to get text outside pattern 
scope - -// warning: check compile_rules.py to understand how it works - -function nextword (s, iStart, n) { - // get the nth word of the input string or empty string - let z = new RegExp("^(?: +[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+){" + (n-1).toString() + "} +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+)", "ig"); - let m = z.exec(s.slice(iStart)); - if (!m) { - return null; - } - return [iStart + z.lastIndex - m[1].length, m[1]]; -} - -function prevword (s, iEnd, n) { - // get the (-)nth word of the input string or empty string - let z = new RegExp("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+) +(?:[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+ +){" + (n-1).toString() + "}$", "i"); - let m = z.exec(s.slice(0, iEnd)); - if (!m) { - return null; - } - return [m.index, m[1]]; -} - -function nextword1 (s, iStart) { - // get next word (optimization) - let _zNextWord = new RegExp ("^ +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_-]*)", "ig"); - let m = _zNextWord.exec(s.slice(iStart)); - if (!m) { - return null; - } - return [iStart + _zNextWord.lastIndex - m[1].length, m[1]]; -} - -const _zPrevWord = new RegExp ("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_-]*) +$", "i"); - -function prevword1 (s, iEnd) { - // get previous word (optimization) - let m = _zPrevWord.exec(s.slice(0, iEnd)); - if (!m) { - return null; - } - return [m.index, m[1]]; -} - -function look (s, sPattern, sNegPattern=null) { - // seek sPattern in s (before/after/fulltext), if antipattern sNegPattern not in s - try { - if (sNegPattern && re.search(sNegPattern, s)) { - return false; - } - return re.search(sPattern, s); - } - catch (e) { - console.error(e); - } - return false; -} - - -//////// Analyse groups for regex rules - -function displayInfo (dTokenPos, aWord) { - // for debugging: info of word - if (!aWord) { - console.log("> nothing to find"); - return true; - } - let lMorph = _oSpellChecker.getMorph(aWord[1]); - if (lMorph.length === 0) { - console.log("> not 
in dictionary"); - return true; - } - if (dTokenPos.has(aWord[0])) { - console.log("DA: " + dTokenPos.get(aWord[0])); - } - console.log("FSA: " + lMorph); - return true; -} - -function morph (dTokenPos, aWord, sPattern, sNegPattern, bNoWord=false) { - // analyse a tuple (position, word), returns true if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on) - if (!aWord) { - return bNoWord; - } - let lMorph = (dTokenPos.has(aWord[0]) && dTokenPos.get(aWord[0]))["lMorph"] ? dTokenPos.get(aWord[0])["lMorph"] : _oSpellChecker.getMorph(aWord[1]); - if (lMorph.length === 0) { - return false; - } - if (sNegPattern) { - // check negative condition - if (sNegPattern === "*") { - // all morph must match sPattern - return lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); - } - else { - if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { - return false; - } - } - } - // search sPattern - return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); -} - -function analyse (sWord, sPattern, sNegPattern) { - // analyse a word, returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation off) - let lMorph = _oSpellChecker.getMorph(sWord); - if (lMorph.length === 0) { - return false; - } - if (sNegPattern) { - // check negative condition - if (sNegPattern === "*") { - // all morph must match sPattern - return lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); - } - else { - if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { - return false; - } - } - } - // search sPattern - return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); -} - - -//// Analyse tokens for graph rules - -function g_value (oToken, sValues, nLeft=null, nRight=null) { - // test if is in sValues (each value should be separated with |) - let sValue = (nLeft === null) ? 
"|"+oToken["sValue"]+"|" : "|"+oToken["sValue"].slice(nLeft, nRight)+"|"; - if (sValues.includes(sValue)) { - return true; - } - if (oToken["sValue"].slice(0,2).gl_isTitle()) { // we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout". - if (sValues.includes(sValue.toLowerCase())) { - return true; - } - } - else if (oToken["sValue"].gl_isUpperCase()) { - //if sValue.lower() in sValues: - // return true; - sValue = "|"+sValue.slice(1).gl_toCapitalize(); - if (sValues.includes(sValue)) { - return true; - } - sValue = sValue.toLowerCase(); - if (sValues.includes(sValue)) { - return true; - } - } - return false; -} - -function g_morph (oToken, sPattern, sNegPattern="", nLeft=null, nRight=null, bMemorizeMorph=true) { - // analyse a token, return True if not in morphologies and in morphologies - let lMorph; - if (oToken.hasOwnProperty("lMorph")) { - lMorph = oToken["lMorph"]; - } - else { - if (nLeft !== null) { - let sValue = (nRight !== null) ? oToken["sValue"].slice(nLeft, nRight) : oToken["sValue"].slice(nLeft); - lMorph = _oSpellChecker.getMorph(sValue); - if (bMemorizeMorph) { - oToken["lMorph"] = lMorph; - } - } else { - lMorph = _oSpellChecker.getMorph(oToken["sValue"]); - } - } - if (lMorph.length == 0) { - return false; - } - // check negative condition - if (sNegPattern) { - if (sNegPattern == "*") { - // all morph must match sPattern - return lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); - } - else { - if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { - return false; - } - } - } - // search sPattern - return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); -} - -function g_analyse (oToken, sPattern, sNegPattern="", nLeft=null, nRight=null, bMemorizeMorph=true) { - // analyse a token, return True if not in morphologies and in morphologies - let lMorph; - if (nLeft !== null) { - let sValue = (nRight !== null) ? 
oToken["sValue"].slice(nLeft, nRight) : oToken["sValue"].slice(nLeft); - lMorph = _oSpellChecker.getMorph(sValue); - if (bMemorizeMorph) { - oToken["lMorph"] = lMorph; - } - } else { - lMorph = _oSpellChecker.getMorph(oToken["sValue"]); - } - if (lMorph.length == 0) { - return false; - } - // check negative condition - if (sNegPattern) { - if (sNegPattern == "*") { - // all morph must match sPattern - return lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); - } - else { - if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { - return false; - } - } - } - // search sPattern - return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); -} - -function g_merged_analyse (oToken1, oToken2, cMerger, sPattern, sNegPattern="", bSetMorph=true) { - // merge two token values, return True if not in morphologies and in morphologies (disambiguation off) - let lMorph = _oSpellChecker.getMorph(oToken1["sValue"] + cMerger + oToken2["sValue"]); - if (lMorph.length == 0) { - return false; - } - // check negative condition - if (sNegPattern) { - if (sNegPattern == "*") { - // all morph must match sPattern - let bResult = lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); - if (bResult && bSetMorph) { - oToken1["lMorph"] = lMorph; - } - return bResult; - } - else { - if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { - return false; - } - } - } - // search sPattern - let bResult = lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); - if (bResult && bSetMorph) { - oToken1["lMorph"] = lMorph; - } - return bResult; -} - -function g_tag_before (oToken, dTags, sTag) { - if (!dTags.has(sTag)) { - return false; - } - if (oToken["i"] > dTags.get(sTag)[0]) { - return true; - } - return false; -} - -function g_tag_after (oToken, dTags, sTag) { - if (!dTags.has(sTag)) { - return false; - } - if (oToken["i"] < dTags.get(sTag)[1]) { - return true; - } - return false; -} - -function g_tag (oToken, sTag) { - return oToken.hasOwnProperty("aTags") && 
oToken["aTags"].has(sTag); -} - -function g_space_between_tokens (oToken1, oToken2, nMin, nMax=null) { - let nSpace = oToken2["nStart"] - oToken1["nEnd"] - if (nSpace < nMin) { - return false; - } - if (nMax !== null && nSpace > nMax) { - return false; - } - return true; -} - -function g_token (lToken, i) { - if (i < 0) { - return lToken[0]; - } - if (i >= lToken.length) { - return lToken[lToken.length-1]; - } - return lToken[i]; -} - - -//////// Disambiguator - -function select (dTokenPos, nPos, sWord, sPattern, lDefault=null) { - if (!sWord) { - return true; - } - if (!dTokenPos.has(nPos)) { - console.log("Error. There should be a token at this position: ", nPos); - return true; - } - let lMorph = _oSpellChecker.getMorph(sWord); - if (lMorph.length === 0 || lMorph.length === 1) { - return true; - } - let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) !== -1 ); - if (lSelect.length > 0) { - if (lSelect.length != lMorph.length) { - dTokenPos.get(nPos)["lMorph"] = lSelect; - } - } else if (lDefault) { - dTokenPos.get(nPos)["lMorph"] = lDefault; - } - return true; -} - -function exclude (dTokenPos, nPos, sWord, sPattern, lDefault=null) { - if (!sWord) { - return true; - } - if (!dTokenPos.has(nPos)) { - console.log("Error. 
There should be a token at this position: ", nPos); - return true; - } - let lMorph = _oSpellChecker.getMorph(sWord); - if (lMorph.length === 0 || lMorph.length === 1) { - return true; - } - let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) === -1 ); - if (lSelect.length > 0) { - if (lSelect.length != lMorph.length) { - dTokenPos.get(nPos)["lMorph"] = lSelect; - } - } else if (lDefault) { - dTokenPos.get(nPos)["lMorph"] = lDefault; - } - return true; -} - -function define (dTokenPos, nPos, lMorph) { - dTokenPos.get(nPos)["lMorph"] = lMorph; - return true; -} - - -//// Disambiguation for graph rules - -function g_select (oToken, sPattern, lDefault=null) { - // select morphologies for according to , always return true - let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : _oSpellChecker.getMorph(oToken["sValue"]); - if (lMorph.length === 0 || lMorph.length === 1) { - if (lDefault) { - oToken["lMorph"] = lDefault; - } - return true; - } - let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) !== -1 ); - if (lSelect.length > 0) { - if (lSelect.length != lMorph.length) { - oToken["lMorph"] = lSelect; - } - } else if (lDefault) { - oToken["lMorph"] = lDefault; - } - return true; -} - -function g_exclude (oToken, sPattern, lDefault=null) { - // select morphologies for according to , always return true - let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : _oSpellChecker.getMorph(oToken["sValue"]); - if (lMorph.length === 0 || lMorph.length === 1) { - if (lDefault) { - oToken["lMorph"] = lDefault; - } - return true; - } - let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) === -1 ); - if (lSelect.length > 0) { - if (lSelect.length != lMorph.length) { - oToken["lMorph"] = lSelect; - } - } else if (lDefault) { - oToken["lMorph"] = lDefault; - } - return true; -} - -function g_add_morph (oToken, lNewMorph) { - "Disambiguation: add a morphology to a token" - let lMorph = (oToken.hasOwnProperty("lMorph")) ? 
oToken["lMorph"] : _oSpellChecker.getMorph(oToken["sValue"]); - lMorph.push(...lNewMorph); - oToken["lMorph"] = lMorph; - return true; -} - -function g_define (oToken, lMorph) { - // set morphologies of , always return true - oToken["lMorph"] = lMorph; - return true; -} - -function g_define_from (oToken, nLeft=null, nRight=null) { - let sValue = oToken["sValue"]; - if (nLeft !== null) { - sValue = (nRight !== null) ? sValue.slice(nLeft, nRight) : sValue.slice(nLeft); - } - oToken["lMorph"] = _oSpellChecker.getMorph(sValue); - return true; -} - -function g_change_meta (oToken, sType) { - // Disambiguation: change type of token - oToken["sType"] = sType; - return true; -} - - - -//////// GRAMMAR CHECKER PLUGINS - -${pluginsJS} - - -// generated code, do not edit -const oEvalFunc = { - // callables for regex rules -${callablesJS} - - // callables for graph rules -${graph_callablesJS} -} - if (typeof(exports) !== 'undefined') { exports.lang = gc_engine.lang; exports.locales = gc_engine.locales; exports.pkg = gc_engine.pkg; exports.name = gc_engine.name; exports.version = gc_engine.version; exports.author = gc_engine.author; + // objects + exports.oSpellChecker = gc_engine.oSpellChecker; + exports.oTokenizer = gc_engine.oTokenizer; + exports.aIgnoredRules = gc_engine.aIgnoredRules; + exports.oOptionsColors = gc_engine.oOptionsColors; // init exports.load = gc_engine.load; exports.parse = gc_engine.parse; exports.getSpellChecker = gc_engine.getSpellChecker; // rules @@ -1512,14 +1024,8 @@ exports.ignoreRule = gc_engine.ignoreRule; exports.resetIgnoreRules = gc_engine.resetIgnoreRules; exports.reactivateRule = gc_engine.reactivateRule; exports.listRules = gc_engine.listRules; exports.getRules = gc_engine.getRules; - // options - exports.setOption = gc_engine.setOption; - exports.setOptions = gc_engine.setOptions; - exports.getOptions = gc_engine.getOptions; - exports.getDefaultOptions = gc_engine.getDefaultOptions; - exports.resetOptions = gc_engine.resetOptions; // other 
exports.TextParser = TextParser; } ADDED gc_core/js/lang_core/gc_engine_func.js Index: gc_core/js/lang_core/gc_engine_func.js ================================================================== --- /dev/null +++ gc_core/js/lang_core/gc_engine_func.js @@ -0,0 +1,501 @@ +// JavaScript +// Grammar checker engine functions + +${string} +${regex} +${map} + + +if (typeof(process) !== 'undefined') { + var gc_options = require("./gc_options.js"); +} +else if (typeof(require) !== 'undefined') { + var gc_options = require("resource://grammalecte/${lang}/gc_options.js"); +} + + +let _sAppContext = "JavaScript"; // what software is running +let _oSpellChecker = null; + + +//////// Common functions + +function option (sOpt) { + // return true if option sOpt is active + return gc_options.dOptions.gl_get(sOpt, false); +} + +function echo (x) { + console.log(x); + return true; +} + +var re = { + search: function (sRegex, sText) { + if (sRegex.startsWith("(?i)")) { + return sText.search(new RegExp(sRegex.slice(4), "i")) !== -1; + } else { + return sText.search(sRegex) !== -1; + } + }, + + createRegExp: function (sRegex) { + if (sRegex.startsWith("(?i)")) { + return new RegExp(sRegex.slice(4), "i"); + } else { + return new RegExp(sRegex); + } + } +} + + +//////// functions to get text outside pattern scope + +// warning: check compile_rules.py to understand how it works + +function nextword (s, iStart, n) { + // get the nth word of the input string or empty string + let z = new RegExp("^(?: +[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+){" + (n-1).toString() + "} +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+)", "ig"); + let m = z.exec(s.slice(iStart)); + if (!m) { + return null; + } + return [iStart + z.lastIndex - m[1].length, m[1]]; +} + +function prevword (s, iEnd, n) { + // get the (-)nth word of the input string or empty string + let z = new RegExp("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+) +(?:[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+ +){" + (n-1).toString() + "}$", "i"); + let m = 
z.exec(s.slice(0, iEnd)); + if (!m) { + return null; + } + return [m.index, m[1]]; +} + +function nextword1 (s, iStart) { + // get next word (optimization) + let _zNextWord = new RegExp ("^ +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_-]*)", "ig"); + let m = _zNextWord.exec(s.slice(iStart)); + if (!m) { + return null; + } + return [iStart + _zNextWord.lastIndex - m[1].length, m[1]]; +} + +const _zPrevWord = new RegExp ("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_-]*) +$", "i"); + +function prevword1 (s, iEnd) { + // get previous word (optimization) + let m = _zPrevWord.exec(s.slice(0, iEnd)); + if (!m) { + return null; + } + return [m.index, m[1]]; +} + +function look (s, sPattern, sNegPattern=null) { + // seek sPattern in s (before/after/fulltext), if antipattern sNegPattern not in s + try { + if (sNegPattern && re.search(sNegPattern, s)) { + return false; + } + return re.search(sPattern, s); + } + catch (e) { + console.error(e); + } + return false; +} + + +//////// Analyse groups for regex rules + +function displayInfo (dTokenPos, aWord) { + // for debugging: info of word + if (!aWord) { + console.log("> nothing to find"); + return true; + } + let lMorph = _oSpellChecker.getMorph(aWord[1]); + if (lMorph.length === 0) { + console.log("> not in dictionary"); + return true; + } + if (dTokenPos.has(aWord[0])) { + console.log("DA: " + dTokenPos.get(aWord[0])); + } + console.log("FSA: " + lMorph); + return true; +} + +function morph (dTokenPos, aWord, sPattern, sNegPattern, bNoWord=false) { + // analyse a tuple (position, word), returns true if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on) + if (!aWord) { + return bNoWord; + } + let lMorph = (dTokenPos.has(aWord[0]) && dTokenPos.get(aWord[0]))["lMorph"] ? 
dTokenPos.get(aWord[0])["lMorph"] : _oSpellChecker.getMorph(aWord[1]); + if (lMorph.length === 0) { + return false; + } + if (sNegPattern) { + // check negative condition + if (sNegPattern === "*") { + // all morph must match sPattern + return lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); + } + else { + if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { + return false; + } + } + } + // search sPattern + return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); +} + +function analyse (sWord, sPattern, sNegPattern) { + // analyse a word, returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation off) + let lMorph = _oSpellChecker.getMorph(sWord); + if (lMorph.length === 0) { + return false; + } + if (sNegPattern) { + // check negative condition + if (sNegPattern === "*") { + // all morph must match sPattern + return lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); + } + else { + if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { + return false; + } + } + } + // search sPattern + return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); +} + + +//// Analyse tokens for graph rules + +function g_value (oToken, sValues, nLeft=null, nRight=null) { + // test if is in sValues (each value should be separated with |) + let sValue = (nLeft === null) ? "|"+oToken["sValue"]+"|" : "|"+oToken["sValue"].slice(nLeft, nRight)+"|"; + if (sValues.includes(sValue)) { + return true; + } + if (oToken["sValue"].slice(0,2).gl_isTitle()) { // we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout". 
+ if (sValues.includes(sValue.toLowerCase())) { + return true; + } + } + else if (oToken["sValue"].gl_isUpperCase()) { + //if sValue.lower() in sValues: + // return true; + sValue = "|"+sValue.slice(1).gl_toCapitalize(); + if (sValues.includes(sValue)) { + return true; + } + sValue = sValue.toLowerCase(); + if (sValues.includes(sValue)) { + return true; + } + } + return false; +} + +function g_morph (oToken, sPattern, sNegPattern="", nLeft=null, nRight=null, bMemorizeMorph=true) { + // analyse a token, return True if not in morphologies and in morphologies + let lMorph; + if (oToken.hasOwnProperty("lMorph")) { + lMorph = oToken["lMorph"]; + } + else { + if (nLeft !== null) { + let sValue = (nRight !== null) ? oToken["sValue"].slice(nLeft, nRight) : oToken["sValue"].slice(nLeft); + lMorph = _oSpellChecker.getMorph(sValue); + if (bMemorizeMorph) { + oToken["lMorph"] = lMorph; + } + } else { + lMorph = _oSpellChecker.getMorph(oToken["sValue"]); + } + } + if (lMorph.length == 0) { + return false; + } + // check negative condition + if (sNegPattern) { + if (sNegPattern == "*") { + // all morph must match sPattern + return lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); + } + else { + if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { + return false; + } + } + } + // search sPattern + return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); +} + +function g_analyse (oToken, sPattern, sNegPattern="", nLeft=null, nRight=null, bMemorizeMorph=true) { + // analyse a token, return True if not in morphologies and in morphologies + let lMorph; + if (nLeft !== null) { + let sValue = (nRight !== null) ? 
oToken["sValue"].slice(nLeft, nRight) : oToken["sValue"].slice(nLeft); + lMorph = _oSpellChecker.getMorph(sValue); + if (bMemorizeMorph) { + oToken["lMorph"] = lMorph; + } + } else { + lMorph = _oSpellChecker.getMorph(oToken["sValue"]); + } + if (lMorph.length == 0) { + return false; + } + // check negative condition + if (sNegPattern) { + if (sNegPattern == "*") { + // all morph must match sPattern + return lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); + } + else { + if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { + return false; + } + } + } + // search sPattern + return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); +} + +function g_merged_analyse (oToken1, oToken2, cMerger, sPattern, sNegPattern="", bSetMorph=true) { + // merge two token values, return True if not in morphologies and in morphologies (disambiguation off) + let lMorph = _oSpellChecker.getMorph(oToken1["sValue"] + cMerger + oToken2["sValue"]); + if (lMorph.length == 0) { + return false; + } + // check negative condition + if (sNegPattern) { + if (sNegPattern == "*") { + // all morph must match sPattern + let bResult = lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); + if (bResult && bSetMorph) { + oToken1["lMorph"] = lMorph; + } + return bResult; + } + else { + if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { + return false; + } + } + } + // search sPattern + let bResult = lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); + if (bResult && bSetMorph) { + oToken1["lMorph"] = lMorph; + } + return bResult; +} + +function g_tag_before (oToken, dTags, sTag) { + if (!dTags.has(sTag)) { + return false; + } + if (oToken["i"] > dTags.get(sTag)[0]) { + return true; + } + return false; +} + +function g_tag_after (oToken, dTags, sTag) { + if (!dTags.has(sTag)) { + return false; + } + if (oToken["i"] < dTags.get(sTag)[1]) { + return true; + } + return false; +} + +function g_tag (oToken, sTag) { + return oToken.hasOwnProperty("aTags") && 
oToken["aTags"].has(sTag); +} + +function g_space_between_tokens (oToken1, oToken2, nMin, nMax=null) { + let nSpace = oToken2["nStart"] - oToken1["nEnd"] + if (nSpace < nMin) { + return false; + } + if (nMax !== null && nSpace > nMax) { + return false; + } + return true; +} + +function g_token (lToken, i) { + if (i < 0) { + return lToken[0]; + } + if (i >= lToken.length) { + return lToken[lToken.length-1]; + } + return lToken[i]; +} + + +//////// Disambiguator + +function select (dTokenPos, nPos, sWord, sPattern, lDefault=null) { + if (!sWord) { + return true; + } + if (!dTokenPos.has(nPos)) { + console.log("Error. There should be a token at this position: ", nPos); + return true; + } + let lMorph = _oSpellChecker.getMorph(sWord); + if (lMorph.length === 0 || lMorph.length === 1) { + return true; + } + let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) !== -1 ); + if (lSelect.length > 0) { + if (lSelect.length != lMorph.length) { + dTokenPos.get(nPos)["lMorph"] = lSelect; + } + } else if (lDefault) { + dTokenPos.get(nPos)["lMorph"] = lDefault; + } + return true; +} + +function exclude (dTokenPos, nPos, sWord, sPattern, lDefault=null) { + if (!sWord) { + return true; + } + if (!dTokenPos.has(nPos)) { + console.log("Error. 
There should be a token at this position: ", nPos); + return true; + } + let lMorph = _oSpellChecker.getMorph(sWord); + if (lMorph.length === 0 || lMorph.length === 1) { + return true; + } + let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) === -1 ); + if (lSelect.length > 0) { + if (lSelect.length != lMorph.length) { + dTokenPos.get(nPos)["lMorph"] = lSelect; + } + } else if (lDefault) { + dTokenPos.get(nPos)["lMorph"] = lDefault; + } + return true; +} + +function define (dTokenPos, nPos, lMorph) { + dTokenPos.get(nPos)["lMorph"] = lMorph; + return true; +} + + +//// Disambiguation for graph rules + +function g_select (oToken, sPattern, lDefault=null) { + // select morphologies for according to , always return true + let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : _oSpellChecker.getMorph(oToken["sValue"]); + if (lMorph.length === 0 || lMorph.length === 1) { + if (lDefault) { + oToken["lMorph"] = lDefault; + } + return true; + } + let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) !== -1 ); + if (lSelect.length > 0) { + if (lSelect.length != lMorph.length) { + oToken["lMorph"] = lSelect; + } + } else if (lDefault) { + oToken["lMorph"] = lDefault; + } + return true; +} + +function g_exclude (oToken, sPattern, lDefault=null) { + // select morphologies for according to , always return true + let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : _oSpellChecker.getMorph(oToken["sValue"]); + if (lMorph.length === 0 || lMorph.length === 1) { + if (lDefault) { + oToken["lMorph"] = lDefault; + } + return true; + } + let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) === -1 ); + if (lSelect.length > 0) { + if (lSelect.length != lMorph.length) { + oToken["lMorph"] = lSelect; + } + } else if (lDefault) { + oToken["lMorph"] = lDefault; + } + return true; +} + +function g_add_morph (oToken, lNewMorph) { + "Disambiguation: add a morphology to a token" + let lMorph = (oToken.hasOwnProperty("lMorph")) ? 
oToken["lMorph"] : _oSpellChecker.getMorph(oToken["sValue"]); + lMorph.push(...lNewMorph); + oToken["lMorph"] = lMorph; + return true; +} + +function g_define (oToken, lMorph) { + // set morphologies of , always return true + oToken["lMorph"] = lMorph; + return true; +} + +function g_define_from (oToken, nLeft=null, nRight=null) { + let sValue = oToken["sValue"]; + if (nLeft !== null) { + sValue = (nRight !== null) ? sValue.slice(nLeft, nRight) : sValue.slice(nLeft); + } + oToken["lMorph"] = _oSpellChecker.getMorph(sValue); + return true; +} + +function g_change_meta (oToken, sType) { + // Disambiguation: change type of token + oToken["sType"] = sType; + return true; +} + + + +//////// GRAMMAR CHECKER PLUGINS + +${pluginsJS} + + +// generated code, do not edit +var gc_engine_func = { + + load: function (sContext, oSpellChecker) { + _sAppContext = sContext + _oSpellChecker = oSpellChecker + }, + + // callables for regex rules +${callablesJS} + + // callables for graph rules +${graph_callablesJS} +} + + +if (typeof(exports) !== 'undefined') { + exports.load = gc_engine_func.load; +} Index: gc_core/js/lang_core/gc_options.js ================================================================== --- gc_core/js/lang_core/gc_options.js +++ gc_core/js/lang_core/gc_options.js @@ -6,53 +6,98 @@ ${map} var gc_options = { - getOptions: function (sContext="JavaScript") { - if (this.dOpt.hasOwnProperty(sContext)) { - return this.dOpt[sContext]; + + dOptions: new Map(), + + sAppContext: "JavaScript", + + load: function (sContext="JavaScript") { + this.sAppContext = sContext; + this.dOptions = this.getDefaultOptions(sContext); + }, + + setOption: function (sOpt, bVal) { + if (this.dOptions.has(sOpt)) { + this.dOptions.set(sOpt, bVal); + } + }, + + setOptions: function (dOpt) { + this.dOptions.gl_updateOnlyExistingKeys(dOpt); + }, + + getOptions: function () { + return this.dOptions.gl_shallowCopy(); + }, + + resetOptions: function () { + this.dOptions = 
this.getDefaultOptions(this._sAppContext); + }, + + getDefaultOptions: function (sContext="") { + if (!sContext) { + sContext = this.sAppContext; + } + if (this.oDefaultOpt.hasOwnProperty(sContext)) { + return this.oDefaultOpt[sContext].gl_shallowCopy(); + } + return this.oDefaultOpt["JavaScript"].gl_shallowCopy(); + }, + + getOptionLabels: function (sLang="${sLang}") { + if (this.oOptLabel.hasOwnProperty(sLang)) { + return this.oOptLabel[sLang]; } - return this.dOpt["JavaScript"]; + return this.oOptLabel["{$sLang}"]; }, getOptionsColors: function (sTheme="Default", sColorType="aRGB") { - let dOptColor = (this.dOptColor.hasOwnProperty(sTheme)) ? this.dOptColor[sTheme] : this.dOptColor["Default"]; - let dColorType = (this.dColorType.hasOwnProperty(sColorType)) ? this.dColorType[sColorType] : this.dColorType["aRGB"]; - let dColor = {}; + let oOptColor = (this.oOptColor.hasOwnProperty(sTheme)) ? this.oOptColor[sTheme] : this.oOptColor["Default"]; + let oColorType = (this.oColorType.hasOwnProperty(sColorType)) ? 
this.oColorType[sColorType] : this.oColorType["aRGB"]; + let oColor = {}; try { - for (let [sOpt, sColor] of Object.entries(dOptColor)) { - dColor[sOpt] = dColorType[sColor]; + for (let [sOpt, sColor] of Object.entries(oOptColor)) { + oColor[sOpt] = oColorType[sColor]; } - return dColor; + return oColor; } catch (e) { console.error(e); return {}; } }, lStructOpt: ${lStructOpt}, - dOpt: { + oDefaultOpt: { "JavaScript": new Map (${dOptJavaScript}), "Firefox": new Map (${dOptFirefox}), "Thunderbird": new Map (${dOptThunderbird}), }, - dColorType: ${dColorType}, + oColorType: ${dColorType}, - dOptColor: ${dOptColor}, + oOptColor: ${dOptColor}, - dOptLabel: ${dOptLabel} + oOptLabel: ${dOptLabel} }; if (typeof(exports) !== 'undefined') { + exports.dOptions = gc_options.dOptions; + exports.sAppContext = gc_options.sAppContext; + exports.load = gc_options.load; + exports.setOption = gc_options.setOption; + exports.setOptions = gc_options.setOptions; + exports.resetOptions = gc_options.resetOptions; + exports.getDefaultOptions = gc_options.getDefaultOptions; exports.getOptions = gc_options.getOptions; exports.getOptionsColors = gc_options.getOptionsColors; exports.lStructOpt = gc_options.lStructOpt; - exports.dOpt = gc_options.dOpt; + exports.oDefaultOpt = gc_options.oDefaultOpt; exports.dColorType = gc_options.dColorType; - exports.dOptColor = gc_options.dOptColor; - exports.dOptLabel = gc_options.dOptLabel; + exports.oOptColor = gc_options.oOptColor; + exports.oOptLabel = gc_options.oOptLabel; } Index: gc_core/js/tests.js ================================================================== --- gc_core/js/tests.js +++ gc_core/js/tests.js @@ -14,23 +14,24 @@ } class TestGrammarChecking { - constructor (gce, spfTests="") { - this.gce = gce; + constructor (gc_engine, gc_options, spfTests="") { + this.gc_engine = gc_engine; + this.gc_options = gc_options; this.spfTests = spfTests; this._aRuleTested = new Set(); } * testParse (bDebug=false) { const t0 = Date.now(); let sURL; 
if(typeof(process) !== 'undefined') { - sURL = (this.spfTests !== "") ? this.spfTests : "./"+this.gce.lang+"/tests_data.json"; + sURL = (this.spfTests !== "") ? this.spfTests : "./"+this.gc_engine.lang+"/tests_data.json"; } else { - sURL = (this.spfTests !== "") ? this.spfTests : "resource://grammalecte/"+this.gce.lang+"/tests_data.json"; + sURL = (this.spfTests !== "") ? this.spfTests : "resource://grammalecte/"+this.gc_engine.lang+"/tests_data.json"; } const aData = JSON.parse(helpers.loadFile(sURL)).aData; let nInvalid = 0; let nTotal = 0; let sErrorText; @@ -44,11 +45,11 @@ let sUntestedRules = ""; let bShowUntested = false; let zOption = /^__([a-zA-Z0-9]+)__ /; let sOption; let m; - yield "Tests [" + this.gce.lang + "]: " + aData.length.toString(); + yield "Tests [" + this.gc_engine.lang + "]: " + aData.length.toString(); try { for (let sLine of aData) { sLineNum = sLine.slice(0,10).trim(); sLine = sLine.slice(10).trim(); if (sLine.length > 0 && !sLine.startsWith("#")) { @@ -90,11 +91,11 @@ console.error(e); } if (bShowUntested) { i = 0; - for (let [sOpt, sLineId, sRuleId] of this.gce.listRules()) { + for (let [sOpt, sLineId, sRuleId] of this.gc_engine.listRules()) { if (sOpt !== "@@@@" && !this._aRuleTested.has(sLineId) && !/^[0-9]+[sp]$|^[pd]_/.test(sRuleId)) { sUntestedRules += sLineId + "/" + sRuleId + ", "; i += 1; } } @@ -136,15 +137,15 @@ _getFoundErrors (sLine, bDebug, sOption) { try { let aErrs = []; if (sOption) { - this.gce.setOption(sOption, true); - aErrs = this.gce.parse(sLine, "FR", bDebug); - this.gce.setOption(sOption, false); + this.gc_options.setOption(sOption, true); + aErrs = this.gc_engine.parse(sLine, "FR", bDebug); + this.gc_options.setOption(sOption, false); } else { - aErrs = this.gce.parse(sLine, "FR", bDebug); + aErrs = this.gc_engine.parse(sLine, "FR", bDebug); } let sRes = " ".repeat(sLine.length); let sListErr = ""; for (let dErr of aErrs) { sRes = sRes.slice(0, dErr["nStart"]) + "~".repeat(dErr["nEnd"] - dErr["nStart"]) + 
sRes.slice(dErr["nEnd"]); Index: gc_core/py/__init__.py ================================================================== --- gc_core/py/__init__.py +++ gc_core/py/__init__.py @@ -1,5 +1,5 @@ """ Grammar checker """ -from .grammar_checker import * +from .${lang}.gc_engine import * DELETED gc_core/py/grammar_checker.py Index: gc_core/py/grammar_checker.py ================================================================== --- gc_core/py/grammar_checker.py +++ /dev/null @@ -1,82 +0,0 @@ -""" -Grammalecte, grammar checker -""" - -import importlib -import json - -from . import text - - -class GrammarChecker: - "GrammarChecker: Wrapper for the grammar checker engine" - - def __init__ (self, sLangCode, sContext="Python"): - self.sLangCode = sLangCode - # Grammar checker engine - self.gce = importlib.import_module("."+sLangCode, "grammalecte") - self.gce.load(sContext) - # Spell checker - self.oSpellChecker = self.gce.getSpellChecker() - # Lexicographer - self.oLexicographer = None - # Text formatter - self.oTextFormatter = None - - def getGCEngine (self): - "return the grammar checker object" - return self.gce - - def getSpellChecker (self): - "return the spell checker object" - return self.oSpellChecker - - def getTextFormatter (self): - "load and return the text formatter" - if self.oTextFormatter is None: - tf = importlib.import_module("."+self.sLangCode+".textformatter", "grammalecte") - self.oTextFormatter = tf.TextFormatter() - return self.oTextFormatter - - def getLexicographer (self): - "load and return the lexicographer" - if self.oLexicographer is None: - lxg = importlib.import_module("."+self.sLangCode+".lexicographe", "grammalecte") - self.oLexicographer = lxg.Lexicographe(self.oSpellChecker) - return self.oLexicographer - - def displayGCOptions (self): - "display the grammar checker options" - self.gce.displayOptions() - - def getParagraphErrors (self, sText, dOptions=None, bContext=False, bSpellSugg=False, bDebug=False): - "returns a tuple: (grammar errors, 
spelling errors)" - aGrammErrs = self.gce.parse(sText, "FR", bDebug=bDebug, dOptions=dOptions, bContext=bContext) - aSpellErrs = self.oSpellChecker.parseParagraph(sText, bSpellSugg) - return aGrammErrs, aSpellErrs - - def getParagraphWithErrors (self, sText, dOptions=None, bEmptyIfNoErrors=False, bSpellSugg=False, nWidth=100, bDebug=False): - "parse text and return a readable text with underline errors" - aGrammErrs, aSpellErrs = self.getParagraphErrors(sText, dOptions, False, bSpellSugg, bDebug) - if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs: - return ("", []) - return text.generateParagraph(sText, aGrammErrs, aSpellErrs, nWidth) - - def getTextWithErrors (self, sText, bEmptyIfNoErrors=False, bSpellSugg=False, nWidth=100, bDebug=False): - "[todo]" - - def getParagraphErrorsAsJSON (self, iIndex, sText, dOptions=None, bContext=False, bEmptyIfNoErrors=False, bSpellSugg=False, bReturnText=False, lLineSet=None, bDebug=False): - "parse text and return errors as a JSON string" - aGrammErrs, aSpellErrs = self.getParagraphErrors(sText, dOptions, bContext, bSpellSugg, bDebug) - aGrammErrs = list(aGrammErrs) - if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs: - return "" - if lLineSet: - aGrammErrs, aSpellErrs = text.convertToXY(aGrammErrs, aSpellErrs, lLineSet) - return json.dumps({ "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False) - if bReturnText: - return json.dumps({ "iParagraph": iIndex, "sText": sText, "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False) - return json.dumps({ "iParagraph": iIndex, "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False) - - def getTextErrorsAsJSON (self, sText, bContext=False, bEmptyIfNoErrors=False, bSpellSugg=False, bReturnText=False, bDebug=False): - "[todo]" Index: gc_core/py/lang_core/gc_engine.py ================================================================== --- gc_core/py/lang_core/gc_engine.py +++ 
gc_core/py/lang_core/gc_engine.py @@ -3,19 +3,23 @@ Grammar checker engine """ import re import traceback +import json +import importlib #import unicodedata from itertools import chain from ..graphspell.spellchecker import SpellChecker from ..graphspell.echo import echo from .. import text +from . import gc_engine_func as gce_func from . import gc_options + try: # LibreOffice / OpenOffice from com.sun.star.linguistic2 import SingleProofreadingError from com.sun.star.text.TextMarkupType import PROOFREADING @@ -24,14 +28,13 @@ _bWriterError = True except ImportError: _bWriterError = False -__all__ = [ "lang", "locales", "pkg", "name", "version", "author", \ - "load", "parse", "getSpellChecker", \ - "setOption", "setOptions", "getOptions", "getDefaultOptions", "getOptionsLabels", "resetOptions", "displayOptions", \ - "ignoreRule", "resetIgnoreRules", "reactivateRule", "listRules", "displayRules", "setWriterUnderliningStyle" ] +#__all__ = [ "lang", "locales", "pkg", "name", "version", "author", \ +# "load", "parse", "getSpellChecker", "getTextFormatter", "getLexicographer" \ +# "ignoreRule", "resetIgnoreRules", "reactivateRule", "listRules", "displayRules", "setWriterUnderliningStyle" ] __version__ = "${version}" lang = "${lang}" @@ -43,42 +46,42 @@ # Modules _rules = None # module gc_rules _rules_graph = None # module gc_rules_graph -# Data -_sAppContext = "" # what software is running -_dOptions = None -_dOptionsColors = None +# Tools _oSpellChecker = None _oTokenizer = None + +# Data _aIgnoredRules = set() # Writer underlining style +_dOptionsColors = None _bMulticolor = True _nUnderliningStyle = 0 #### Initialization def load (sContext="Python", sColorType="aRGB"): "initialization of the grammar checker" global _oSpellChecker - global _sAppContext - global _dOptions global _dOptionsColors global _oTokenizer try: _oSpellChecker = SpellChecker("${lang}", "${dic_main_filename_py}", "${dic_community_filename_py}", "${dic_personal_filename_py}") - _sAppContext = 
sContext - _dOptions = gc_options.getOptions(sContext).copy() # duplication necessary, to be able to reset to default - _dOptionsColors = gc_options.getOptionsColors(sContext, sColorType) + _oSpellChecker.activateStorage() _oTokenizer = _oSpellChecker.getTokenizer() - _oSpellChecker.activateStorage() + gce_func.load(sContext, _oSpellChecker) + gc_options.load(sContext) + _dOptionsColors = gc_options.getOptionsColors(sContext, sColorType) except: traceback.print_exc() + +#### Tools def getSpellChecker (): "return the spellchecker object" return _oSpellChecker @@ -129,11 +132,11 @@ "(re)activate rule " _aIgnoredRules.discard(sRuleId) def listRules (sFilter=None): - "generator: returns typle (sOption, sLineId, sRuleId)" + "generator: returns tuple (sRuleType, sOption, sLineId, sRuleId)" if sFilter: try: zFilter = re.compile(sFilter) except re.error: echo("# Error. List rules: wrong regex.") @@ -155,53 +158,10 @@ "display the name of rules, with the filter " echo("List of rules. Filter: << " + str(sFilter) + " >>") for sOption, sLineId, sRuleId, sType in listRules(sFilter): echo("{:<8} {:<10} {:<10} {}".format(sOption, sLineId, sRuleId, sType)) - -#### Options - -def setOption (sOpt, bVal): - "set option with if it exists" - if sOpt in _dOptions: - _dOptions[sOpt] = bVal - - -def setOptions (dOpt): - "update the dictionary of options with " - for sKey, bVal in dOpt.items(): - if sKey in _dOptions: - _dOptions[sKey] = bVal - - -def getOptions (): - "return the dictionary of current options" - return _dOptions - - -def getDefaultOptions (): - "return the dictionary of default options" - return gc_options.getOptions(_sAppContext).copy() - - -def getOptionsLabels (sLang): - "return options labels" - return gc_options.getUI(sLang) - - -def displayOptions (sLang="${lang}"): - "display the list of grammar checking options" - echo("Options:") - echo("\n".join( [ k+":\t"+str(v)+"\t"+gc_options.getUI(sLang).get(k, ("?", ""))[0] for k, v in sorted(_dOptions.items()) ] )) - 
echo("") - - -def resetOptions (): - "set options to default values" - global _dOptions - _dOptions = getDefaultOptions() - def setWriterUnderliningStyle (sStyle="BOLDWAVE", bMulticolor=True): "set underlining style for Writer (WAVE, BOLDWAVE, BOLD)" global _nUnderliningStyle global _bMulticolor @@ -219,10 +179,39 @@ _nUnderliningStyle = 0 _bMulticolor = bMulticolor #### Parsing + +def getParagraphErrors (sText, dOptions=None, bContext=False, bSpellSugg=False, bDebug=False): + "returns a tuple: (grammar errors, spelling errors)" + aGrammErrs = parse(sText, "FR", bDebug=bDebug, dOptions=dOptions, bContext=bContext) + aSpellErrs = _oSpellChecker.parseParagraph(sText, bSpellSugg) + return aGrammErrs, aSpellErrs + + +def getParagraphWithErrors (sText, dOptions=None, bEmptyIfNoErrors=False, bSpellSugg=False, nWidth=100, bDebug=False): + "parse text and return a readable text with underline errors" + aGrammErrs, aSpellErrs = getParagraphErrors(sText, dOptions, False, bSpellSugg, bDebug) + if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs: + return ("", []) + return text.generateParagraph(sText, aGrammErrs, aSpellErrs, nWidth) + + +def getParagraphErrorsAsJSON (iIndex, sText, dOptions=None, bContext=False, bEmptyIfNoErrors=False, bSpellSugg=False, bReturnText=False, lLineSet=None, bDebug=False): + "parse text and return errors as a JSON string" + aGrammErrs, aSpellErrs = getParagraphErrors(sText, dOptions, bContext, bSpellSugg, bDebug) + aGrammErrs = list(aGrammErrs) + if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs: + return "" + if lLineSet: + aGrammErrs, aSpellErrs = text.convertToXY(aGrammErrs, aSpellErrs, lLineSet) + return json.dumps({ "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False) + if bReturnText: + return json.dumps({ "iParagraph": iIndex, "sText": sText, "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False) + return json.dumps({ "iParagraph": iIndex, "lGrammarErrors": aGrammErrs, 
"lSpellingErrors": aSpellErrs }, ensure_ascii=False) + def parse (sText, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False, bFullInfo=False): "init point to analyse and returns an iterable of errors or (with option ) paragraphs errors and sentences with tokens and errors" oText = TextParser(sText) return oText.parse(sCountry, bDebug, dOptions, bContext, bFullInfo) @@ -262,12 +251,12 @@ return s def parse (self, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False, bFullInfo=False): "analyses and returns an iterable of errors or (with option ) paragraphs errors and sentences with tokens and errors" #sText = unicodedata.normalize("NFC", sText) - dOpt = dOptions or _dOptions - bShowRuleId = option('idrule') + dOpt = dOptions or gc_options.dOptions + bShowRuleId = gc_options.dOptions.get('idrule', False) # parse paragraph try: self.parseText(self.sText, self.sText0, True, 0, sCountry, dOpt, bShowRuleId, bDebug, bContext) except: raise @@ -340,11 +329,11 @@ for m in zRegex.finditer(sText): bCondMemo = None for sFuncCond, cActionType, sWhat, *eAct in lActions: # action in lActions: [ condition, action type, replacement/suggestion/action[, iGroup[, message, URL]] ] try: - bCondMemo = not sFuncCond or globals()[sFuncCond](sText, sText0, m, self.dTokenPos, sCountry, bCondMemo) + bCondMemo = not sFuncCond or getattr(gce_func, sFuncCond)(sText, sText0, m, self.dTokenPos, sCountry, bCondMemo) if bCondMemo: if bDebug: echo("RULE: " + sLineId) if cActionType == "-": # grammar error @@ -360,11 +349,11 @@ if bDebug: echo("~ " + sText + " -- " + m.group(eAct[0]) + " # " + sLineId) elif cActionType == "=": # disambiguation if not bParagraph: - globals()[sWhat](sText, m, self.dTokenPos) + getattr(gce_func, sWhat)(sText, m, self.dTokenPos) if bDebug: echo("= " + m.group(0) + " # " + sLineId) elif cActionType == ">": # we do nothing, this test is just a condition to apply all following actions pass @@ -586,18 +575,18 @@ for sRuleId in 
dGraph[nextNodeKey]: try: if bDebug: echo(" >TRY: " + sRuleId + " " + sLineId) _, sOption, sFuncCond, cActionType, sWhat, *eAct = _rules_graph.dRule[sRuleId] - # Suggestion [ option, condition, "-", replacement/suggestion/action, iTokenStart, iTokenEnd, cStartLimit, cEndLimit, bCaseSvty, nPriority, sMessage, sURL ] - # TextProcessor [ option, condition, "~", replacement/suggestion/action, iTokenStart, iTokenEnd, bCaseSvty ] - # Disambiguator [ option, condition, "=", replacement/suggestion/action ] - # Tag [ option, condition, "/", replacement/suggestion/action, iTokenStart, iTokenEnd ] - # Immunity [ option, condition, "!", "", iTokenStart, iTokenEnd ] - # Test [ option, condition, ">", "" ] + # Suggestion [ sActionLineId, option, condition, "-", replacement/suggestion/action, iTokenStart, iTokenEnd, cStartLimit, cEndLimit, bCaseSvty, nPriority, sMessage, sURL ] + # TextProcessor [ sActionLineId, option, condition, "~", replacement/suggestion/action, iTokenStart, iTokenEnd, bCaseSvty ] + # Disambiguator [ sActionLineId, option, condition, "=", replacement/suggestion/action ] + # Tag [ sActionLineId, option, condition, "/", replacement/suggestion/action, iTokenStart, iTokenEnd ] + # Immunity [ sActionLineId, option, condition, "!", "", iTokenStart, iTokenEnd ] + # Test [ sActionLineId, option, condition, ">", "" ] if not sOption or dOptions.get(sOption, False): - bCondMemo = not sFuncCond or globals()[sFuncCond](self.lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, self.dTags, self.sSentence, self.sSentence0) + bCondMemo = not sFuncCond or getattr(gce_func, sFuncCond)(self.lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, self.dTags, self.sSentence, self.sSentence0) if bCondMemo: if cActionType == "-": # grammar error iTokenStart, iTokenEnd, cStartLimit, cEndLimit, bCaseSvty, nPriority, sMessage, sURL = eAct nTokenErrorStart = nTokenOffset + iTokenStart if iTokenStart > 0 else nLastToken + iTokenStart @@ -619,11 +608,11 @@ bChange = True if bDebug: 
echo(" TEXT_PROCESSOR: [{}:{}] > {}".format(self.lToken[nTokenStart]["sValue"], self.lToken[nTokenEnd]["sValue"], sWhat)) elif cActionType == "=": # disambiguation - globals()[sWhat](self.lToken, nTokenOffset, nLastToken) + getattr(gce_func, sWhat)(self.lToken, nTokenOffset, nLastToken) if bDebug: echo(" DISAMBIGUATOR: ({}) [{}:{}]".format(sWhat, self.lToken[nTokenOffset+1]["sValue"], self.lToken[nLastToken]["sValue"])) elif cActionType == ">": # we do nothing, this test is just a condition to apply all following actions if bDebug: @@ -675,20 +664,20 @@ def _createErrorFromRegex (self, sText, sText0, sRepl, nOffset, m, iGroup, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext): nStart = nOffset + m.start(iGroup) nEnd = nOffset + m.end(iGroup) # suggestions if sRepl[0:1] == "=": - sSugg = globals()[sRepl[1:]](sText, m) + sSugg = getattr(gce_func, sRepl[1:])(sText, m) lSugg = sSugg.split("|") if sSugg else [] elif sRepl == "_": lSugg = [] else: lSugg = m.expand(sRepl).split("|") if bUppercase and lSugg and m.group(iGroup)[0:1].isupper(): lSugg = list(map(lambda s: s[0:1].upper()+s[1:], lSugg)) # Message - sMessage = globals()[sMsg[1:]](sText, m) if sMsg[0:1] == "=" else m.expand(sMsg) + sMessage = getattr(gce_func, sMsg[1:])(sText, m) if sMsg[0:1] == "=" else m.expand(sMsg) if bShowRuleId: sMessage += " #" + sLineId + " / " + sRuleId # if _bWriterError: return self._createErrorForWriter(nStart, nEnd - nStart, sRuleId, sOption, sMessage, lSugg, sURL) @@ -695,20 +684,20 @@ return self._createErrorAsDict(nStart, nEnd, sLineId, sRuleId, sOption, sMessage, lSugg, sURL, bContext) def _createErrorFromTokens (self, sSugg, nTokenOffset, nLastToken, iFirstToken, nStart, nEnd, sLineId, sRuleId, bCaseSvty, sMsg, sURL, bShowRuleId, sOption, bContext): # suggestions if sSugg[0:1] == "=": - sSugg = globals()[sSugg[1:]](self.lToken, nTokenOffset, nLastToken) + sSugg = getattr(gce_func, sSugg[1:])(self.lToken, nTokenOffset, nLastToken) lSugg = sSugg.split("|") 
if sSugg else [] elif sSugg == "_": lSugg = [] else: lSugg = self._expand(sSugg, nTokenOffset, nLastToken).split("|") if bCaseSvty and lSugg and self.lToken[iFirstToken]["sValue"][0:1].isupper(): lSugg = list(map(lambda s: s[0:1].upper()+s[1:], lSugg)) # Message - sMessage = globals()[sMsg[1:]](self.lToken, nTokenOffset, nLastToken) if sMsg[0:1] == "=" else self._expand(sMsg, nTokenOffset, nLastToken) + sMessage = getattr(gce_func, sMsg[1:])(self.lToken, nTokenOffset, nLastToken) if sMsg[0:1] == "=" else self._expand(sMsg, nTokenOffset, nLastToken) if bShowRuleId: sMessage += " #" + sLineId + " / " + sRuleId # if _bWriterError: return self._createErrorForWriter(nStart, nEnd - nStart, sRuleId, sOption, sMessage, lSugg, sURL) @@ -768,11 +757,11 @@ elif sRepl == "_": sNew = "_" * nLen elif sRepl == "@": sNew = "@" * nLen elif sRepl[0:1] == "=": - sNew = globals()[sRepl[1:]](sText, m) + sNew = getattr(gce_func, sRepl[1:])(sText, m) sNew = sNew + " " * (nLen-len(sNew)) if bUppercase and m.group(iGroup)[0:1].isupper(): sNew = sNew.capitalize() else: sNew = m.expand(sRepl) @@ -798,11 +787,11 @@ else: for i in range(nTokenRewriteStart, nTokenRewriteEnd+1): self.lToken[i]["sNewValue"] = "_" else: if sWhat.startswith("="): - sWhat = globals()[sWhat[1:]](self.lToken, nTokenOffset, nLastToken) + sWhat = getattr(gce_func, sWhat[1:])(self.lToken, nTokenOffset, nLastToken) else: sWhat = self._expand(sWhat, nTokenOffset, nLastToken) bUppercase = bCaseSvty and self.lToken[nTokenRewriteStart]["sValue"][0:1].isupper() if nTokenRewriteEnd - nTokenRewriteStart == 0: # one token @@ -872,410 +861,5 @@ echo(dToken) if bDebug: echo(" TEXT REWRITED: " + self.sSentence) self.lToken.clear() self.lToken = lNewToken - - -#### common functions - -def option (sOpt): - "return True if option is active" - return _dOptions.get(sOpt, False) - - -#### Functions to get text outside pattern scope - -# warning: check compile_rules.py to understand how it works - -_zNextWord = re.compile(r" +(\w[\w-]*)") 
-_zPrevWord = re.compile(r"(\w[\w-]*) +$") - -def nextword (s, iStart, n): - "get the nth word of the input string or empty string" - m = re.match("(?: +[\\w%-]+){" + str(n-1) + "} +([\\w%-]+)", s[iStart:]) - if not m: - return None - return (iStart+m.start(1), m.group(1)) - - -def prevword (s, iEnd, n): - "get the (-)nth word of the input string or empty string" - m = re.search("([\\w%-]+) +(?:[\\w%-]+ +){" + str(n-1) + "}$", s[:iEnd]) - if not m: - return None - return (m.start(1), m.group(1)) - - -def nextword1 (s, iStart): - "get next word (optimization)" - m = _zNextWord.match(s[iStart:]) - if not m: - return None - return (iStart+m.start(1), m.group(1)) - - -def prevword1 (s, iEnd): - "get previous word (optimization)" - m = _zPrevWord.search(s[:iEnd]) - if not m: - return None - return (m.start(1), m.group(1)) - - -def look (s, sPattern, sNegPattern=None): - "seek sPattern in s (before/after/fulltext), if sNegPattern not in s" - if sNegPattern and re.search(sNegPattern, s): - return False - if re.search(sPattern, s): - return True - return False - - -def look_chk1 (dTokenPos, s, nOffset, sPattern, sPatternGroup1, sNegPatternGroup1=""): - "returns True if s has pattern sPattern and m.group(1) has pattern sPatternGroup1" - m = re.search(sPattern, s) - if not m: - return False - try: - sWord = m.group(1) - nPos = m.start(1) + nOffset - except IndexError: - return False - return morph(dTokenPos, (nPos, sWord), sPatternGroup1, sNegPatternGroup1) - - - -#### Analyse groups for regex rules - -def displayInfo (dTokenPos, tWord): - "for debugging: retrieve info of word" - if not tWord: - echo("> nothing to find") - return True - lMorph = _oSpellChecker.getMorph(tWord[1]) - if not lMorph: - echo("> not in dictionary") - return True - echo("TOKENS:", dTokenPos) - if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]]: - echo("DA: " + str(dTokenPos[tWord[0]]["lMorph"])) - echo("FSA: " + str(lMorph)) - return True - - -def morph (dTokenPos, tWord, sPattern, 
sNegPattern="", bNoWord=False): - "analyse a tuple (position, word), returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on)" - if not tWord: - return bNoWord - lMorph = dTokenPos[tWord[0]]["lMorph"] if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]] else _oSpellChecker.getMorph(tWord[1]) - if not lMorph: - return False - # check negative condition - if sNegPattern: - if sNegPattern == "*": - # all morph must match sPattern - zPattern = re.compile(sPattern) - return all(zPattern.search(sMorph) for sMorph in lMorph) - zNegPattern = re.compile(sNegPattern) - if any(zNegPattern.search(sMorph) for sMorph in lMorph): - return False - # search sPattern - zPattern = re.compile(sPattern) - return any(zPattern.search(sMorph) for sMorph in lMorph) - - -def analyse (sWord, sPattern, sNegPattern=""): - "analyse a word, returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation off)" - lMorph = _oSpellChecker.getMorph(sWord) - if not lMorph: - return False - # check negative condition - if sNegPattern: - if sNegPattern == "*": - zPattern = re.compile(sPattern) - return all(zPattern.search(sMorph) for sMorph in lMorph) - zNegPattern = re.compile(sNegPattern) - if any(zNegPattern.search(sMorph) for sMorph in lMorph): - return False - # search sPattern - zPattern = re.compile(sPattern) - return any(zPattern.search(sMorph) for sMorph in lMorph) - - -#### Analyse tokens for graph rules - -def g_value (dToken, sValues, nLeft=None, nRight=None): - "test if is in sValues (each value should be separated with |)" - sValue = "|"+dToken["sValue"]+"|" if nLeft is None else "|"+dToken["sValue"][slice(nLeft, nRight)]+"|" - if sValue in sValues: - return True - if dToken["sValue"][0:2].istitle(): # we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout". 
- if sValue.lower() in sValues: - return True - elif dToken["sValue"].isupper(): - #if sValue.lower() in sValues: - # return True - sValue = "|"+sValue[1:].capitalize() - if sValue in sValues: - return True - sValue = sValue.lower() - if sValue in sValues: - return True - return False - - -def g_morph (dToken, sPattern, sNegPattern="", nLeft=None, nRight=None, bMemorizeMorph=True): - "analyse a token, return True if not in morphologies and in morphologies" - if "lMorph" in dToken: - lMorph = dToken["lMorph"] - else: - if nLeft is not None: - lMorph = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)]) - if bMemorizeMorph: - dToken["lMorph"] = lMorph - else: - lMorph = _oSpellChecker.getMorph(dToken["sValue"]) - if not lMorph: - return False - # check negative condition - if sNegPattern: - if sNegPattern == "*": - # all morph must match sPattern - zPattern = re.compile(sPattern) - return all(zPattern.search(sMorph) for sMorph in lMorph) - zNegPattern = re.compile(sNegPattern) - if any(zNegPattern.search(sMorph) for sMorph in lMorph): - return False - # search sPattern - zPattern = re.compile(sPattern) - return any(zPattern.search(sMorph) for sMorph in lMorph) - - -def g_analyse (dToken, sPattern, sNegPattern="", nLeft=None, nRight=None, bMemorizeMorph=True): - "analyse a token, return True if not in morphologies and in morphologies (disambiguation off)" - if nLeft is not None: - lMorph = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)]) - if bMemorizeMorph: - dToken["lMorph"] = lMorph - else: - lMorph = _oSpellChecker.getMorph(dToken["sValue"]) - if not lMorph: - return False - # check negative condition - if sNegPattern: - if sNegPattern == "*": - # all morph must match sPattern - zPattern = re.compile(sPattern) - return all(zPattern.search(sMorph) for sMorph in lMorph) - zNegPattern = re.compile(sNegPattern) - if any(zNegPattern.search(sMorph) for sMorph in lMorph): - return False - # search sPattern - zPattern = re.compile(sPattern) - 
return any(zPattern.search(sMorph) for sMorph in lMorph) - - -def g_merged_analyse (dToken1, dToken2, cMerger, sPattern, sNegPattern="", bSetMorph=True): - "merge two token values, return True if not in morphologies and in morphologies (disambiguation off)" - lMorph = _oSpellChecker.getMorph(dToken1["sValue"] + cMerger + dToken2["sValue"]) - if not lMorph: - return False - # check negative condition - if sNegPattern: - if sNegPattern == "*": - # all morph must match sPattern - zPattern = re.compile(sPattern) - bResult = all(zPattern.search(sMorph) for sMorph in lMorph) - if bResult and bSetMorph: - dToken1["lMorph"] = lMorph - return bResult - zNegPattern = re.compile(sNegPattern) - if any(zNegPattern.search(sMorph) for sMorph in lMorph): - return False - # search sPattern - zPattern = re.compile(sPattern) - bResult = any(zPattern.search(sMorph) for sMorph in lMorph) - if bResult and bSetMorph: - dToken1["lMorph"] = lMorph - return bResult - - -def g_tag_before (dToken, dTags, sTag): - "returns True if is present on tokens before " - if sTag not in dTags: - return False - if dToken["i"] > dTags[sTag][0]: - return True - return False - - -def g_tag_after (dToken, dTags, sTag): - "returns True if is present on tokens after " - if sTag not in dTags: - return False - if dToken["i"] < dTags[sTag][1]: - return True - return False - - -def g_tag (dToken, sTag): - "returns True if is present on token " - return "aTags" in dToken and sTag in dToken["aTags"] - - -def g_meta (dToken, sType): - "returns True if is equal to the token type" - return dToken["sType"] == sType - - -def g_space_between_tokens (dToken1, dToken2, nMin, nMax=None): - "checks if spaces between tokens is >= and <= " - nSpace = dToken2["nStart"] - dToken1["nEnd"] - if nSpace < nMin: - return False - if nMax is not None and nSpace > nMax: - return False - return True - - -def g_token (lToken, i): - "return token at index in lToken (or the closest one)" - if i < 0: - return lToken[0] - if i >= len(lToken): 
- return lToken[-1] - return lToken[i] - - - -#### Disambiguator for regex rules - -def select (dTokenPos, nPos, sWord, sPattern, lDefault=None): - "Disambiguation: select morphologies of matching " - if not sWord: - return True - if nPos not in dTokenPos: - echo("Error. There should be a token at this position: ", nPos) - return True - lMorph = _oSpellChecker.getMorph(sWord) - if not lMorph or len(lMorph) == 1: - return True - lSelect = [ sMorph for sMorph in lMorph if re.search(sPattern, sMorph) ] - if lSelect: - if len(lSelect) != len(lMorph): - dTokenPos[nPos]["lMorph"] = lSelect - elif lDefault: - dTokenPos[nPos]["lMorph"] = lDefault - return True - - -def exclude (dTokenPos, nPos, sWord, sPattern, lDefault=None): - "Disambiguation: exclude morphologies of matching " - if not sWord: - return True - if nPos not in dTokenPos: - echo("Error. There should be a token at this position: ", nPos) - return True - lMorph = _oSpellChecker.getMorph(sWord) - if not lMorph or len(lMorph) == 1: - return True - lSelect = [ sMorph for sMorph in lMorph if not re.search(sPattern, sMorph) ] - if lSelect: - if len(lSelect) != len(lMorph): - dTokenPos[nPos]["lMorph"] = lSelect - elif lDefault: - dTokenPos[nPos]["lMorph"] = lDefault - return True - - -def define (dTokenPos, nPos, lMorph): - "Disambiguation: set morphologies of token at with " - if nPos not in dTokenPos: - echo("Error. 
There should be a token at this position: ", nPos) - return True - dTokenPos[nPos]["lMorph"] = lMorph - return True - - -#### Disambiguation for graph rules - -def g_select (dToken, sPattern, lDefault=None): - "Disambiguation: select morphologies for according to , always return True" - lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"]) - if not lMorph or len(lMorph) == 1: - if lDefault: - dToken["lMorph"] = lDefault - #echo("DA:", dToken["sValue"], dToken["lMorph"]) - return True - lSelect = [ sMorph for sMorph in lMorph if re.search(sPattern, sMorph) ] - if lSelect: - if len(lSelect) != len(lMorph): - dToken["lMorph"] = lSelect - elif lDefault: - dToken["lMorph"] = lDefault - #echo("DA:", dToken["sValue"], dToken["lMorph"]) - return True - - -def g_exclude (dToken, sPattern, lDefault=None): - "Disambiguation: select morphologies for according to , always return True" - lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"]) - if not lMorph or len(lMorph) == 1: - if lDefault: - dToken["lMorph"] = lDefault - #echo("DA:", dToken["sValue"], dToken["lMorph"]) - return True - lSelect = [ sMorph for sMorph in lMorph if not re.search(sPattern, sMorph) ] - if lSelect: - if len(lSelect) != len(lMorph): - dToken["lMorph"] = lSelect - elif lDefault: - dToken["lMorph"] = lDefault - #echo("DA:", dToken["sValue"], dToken["lMorph"]) - return True - - -def g_add_morph (dToken, lNewMorph): - "Disambiguation: add a morphology to a token" - lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"]) - lMorph.extend(lNewMorph) - dToken["lMorph"] = lMorph - return True - - -def g_define (dToken, lMorph): - "Disambiguation: set morphologies of , always return True" - dToken["lMorph"] = lMorph - #echo("DA:", dToken["sValue"], lMorph) - return True - - -def g_define_from (dToken, nLeft=None, nRight=None): - "Disambiguation: set morphologies of with slicing its value with 
and " - if nLeft is not None: - dToken["lMorph"] = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)]) - else: - dToken["lMorph"] = _oSpellChecker.getMorph(dToken["sValue"]) - return True - - -def g_change_meta (dToken, sType): - "Disambiguation: change type of token" - dToken["sType"] = sType - return True - - - -#### GRAMMAR CHECKER PLUGINS - -${plugins} - - -#### CALLABLES FOR REGEX RULES (generated code) - -${callables} - - -#### CALLABLES FOR GRAPH RULES (generated code) - -${graph_callables} ADDED gc_core/py/lang_core/gc_engine_func.py Index: gc_core/py/lang_core/gc_engine_func.py ================================================================== --- /dev/null +++ gc_core/py/lang_core/gc_engine_func.py @@ -0,0 +1,427 @@ +""" +Grammar checking functions +""" + +# generated code, do not edit +# source: gc_core/py/lang_core/gc_engine_func.py + +import re + +from . import gc_options +from ..graphspell.echo import echo + + +_sAppContext = "Python" # what software is running +_oSpellChecker = None + + +def load (sContext, oSpellChecker): + global _sAppContext + global _oSpellChecker + _sAppContext = sContext + _oSpellChecker = oSpellChecker + + +#### common functions + +def option (sOpt): + "return True if option is active" + return gc_options.dOptions.get(sOpt, False) + + +#### Functions to get text outside pattern scope + +# warning: check compile_rules.py to understand how it works + +_zNextWord = re.compile(r" +(\w[\w-]*)") +_zPrevWord = re.compile(r"(\w[\w-]*) +$") + +def nextword (s, iStart, n): + "get the nth word of the input string or empty string" + m = re.match("(?: +[\\w%-]+){" + str(n-1) + "} +([\\w%-]+)", s[iStart:]) + if not m: + return None + return (iStart+m.start(1), m.group(1)) + + +def prevword (s, iEnd, n): + "get the (-)nth word of the input string or empty string" + m = re.search("([\\w%-]+) +(?:[\\w%-]+ +){" + str(n-1) + "}$", s[:iEnd]) + if not m: + return None + return (m.start(1), m.group(1)) + + +def nextword1 (s, iStart): + 
"get next word (optimization)" + m = _zNextWord.match(s[iStart:]) + if not m: + return None + return (iStart+m.start(1), m.group(1)) + + +def prevword1 (s, iEnd): + "get previous word (optimization)" + m = _zPrevWord.search(s[:iEnd]) + if not m: + return None + return (m.start(1), m.group(1)) + + +def look (s, sPattern, sNegPattern=None): + "seek sPattern in s (before/after/fulltext), if sNegPattern not in s" + if sNegPattern and re.search(sNegPattern, s): + return False + if re.search(sPattern, s): + return True + return False + + +def look_chk1 (dTokenPos, s, nOffset, sPattern, sPatternGroup1, sNegPatternGroup1=""): + "returns True if s has pattern sPattern and m.group(1) has pattern sPatternGroup1" + m = re.search(sPattern, s) + if not m: + return False + try: + sWord = m.group(1) + nPos = m.start(1) + nOffset + except IndexError: + return False + return morph(dTokenPos, (nPos, sWord), sPatternGroup1, sNegPatternGroup1) + + + +#### Analyse groups for regex rules + +def displayInfo (dTokenPos, tWord): + "for debugging: retrieve info of word" + if not tWord: + print("> nothing to find") + return True + lMorph = _oSpellChecker.getMorph(tWord[1]) + if not lMorph: + print("> not in dictionary") + return True + print("TOKENS:", dTokenPos) + if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]]: + print("DA: " + str(dTokenPos[tWord[0]]["lMorph"])) + print("FSA: " + str(lMorph)) + return True + + +def morph (dTokenPos, tWord, sPattern, sNegPattern="", bNoWord=False): + "analyse a tuple (position, word), returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on)" + if not tWord: + return bNoWord + lMorph = dTokenPos[tWord[0]]["lMorph"] if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]] else _oSpellChecker.getMorph(tWord[1]) + if not lMorph: + return False + # check negative condition + if sNegPattern: + if sNegPattern == "*": + # all morph must match sPattern + zPattern = re.compile(sPattern) + return 
all(zPattern.search(sMorph) for sMorph in lMorph) + zNegPattern = re.compile(sNegPattern) + if any(zNegPattern.search(sMorph) for sMorph in lMorph): + return False + # search sPattern + zPattern = re.compile(sPattern) + return any(zPattern.search(sMorph) for sMorph in lMorph) + + +def analyse (sWord, sPattern, sNegPattern=""): + "analyse a word, returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation off)" + lMorph = _oSpellChecker.getMorph(sWord) + if not lMorph: + return False + # check negative condition + if sNegPattern: + if sNegPattern == "*": + zPattern = re.compile(sPattern) + return all(zPattern.search(sMorph) for sMorph in lMorph) + zNegPattern = re.compile(sNegPattern) + if any(zNegPattern.search(sMorph) for sMorph in lMorph): + return False + # search sPattern + zPattern = re.compile(sPattern) + return any(zPattern.search(sMorph) for sMorph in lMorph) + + +#### Analyse tokens for graph rules + +def g_value (dToken, sValues, nLeft=None, nRight=None): + "test if is in sValues (each value should be separated with |)" + sValue = "|"+dToken["sValue"]+"|" if nLeft is None else "|"+dToken["sValue"][slice(nLeft, nRight)]+"|" + if sValue in sValues: + return True + if dToken["sValue"][0:2].istitle(): # we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout". 
+ if sValue.lower() in sValues: + return True + elif dToken["sValue"].isupper(): + #if sValue.lower() in sValues: + # return True + sValue = "|"+sValue[1:].capitalize() + if sValue in sValues: + return True + sValue = sValue.lower() + if sValue in sValues: + return True + return False + + +def g_morph (dToken, sPattern, sNegPattern="", nLeft=None, nRight=None, bMemorizeMorph=True): + "analyse a token, return True if not in morphologies and in morphologies" + if "lMorph" in dToken: + lMorph = dToken["lMorph"] + else: + if nLeft is not None: + lMorph = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)]) + if bMemorizeMorph: + dToken["lMorph"] = lMorph + else: + lMorph = _oSpellChecker.getMorph(dToken["sValue"]) + if not lMorph: + return False + # check negative condition + if sNegPattern: + if sNegPattern == "*": + # all morph must match sPattern + zPattern = re.compile(sPattern) + return all(zPattern.search(sMorph) for sMorph in lMorph) + zNegPattern = re.compile(sNegPattern) + if any(zNegPattern.search(sMorph) for sMorph in lMorph): + return False + # search sPattern + zPattern = re.compile(sPattern) + return any(zPattern.search(sMorph) for sMorph in lMorph) + + +def g_analyse (dToken, sPattern, sNegPattern="", nLeft=None, nRight=None, bMemorizeMorph=True): + "analyse a token, return True if not in morphologies and in morphologies (disambiguation off)" + if nLeft is not None: + lMorph = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)]) + if bMemorizeMorph: + dToken["lMorph"] = lMorph + else: + lMorph = _oSpellChecker.getMorph(dToken["sValue"]) + if not lMorph: + return False + # check negative condition + if sNegPattern: + if sNegPattern == "*": + # all morph must match sPattern + zPattern = re.compile(sPattern) + return all(zPattern.search(sMorph) for sMorph in lMorph) + zNegPattern = re.compile(sNegPattern) + if any(zNegPattern.search(sMorph) for sMorph in lMorph): + return False + # search sPattern + zPattern = re.compile(sPattern) + 
return any(zPattern.search(sMorph) for sMorph in lMorph) + + +def g_merged_analyse (dToken1, dToken2, cMerger, sPattern, sNegPattern="", bSetMorph=True): + "merge two token values, return True if not in morphologies and in morphologies (disambiguation off)" + lMorph = _oSpellChecker.getMorph(dToken1["sValue"] + cMerger + dToken2["sValue"]) + if not lMorph: + return False + # check negative condition + if sNegPattern: + if sNegPattern == "*": + # all morph must match sPattern + zPattern = re.compile(sPattern) + bResult = all(zPattern.search(sMorph) for sMorph in lMorph) + if bResult and bSetMorph: + dToken1["lMorph"] = lMorph + return bResult + zNegPattern = re.compile(sNegPattern) + if any(zNegPattern.search(sMorph) for sMorph in lMorph): + return False + # search sPattern + zPattern = re.compile(sPattern) + bResult = any(zPattern.search(sMorph) for sMorph in lMorph) + if bResult and bSetMorph: + dToken1["lMorph"] = lMorph + return bResult + + +def g_tag_before (dToken, dTags, sTag): + "returns True if is present on tokens before " + if sTag not in dTags: + return False + if dToken["i"] > dTags[sTag][0]: + return True + return False + + +def g_tag_after (dToken, dTags, sTag): + "returns True if is present on tokens after " + if sTag not in dTags: + return False + if dToken["i"] < dTags[sTag][1]: + return True + return False + + +def g_tag (dToken, sTag): + "returns True if is present on token " + return "aTags" in dToken and sTag in dToken["aTags"] + + +def g_meta (dToken, sType): + "returns True if is equal to the token type" + return dToken["sType"] == sType + + +def g_space_between_tokens (dToken1, dToken2, nMin, nMax=None): + "checks if spaces between tokens is >= and <= " + nSpace = dToken2["nStart"] - dToken1["nEnd"] + if nSpace < nMin: + return False + if nMax is not None and nSpace > nMax: + return False + return True + + +def g_token (lToken, i): + "return token at index in lToken (or the closest one)" + if i < 0: + return lToken[0] + if i >= len(lToken): 
+ return lToken[-1] + return lToken[i] + + + +#### Disambiguator for regex rules + +def select (dTokenPos, nPos, sWord, sPattern, lDefault=None): + "Disambiguation: select morphologies of matching " + if not sWord: + return True + if nPos not in dTokenPos: + print("Error. There should be a token at this position: ", nPos) + return True + lMorph = _oSpellChecker.getMorph(sWord) + if not lMorph or len(lMorph) == 1: + return True + lSelect = [ sMorph for sMorph in lMorph if re.search(sPattern, sMorph) ] + if lSelect: + if len(lSelect) != len(lMorph): + dTokenPos[nPos]["lMorph"] = lSelect + elif lDefault: + dTokenPos[nPos]["lMorph"] = lDefault + return True + + +def exclude (dTokenPos, nPos, sWord, sPattern, lDefault=None): + "Disambiguation: exclude morphologies of matching " + if not sWord: + return True + if nPos not in dTokenPos: + print("Error. There should be a token at this position: ", nPos) + return True + lMorph = _oSpellChecker.getMorph(sWord) + if not lMorph or len(lMorph) == 1: + return True + lSelect = [ sMorph for sMorph in lMorph if not re.search(sPattern, sMorph) ] + if lSelect: + if len(lSelect) != len(lMorph): + dTokenPos[nPos]["lMorph"] = lSelect + elif lDefault: + dTokenPos[nPos]["lMorph"] = lDefault + return True + + +def define (dTokenPos, nPos, lMorph): + "Disambiguation: set morphologies of token at with " + if nPos not in dTokenPos: + print("Error. 
There should be a token at this position: ", nPos) + return True + dTokenPos[nPos]["lMorph"] = lMorph + return True + + +#### Disambiguation for graph rules + +def g_select (dToken, sPattern, lDefault=None): + "Disambiguation: select morphologies for according to , always return True" + lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"]) + if not lMorph or len(lMorph) == 1: + if lDefault: + dToken["lMorph"] = lDefault + #print("DA:", dToken["sValue"], dToken["lMorph"]) + return True + lSelect = [ sMorph for sMorph in lMorph if re.search(sPattern, sMorph) ] + if lSelect: + if len(lSelect) != len(lMorph): + dToken["lMorph"] = lSelect + elif lDefault: + dToken["lMorph"] = lDefault + #print("DA:", dToken["sValue"], dToken["lMorph"]) + return True + + +def g_exclude (dToken, sPattern, lDefault=None): + "Disambiguation: select morphologies for according to , always return True" + lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"]) + if not lMorph or len(lMorph) == 1: + if lDefault: + dToken["lMorph"] = lDefault + #print("DA:", dToken["sValue"], dToken["lMorph"]) + return True + lSelect = [ sMorph for sMorph in lMorph if not re.search(sPattern, sMorph) ] + if lSelect: + if len(lSelect) != len(lMorph): + dToken["lMorph"] = lSelect + elif lDefault: + dToken["lMorph"] = lDefault + #print("DA:", dToken["sValue"], dToken["lMorph"]) + return True + + +def g_add_morph (dToken, lNewMorph): + "Disambiguation: add a morphology to a token" + lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"]) + lMorph.extend(lNewMorph) + dToken["lMorph"] = lMorph + return True + + +def g_define (dToken, lMorph): + "Disambiguation: set morphologies of , always return True" + dToken["lMorph"] = lMorph + #print("DA:", dToken["sValue"], lMorph) + return True + + +def g_define_from (dToken, nLeft=None, nRight=None): + "Disambiguation: set morphologies of with slicing its value 
with and " + if nLeft is not None: + dToken["lMorph"] = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)]) + else: + dToken["lMorph"] = _oSpellChecker.getMorph(dToken["sValue"]) + return True + + +def g_change_meta (dToken, sType): + "Disambiguation: change type of token" + dToken["sType"] = sType + return True + + + +#### GRAMMAR CHECKER PLUGINS + +${plugins} + + +#### CALLABLES FOR REGEX RULES (generated code) + +${callables} + + +#### CALLABLES FOR GRAPH RULES (generated code) + +${graph_callables} Index: gc_core/py/lang_core/gc_options.py ================================================================== --- gc_core/py/lang_core/gc_options.py +++ gc_core/py/lang_core/gc_options.py @@ -1,42 +1,89 @@ """ Grammar checker default options """ # generated code, do not edit +# source: gc_core/py/lang_core/gc_options.py import traceback -def getUI (sLang): + +dOptions = {} + +_sAppContext = "Python" + + +def load (sContext="Python"): + global dOptions + global _sAppContext + _sAppContext = sContext + dOptions = getDefaultOptions(sContext) + + +def setOption (sOpt, bVal): + "set option with if it exists" + if sOpt in dOptions: + dOptions[sOpt] = bVal + + +def setOptions (dOpt): + "update the dictionary of options with , only known options are updated" + for sKey, bVal in dOpt.items(): + if sKey in dOptions: + dOptions[sKey] = bVal + + +def getOptions (): + "return a copy of options as dictionary" + return dOptions.copy() + + +def resetOptions (): + "set options to default values" + global dOptions + dOptions = getDefaultOptions() + + +def displayOptions (sLang="${lang}"): + "display the list of grammar checking options" + print("Options:") + print("\n".join( [ k+":\t"+str(v)+"\t"+getOptionLabels(sLang).get(k, ("?", ""))[0] for k, v in sorted(dOptions.items()) ] )) + print("") + + +def getOptionLabels (sLang="${sLang}"): "returns dictionary of UI labels" if sLang in _dOptLabel: return _dOptLabel[sLang] - return _dOptLabel["fr"] + return 
_dOptLabel["${sLang}"] -def getOptions (sContext="Python"): +def getDefaultOptions (sContext=""): "returns dictionary of options" - if sContext in _dOpt: - return _dOpt[sContext] - return _dOpt["Python"] + if not sContext: + sContext = _sAppContext + if sContext in _dDefaultOpt: + return _dDefaultOpt[sContext].copy() # duplication necessary, to be able to reset to default + return _dDefaultOpt["Python"].copy() # duplication necessary, to be able to reset to default def getOptionsColors (sTheme="Default", sColorType="aRGB"): "returns dictionary of options colors" dOptColor = _dOptColor[sTheme] if sTheme in _dOptColor else _dOptColor["Default"] dColorType = _dColorType[sColorType] if sColorType in _dColorType else _dColorType["aRGB"] try: - return { sOpt: dColorType[sColor] for sOpt, sColor in dOptColor.items() } + return { sOpt: dColorType[sColor] for sOpt, sColor in dOptColor.items() } except KeyError: traceback.print_exc() return {} lStructOpt = ${lStructOpt} -_dOpt = { +_dDefaultOpt = { "Python": ${dOptPython}, "Server": ${dOptServer}, "Writer": ${dOptWriter} } Index: gc_core/py/oxt/Grammalecte.py ================================================================== --- gc_core/py/oxt/Grammalecte.py +++ gc_core/py/oxt/Grammalecte.py @@ -13,36 +13,36 @@ from com.sun.star.linguistic2 import ProofreadingResult from com.sun.star.lang import XServiceInfo, XServiceName, XServiceDisplayName from com.sun.star.lang import Locale import helpers -import grammalecte.${lang} as gce +import grammalecte.${lang} as gc_engine #import lightproof_handler_${implname} as opt_handler import Options class Grammalecte (unohelper.Base, XProofreader, XServiceInfo, XServiceName, XServiceDisplayName, XSupportedLocales): def __init__ (self, ctx, *args): self.ctx = ctx self.ServiceName = "com.sun.star.linguistic2.Proofreader" - self.ImplementationName = "org.openoffice.comp.pyuno.Lightproof." + gce.pkg + self.ImplementationName = "org.openoffice.comp.pyuno.Lightproof." 
+ gc_engine.pkg self.SupportedServiceNames = (self.ServiceName, ) self.locales = [] - for i in gce.locales: - l = gce.locales[i] + for i in gc_engine.locales: + l = gc_engine.locales[i] self.locales.append(Locale(l[0], l[1], l[2])) self.locales = tuple(self.locales) # debug #helpers.startConsole() # init - gce.load("Writer", "nInt") + gc_engine.load("Writer", "nInt") # GC options #xContext = uno.getComponentContext() #opt_handler.load(xContext) dOpt = Options.loadOptions("${lang}") - gce.setOptions(dOpt) + gc_engine.gc_options.setOptions(dOpt) # dictionaries options self.loadUserDictionaries() # underlining options self.setWriterUnderliningStyle() # store for results of big paragraphs @@ -109,11 +109,11 @@ # WORKAROUND ->>> xRes.nBehindEndOfSentencePosition = xRes.nStartOfNextSentencePosition try: - xRes.aErrors = tuple(gce.parse(rText, rLocale.Country)) + xRes.aErrors = tuple(gc_engine.parse(rText, rLocale.Country)) # ->>> WORKAROUND if xRes.nStartOfNextSentencePosition > 3000: self.dResult[nHashedVal] = xRes self.nRes += 1 if self.nRes > self.nMaxRes: @@ -124,31 +124,31 @@ except: traceback.print_exc() return xRes def ignoreRule (self, rid, aLocale): - gce.ignoreRule(rid) + gc_engine.ignoreRule(rid) def resetIgnoreRules (self): - gce.resetIgnoreRules() + gc_engine.resetIgnoreRules() # XServiceDisplayName def getServiceDisplayName (self, aLocale): - return gce.name + return gc_engine.name # Grammalecte def getSpellChecker (self): - return gce.getSpellChecker() + return gc_engine.getSpellChecker() def loadUserDictionaries (self): try: xSettingNode = helpers.getConfigSetting("/org.openoffice.Lightproof_${implname}/Other/", False) xChild = xSettingNode.getByName("o_${lang}") if xChild.getPropertyValue("use_personal_dic"): sJSON = xChild.getPropertyValue("personal_dic") if sJSON: - oSpellChecker = gce.getSpellChecker(); + oSpellChecker = gc_engine.getSpellChecker(); oSpellChecker.setPersonalDictionary(json.loads(sJSON)) except: traceback.print_exc() def 
setWriterUnderliningStyle (self): @@ -155,15 +155,15 @@ try: xSettingNode = helpers.getConfigSetting("/org.openoffice.Lightproof_${implname}/Other/", False) xChild = xSettingNode.getByName("o_${lang}") sLineType = xChild.getPropertyValue("line_type") bMulticolor = bool(xChild.getPropertyValue("line_multicolor")) - gce.setWriterUnderliningStyle(sLineType, bMulticolor) + gc_engine.setWriterUnderliningStyle(sLineType, bMulticolor) except: traceback.print_exc() g_ImplementationHelper = unohelper.ImplementationHelper() -g_ImplementationHelper.addImplementation(Grammalecte, "org.openoffice.comp.pyuno.Lightproof."+gce.pkg, ("com.sun.star.linguistic2.Proofreader",),) +g_ImplementationHelper.addImplementation(Grammalecte, "org.openoffice.comp.pyuno.Lightproof."+gc_engine.pkg, ("com.sun.star.linguistic2.Proofreader",),) # g_ImplementationHelper.addImplementation( opt_handler.LightproofOptionsEventHandler, \ -# "org.openoffice.comp.pyuno.LightproofOptionsEventHandler." + gce.pkg, ("com.sun.star.awt.XContainerWindowEventHandler",),) +# "org.openoffice.comp.pyuno.LightproofOptionsEventHandler." 
+ gc_engine.pkg, ("com.sun.star.awt.XContainerWindowEventHandler",),) Index: gc_core/py/oxt/Options.py ================================================================== --- gc_core/py/oxt/Options.py +++ gc_core/py/oxt/Options.py @@ -11,30 +11,30 @@ import helpers import op_strings try: - import grammalecte.${lang} as gce + import grammalecte.${lang} as gc_engine except: traceback.print_exc() def loadOptions (sLang): "load options from Grammalecte and change them according to LibreOffice settings, returns a dictionary {option_name: boolean}" try: xNode = helpers.getConfigSetting("/org.openoffice.Lightproof_${implname}/Leaves", False) xChild = xNode.getByName(sLang) - dOpt = gce.gc_options.getOptions("Writer") + dOpt = gc_engine.gc_options.getDefaultOptions("Writer") for sKey in dOpt: sValue = xChild.getPropertyValue(sKey) if sValue != '': dOpt[sKey] = bool(int(sValue)) return dOpt except: print("# Error. Unable to load options of language:", sLang) traceback.print_exc() - return gce.gc_options.getOptions("Writer") + return gc_engine.gc_options.getDefaultOptions("Writer") def saveOptions (sLang, dOpt): "save options in LibreOffice profile" try: @@ -70,11 +70,11 @@ return xWidget def run (self, sUI): try: dUI = op_strings.getUI(sUI) - dOptionUI = gce.gc_options.getUI(sUI) + dOptionUI = gc_engine.gc_options.getOptionLabels(sUI) # fonts xFDTitle = uno.createUnoStruct("com.sun.star.awt.FontDescriptor") xFDTitle.Height = 9 xFDTitle.Weight = uno.getConstantByName("com.sun.star.awt.FontWeight.BOLD") @@ -99,11 +99,11 @@ self.lOptionWidgets = [] sProdName, sVersion = helpers.getProductNameAndVersion() if True: # no tab available (bug) - for sOptionType, lOptions in gce.gc_options.lStructOpt: + for sOptionType, lOptions in gc_engine.gc_options.lStructOpt: x = 10 y += 10 self._addWidget(sOptionType, 'FixedLine', x, y, nWidth, nHeight, Label = dOptionUI.get(sOptionType, "#err")[0], FontDescriptor= xFDTitle) y += 3 for lOptLine in lOptions: @@ -162,11 +162,11 @@ # 
XActionListener def actionPerformed (self, xActionEvent): try: if xActionEvent.ActionCommand == 'Default': - self._setWidgets(gce.gc_options.getOptions("Writer")) + self._setWidgets(gc_engine.gc_options.getDefaultOptions("Writer")) elif xActionEvent.ActionCommand == 'Apply': self._save("${lang}") self.xContainer.endExecute() elif xActionEvent.ActionCommand == 'Cancel': self.xContainer.endExecute() @@ -181,8 +181,8 @@ w.State = dOpt.get(w.Name, False) def _save (self, sLang): try: saveOptions(sLang, { w.Name: str(w.State) for w in self.lOptionWidgets }) - gce.setOptions({ w.Name: bool(w.State) for w in self.lOptionWidgets }) + gc_engine.gc_options.setOptions({ w.Name: bool(w.State) for w in self.lOptionWidgets }) except: traceback.print_exc() Index: gc_lang/fr/config.ini ================================================================== --- gc_lang/fr/config.ini +++ gc_lang/fr/config.ini @@ -4,11 +4,11 @@ locales = fr_FR fr_BE fr_CA fr_CH fr_LU fr_BF fr_BJ fr_CD fr_CI fr_CM fr_MA fr_ML fr_MU fr_NE fr_RE fr_SN fr_TG country_default = FR name = Grammalecte implname = grammalecte # always use 3 numbers for version: x.y.z -version = 1.9.0 +version = 2.0.0 author = Olivier R. provider = Grammalecte.net link = https://grammalecte.net description = Correcteur grammatical, orthographique et typographique pour le français. 
extras = README_fr.txt Index: gc_lang/fr/mailext/worker/gce_worker.js ================================================================== --- gc_lang/fr/mailext/worker/gce_worker.js +++ gc_lang/fr/mailext/worker/gce_worker.js @@ -43,10 +43,11 @@ // no console here, use “dump” let gce = null; // module: grammar checker engine +let gco = null; let text = null; let tkz = null; // module: tokenizer let lxg = null; // module: lexicographer let helpers = null; @@ -55,23 +56,24 @@ let oLxg = null; function loadGrammarChecker (sGCOptions="", sContext="JavaScript") { if (gce === null) { try { + gco = require("resource://grammalecte/fr/gc_options.js"); gce = require("resource://grammalecte/fr/gc_engine.js"); helpers = require("resource://grammalecte/graphspell/helpers.js"); text = require("resource://grammalecte/text.js"); tkz = require("resource://grammalecte/graphspell/tokenizer.js"); //lxg = require("resource://grammalecte/fr/lexicographe.js"); oTokenizer = new tkz.Tokenizer("fr"); gce.load(sContext, "sCSS"); oSpellChecker = gce.getSpellChecker(); if (sGCOptions !== "") { - gce.setOptions(helpers.objectToMap(JSON.parse(sGCOptions))); + gco.setOptions(helpers.objectToMap(JSON.parse(sGCOptions))); } // we always retrieve options from the gce, for setOptions filters obsolete options - return gce.getOptions().gl_toString(); + return gco.getOptions().gl_toString(); } catch (e) { console.log("# Error: " + e.fileName + "\n" + e.name + "\nline: " + e.lineNumber + "\n" + e.message); } } @@ -117,45 +119,45 @@ } return lSugg.join("|"); } function getOptions () { - return gce.getOptions().gl_toString(); + return gco.getOptions().gl_toString(); } function getDefaultOptions () { - return gce.getDefaultOptions().gl_toString(); + return gco.getDefaultOptions().gl_toString(); } function setOptions (sGCOptions) { - gce.setOptions(helpers.objectToMap(JSON.parse(sGCOptions))); - return gce.getOptions().gl_toString(); + gco.setOptions(helpers.objectToMap(JSON.parse(sGCOptions))); + return 
gco.getOptions().gl_toString(); } function setOption (sOptName, bValue) { - gce.setOptions(new Map([ [sOptName, bValue] ])); - return gce.getOptions().gl_toString(); + gco.setOptions(new Map([ [sOptName, bValue] ])); + return gco.getOptions().gl_toString(); } function resetOptions () { - gce.resetOptions(); - return gce.getOptions().gl_toString(); + gco.resetOptions(); + return gco.getOptions().gl_toString(); } function fullTests (sGCOptions="") { - if (!gce || !oSpellChecker) { + if (!gce || !oSpellChecker || !gco) { return "# Error: grammar checker or dictionary not loaded." } - let dMemoOptions = gce.getOptions(); + let dMemoOptions = gco.getOptions(); if (sGCOptions) { - gce.setOptions(helpers.objectToMap(JSON.parse(sGCOptions))); + gco.setOptions(helpers.objectToMap(JSON.parse(sGCOptions))); } let tests = require("resource://grammalecte/tests.js"); let oTest = new tests.TestGrammarChecking(gce); let sAllRes = ""; for (let sRes of oTest.testParse()) { console.log(sRes+"\n"); sAllRes += sRes+"\n"; } - gce.setOptions(dMemoOptions); + gco.setOptions(dMemoOptions); return sAllRes; } DELETED gc_lang/fr/modules-js/lexicographe.js Index: gc_lang/fr/modules-js/lexicographe.js ================================================================== --- gc_lang/fr/modules-js/lexicographe.js +++ /dev/null @@ -1,599 +0,0 @@ -// Grammalecte - Lexicographe -// License: MPL 2 - -/* jshint esversion:6, -W097 */ -/* jslint esversion:6 */ -/* global require, exports, console */ - -"use strict"; - -${string} -${map} - - -const _dTag = new Map([ - [':N', [" nom,", "Nom"]], - [':A', [" adjectif,", "Adjectif"]], - [':M1', [" prénom,", "Prénom"]], - [':M2', [" patronyme,", "Patronyme, matronyme, nom de famille…"]], - [':MP', [" nom propre,", "Nom propre"]], - [':W', [" adverbe,", "Adverbe"]], - [':J', [" interjection,", "Interjection"]], - [':B', [" nombre,", "Nombre"]], - [':T', [" titre,", "Titre de civilité"]], - - [':e', [" épicène", "épicène"]], - [':m', [" masculin", "masculin"]], - 
[':f', [" féminin", "féminin"]], - [':s', [" singulier", "singulier"]], - [':p', [" pluriel", "pluriel"]], - [':i', [" invariable", "invariable"]], - - [':V1', [" verbe (1ᵉʳ gr.),", "Verbe du 1ᵉʳ groupe"]], - [':V2', [" verbe (2ᵉ gr.),", "Verbe du 2ᵉ groupe"]], - [':V3', [" verbe (3ᵉ gr.),", "Verbe du 3ᵉ groupe"]], - [':V0e', [" verbe,", "Verbe auxiliaire être"]], - [':V0a', [" verbe,", "Verbe auxiliaire avoir"]], - - [':Y', [" infinitif,", "infinitif"]], - [':P', [" participe présent,", "participe présent"]], - [':Q', [" participe passé,", "participe passé"]], - [':Ip', [" présent,", "indicatif présent"]], - [':Iq', [" imparfait,", "indicatif imparfait"]], - [':Is', [" passé simple,", "indicatif passé simple"]], - [':If', [" futur,", "indicatif futur"]], - [':K', [" conditionnel présent,", "conditionnel présent"]], - [':Sp', [" subjonctif présent,", "subjonctif présent"]], - [':Sq', [" subjonctif imparfait,", "subjonctif imparfait"]], - [':E', [" impératif,", "impératif"]], - - [':1s', [" 1ʳᵉ p. sg.,", "verbe : 1ʳᵉ personne du singulier"]], - [':1ŝ', [" présent interr. 1ʳᵉ p. sg.,", "verbe : 1ʳᵉ personne du singulier (présent interrogatif)"]], - [':1ś', [" présent interr. 1ʳᵉ p. sg.,", "verbe : 1ʳᵉ personne du singulier (présent interrogatif)"]], - [':2s', [" 2ᵉ p. sg.,", "verbe : 2ᵉ personne du singulier"]], - [':3s', [" 3ᵉ p. sg.,", "verbe : 3ᵉ personne du singulier"]], - [':1p', [" 1ʳᵉ p. pl.,", "verbe : 1ʳᵉ personne du pluriel"]], - [':2p', [" 2ᵉ p. pl.,", "verbe : 2ᵉ personne du pluriel"]], - [':3p', [" 3ᵉ p. pl.,", "verbe : 3ᵉ personne du pluriel"]], - [':3p!', [" 3ᵉ p. 
pl.,", "verbe : 3ᵉ personne du pluriel (prononciation distinctive)"]], - - [':G', ["[mot grammatical]", "Mot grammatical"]], - [':X', [" adverbe de négation,", "Adverbe de négation"]], - [':U', [" adverbe interrogatif,", "Adverbe interrogatif"]], - [':R', [" préposition,", "Préposition"]], - [':Rv', [" préposition verbale,", "Préposition verbale"]], - [':D', [" déterminant,", "Déterminant"]], - [':Dd', [" déterminant démonstratif,", "Déterminant démonstratif"]], - [':De', [" déterminant exclamatif,", "Déterminant exclamatif"]], - [':Dp', [" déterminant possessif,", "Déterminant possessif"]], - [':Di', [" déterminant indéfini,", "Déterminant indéfini"]], - [':Dn', [" déterminant négatif,", "Déterminant négatif"]], - [':Od', [" pronom démonstratif,", "Pronom démonstratif"]], - [':Oi', [" pronom indéfini,", "Pronom indéfini"]], - [':On', [" pronom indéfini négatif,", "Pronom indéfini négatif"]], - [':Ot', [" pronom interrogatif,", "Pronom interrogatif"]], - [':Or', [" pronom relatif,", "Pronom relatif"]], - [':Ow', [" pronom adverbial,", "Pronom adverbial"]], - [':Os', [" pronom personnel sujet,", "Pronom personnel sujet"]], - [':Oo', [" pronom personnel objet,", "Pronom personnel objet"]], - [':Ov', [" préverbe,", "Préverbe (pronom personnel objet, +ne)"]], - [':O1', [" 1ʳᵉ pers.,", "Pronom : 1ʳᵉ personne"]], - [':O2', [" 2ᵉ pers.,", "Pronom : 2ᵉ personne"]], - [':O3', [" 3ᵉ pers.,", "Pronom : 3ᵉ personne"]], - [':C', [" conjonction,", "Conjonction"]], - [':Ĉ', [" conjonction (él.),", "Conjonction (élément)"]], - [':Cc', [" conjonction de coordination,", "Conjonction de coordination"]], - [':Cs', [" conjonction de subordination,", "Conjonction de subordination"]], - [':Ĉs', [" conjonction de subordination (él.),", "Conjonction de subordination (élément)"]], - - [':Ñ', [" locution nominale (él.),", "Locution nominale (élément)"]], - [':Â', [" locution adjectivale (él.),", "Locution adjectivale (élément)"]], - [':Ṽ', [" locution verbale (él.),", "Locution verbale 
(élément)"]], - [':Ŵ', [" locution adverbiale (él.),", "Locution adverbiale (élément)"]], - [':Ŕ', [" locution prépositive (él.),", "Locution prépositive (élément)"]], - [':Ĵ', [" locution interjective (él.),", "Locution interjective (élément)"]], - - [':Zp', [" préfixe,", "Préfixe"]], - [':Zs', [" suffixe,", "Suffixe"]], - - [':H', ["", ""]], - - [':@', ["", ""]], - [':@p', ["signe de ponctuation", "Signe de ponctuation"]], - [':@s', ["signe", "Signe divers"]], - - [';S', [" : symbole (unité de mesure)", "Symbole (unité de mesure)"]], - - ['/*', ["", "Sous-dictionnaire "]], - ['/C', [" ", "Sous-dictionnaire "]], - ['/M', ["", "Sous-dictionnaire "]], - ['/R', [" ", "Sous-dictionnaire "]], - ['/A', ["", "Sous-dictionnaire "]], - ['/X', ["", "Sous-dictionnaire "]] -]); - - -const _dLocTag = new Map([ - [':L', "locution"], - [':LN', "locution nominale"], - [':LA', "locution adjectivale"], - [':LV', "locution verbale"], - [':LW', "locution adverbiale"], - [':LR', "locution prépositive"], - [':LRv', "locution prépositive verbale"], - [':LO', "locution pronominale"], - [':LC', "locution conjonctive"], - [':LJ', "locution interjective"], - - [':B', " cardinale"], - [':e', " épicène"], - [':m', " masculine"], - [':f', " féminine"], - [':s', " singulière"], - [':p', " plurielle"], - [':i', " invariable"], - ['/L', " (latin)"] -]); - -const _dLocVerb = new Map([ - ['i', " intransitive"], - ['n', " transitive indirecte"], - ['t', " transitive directe"], - ['p', " pronominale"], - ['m', " impersonnelle"], -]); - -const _dElidedPrefix = new Map([ - ['d', "(de), déterminant épicène invariable"], - ['l', "(le/la), déterminant masculin/féminin singulier"], - ['j', "(je), pronom personnel sujet, 1ʳᵉ pers., épicène singulier"], - ['m', "(me), pronom personnel objet, 1ʳᵉ pers., épicène singulier"], - ['t', "(te), pronom personnel objet, 2ᵉ pers., épicène singulier"], - ['s', "(se), pronom personnel objet, 3ᵉ pers., épicène singulier/pluriel"], - ['n', "(ne), adverbe de négation"], - 
['c', "(ce), pronom démonstratif, masculin singulier/pluriel"], - ['ç', "(ça), pronom démonstratif, masculin singulier"], - ['qu', "(que), conjonction de subordination"], - ['lorsqu', "(lorsque), conjonction de subordination"], - ['puisqu', "(lorsque), conjonction de subordination"], - ['quoiqu', "(quoique), conjonction de subordination"], - ['jusqu', "(jusque), préposition"] -]); - -const _dPronoms = new Map([ - ['je', " pronom personnel sujet, 1ʳᵉ pers. sing."], - ['tu', " pronom personnel sujet, 2ᵉ pers. sing."], - ['il', " pronom personnel sujet, 3ᵉ pers. masc. sing."], - ['on', " pronom personnel sujet, 3ᵉ pers. sing. ou plur."], - ['elle', " pronom personnel sujet, 3ᵉ pers. fém. sing."], - ['nous', " pronom personnel sujet/objet, 1ʳᵉ pers. plur."], - ['vous', " pronom personnel sujet/objet, 2ᵉ pers. plur."], - ['ils', " pronom personnel sujet, 3ᵉ pers. masc. plur."], - ['elles', " pronom personnel sujet, 3ᵉ pers. masc. plur."], - - ["là", " particule démonstrative"], - ["ci", " particule démonstrative"], - - ['le', " COD, masc. sing."], - ['la', " COD, fém. 
sing."], - ['les', " COD, plur."], - - ['moi', " COI (à moi), sing."], - ['toi', " COI (à toi), sing."], - ['lui', " COI (à lui ou à elle), sing."], - ['nous2', " COI (à nous), plur."], - ['vous2', " COI (à vous), plur."], - ['leur', " COI (à eux ou à elles), plur."], - - ['y', " pronom adverbial"], - ["m'y", " (me) pronom personnel objet + (y) pronom adverbial"], - ["t'y", " (te) pronom personnel objet + (y) pronom adverbial"], - ["s'y", " (se) pronom personnel objet + (y) pronom adverbial"], - - ['en', " pronom adverbial"], - ["m'en", " (me) pronom personnel objet + (en) pronom adverbial"], - ["t'en", " (te) pronom personnel objet + (en) pronom adverbial"], - ["s'en", " (se) pronom personnel objet + (en) pronom adverbial"] -]); - -const _dChar = new Map([ - ['.', "point"], - ['·', "point médian"], - ['…', "points de suspension"], - [':', "deux-points"], - [';', "point-virgule"], - [',', "virgule"], - ['?', "point d’interrogation"], - ['!', "point d’exclamation"], - ['(', "parenthèse ouvrante"], - [')', "parenthèse fermante"], - ['[', "crochet ouvrant"], - [']', "crochet fermant"], - ['{', "accolade ouvrante"], - ['}', "accolade fermante"], - ['-', "tiret"], - ['—', "tiret cadratin"], - ['–', "tiret demi-cadratin"], - ['«', "guillemet ouvrant (chevrons)"], - ['»', "guillemet fermant (chevrons)"], - ['“', "guillemet ouvrant double"], - ['”', "guillemet fermant double"], - ['‘', "guillemet ouvrant"], - ['’', "guillemet fermant"], - ['"', "guillemets droits (déconseillé en typographie)"], - ['/', "signe de la division"], - ['+', "signe de l’addition"], - ['*', "signe de la multiplication"], - ['=', "signe de l’égalité"], - ['<', "inférieur à"], - ['>', "supérieur à"], - ['⩽', "inférieur ou égal à"], - ['⩾', "supérieur ou égal à"], - ['%', "signe de pourcentage"], - ['‰', "signe pour mille"], -]); - - -class Lexicographe { - - constructor (oSpellChecker, oTokenizer, oLocGraph) { - this.oSpellChecker = oSpellChecker; - this.oTokenizer = oTokenizer; - this.oLocGraph = 
JSON.parse(oLocGraph); - - this._zPartDemForm = new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)-(là|ci)$", "i"); - this._aPartDemExceptList = new Set(["celui", "celle", "ceux", "celles", "de", "jusque", "par", "marie-couche-toi"]); - this._zInterroVerb = new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)-(t-(?:il|elle|on)|je|tu|ils?|elles?|on|[nv]ous)$", "i"); - this._zImperatifVerb = new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)-((?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|y|en|[mts][’'](?:y|en)|les?|la|[mt]oi|leur|lui)$", "i"); - this._zTag = new RegExp("[:;/][a-zA-Z0-9ÑÂĴĈŔÔṼŴ!][^:;/]*", "g"); - } - - getInfoForToken (oToken) { - // Token: .sType, .sValue, .nStart, .nEnd - // return a object {sType, sValue, aLabel} - let m = null; - try { - switch (oToken.sType) { - case 'PUNC': - case 'SIGN': - return { - sType: oToken.sType, - sValue: oToken.sValue, - aLabel: [_dChar.gl_get(oToken.sValue, "caractère indéterminé")] - }; - break; - case 'NUM': - return { - sType: oToken.sType, - sValue: oToken.sValue, - aLabel: ["nombre"] - }; - break; - case 'LINK': - return { - sType: oToken.sType, - sValue: oToken.sValue.slice(0, 40) + "…", - aLabel: ["hyperlien"] - }; - break; - case 'TAG': - return { - sType: oToken.sType, - sValue: oToken.sValue, - aLabel: ["étiquette (hashtag)"] - }; - break; - case 'HTML': - return { - sType: oToken.sType, - sValue: oToken.sValue.slice(0, 40) + "…", - aLabel: ["balise HTML"] - }; - break; - case 'PSEUDOHTML': - return { - sType: oToken.sType, - sValue: oToken.sValue, - aLabel: ["balise pseudo-HTML"] - }; - break; - case 'HTMLENTITY': - return { - sType: oToken.sType, - sValue: oToken.sValue, - aLabel: ["entité caractère XML/HTML"] - }; - break; - case 'HOUR': - return { - sType: oToken.sType, - sValue: oToken.sValue, - aLabel: ["heure"] - }; - break; - case 'WORD_ELIDED': - let sTemp = oToken.sValue.replace("’", "").replace("'", "").replace("`", "").toLowerCase(); - return { - sType: oToken.sType, - sValue: oToken.sValue, - aLabel: 
[_dElidedPrefix.gl_get(sTemp, "préfixe élidé inconnu")] - }; - break; - case 'WORD_ORDINAL': - return { - sType: oToken.sType, - sValue: oToken.sValue, - aLabel: ["nombre ordinal"] - }; - break; - case 'FOLDERUNIX': - return { - sType: oToken.sType, - sValue: oToken.sValue.slice(0, 40) + "…", - aLabel: ["dossier UNIX (et dérivés)"] - }; - break; - case 'FOLDERWIN': - return { - sType: oToken.sType, - sValue: oToken.sValue.slice(0, 40) + "…", - aLabel: ["dossier Windows"] - }; - break; - case 'WORD_ACRONYM': - return { - sType: oToken.sType, - sValue: oToken.sValue, - aLabel: ["Sigle ou acronyme"] - }; - break; - case 'WORD': - if (oToken.sValue.gl_count("-") > 4) { - return { - sType: "COMPLEX", - sValue: oToken.sValue, - aLabel: ["élément complexe indéterminé"] - }; - } else if (m = this._zPartDemForm.exec(oToken.sValue)) { - // mots avec particules démonstratives - if (this._aPartDemExceptList.has(m[1].toLowerCase())) { - return { - sType: "WORD", - sValue: oToken.sValue, - aLabel: this._getMorph(oToken.sValue) - }; - } - return { - sType: oToken.sType, - sValue: oToken.sValue, - aLabel: ["mot avec particule démonstrative"], - aSubElem: [ - { sType: oToken.sType, sValue: m[1], aLabel: this._getMorph(m[1]) }, - { sType: oToken.sType, sValue: "-" + m[2], aLabel: [this._formatSuffix(m[2].toLowerCase())] } - ] - }; - } else if (m = this._zImperatifVerb.exec(oToken.sValue)) { - // formes interrogatives - return { - sType: oToken.sType, - sValue: oToken.sValue, - aLabel: ["forme verbale impérative"], - aSubElem: [ - { sType: oToken.sType, sValue: m[1], aLabel: this._getMorph(m[1]) }, - { sType: oToken.sType, sValue: "-" + m[2], aLabel: [this._formatSuffix(m[2].toLowerCase())] } - ] - }; - } else if (m = this._zInterroVerb.exec(oToken.sValue)) { - // formes interrogatives - return { - sType: oToken.sType, - sValue: oToken.sValue, - aLabel: ["forme verbale interrogative"], - aSubElem: [ - { sType: oToken.sType, sValue: m[1], aLabel: this._getMorph(m[1]) }, - { sType: 
oToken.sType, sValue: "-" + m[2], aLabel: [this._formatSuffix(m[2].toLowerCase())] } - ] - }; - } else if (this.oSpellChecker.isValidToken(oToken.sValue)) { - return { - sType: oToken.sType, - sValue: oToken.sValue, - aLabel: this._getMorph(oToken.sValue) - }; - } else { - return { - sType: "UNKNOWN_WORD", - sValue: oToken.sValue, - aLabel: ["mot inconnu du dictionnaire"] - }; - } - break; - default: - return { - sType: oToken.sType, - sValue: oToken.sValue, - aLabel: ["token inconnu"] - } - } - } catch (e) { - console.error(e); - } - return null; - } - - _getMorph (sWord) { - let aElem = []; - for (let s of this.oSpellChecker.getMorph(sWord)) { - if (s.includes(":")) aElem.push(this._formatTags(s)); - } - if (aElem.length == 0) { - aElem.push("mot inconnu du dictionnaire"); - } - return aElem; - } - - _formatTags (sTags) { - let sRes = ""; - sTags = sTags.replace(/V([0-3][ea]?)[itpqnmr_eaxz]+/, "V$1"); - let m; - while ((m = this._zTag.exec(sTags)) !== null) { - sRes += _dTag.get(m[0])[0]; - } - if (sRes.startsWith(" verbe") && !sRes.includes("infinitif")) { - sRes += " [" + sTags.slice(1, sTags.indexOf("/")) + "]"; - } - if (!sRes) { - return "#Erreur. Étiquette inconnue : [" + sTags + "]"; - } - return sRes.gl_trimRight(","); - } - - _formatTagsLoc (sTags) { - let sRes = ""; - let m; - while ((m = this._zTag.exec(sTags)) !== null) { - if (m[0].startsWith(":LV")) { - sRes += _dLocTag.get(":LV"); - for (let c of m[0].slice(3)) { - sRes += _dLocVerb.get(c); - } - } else { - sRes += _dLocTag.get(m[0]); - } - } - if (!sRes) { - return "#Erreur. 
Étiquette inconnue : [" + sTags + "]"; - } - return sRes.gl_trimRight(","); - } - - _formatSuffix (s) { - if (s.startsWith("t-")) { - return "“t” euphonique +" + _dPronoms.get(s.slice(2)); - } - if (!s.includes("-")) { - return _dPronoms.get(s.replace("’", "'")); - } - if (s.endsWith("ous")) { - s += '2'; - } - let nPos = s.indexOf("-"); - return _dPronoms.get(s.slice(0, nPos)) + " +" + _dPronoms.get(s.slice(nPos + 1)); - } - - getListOfTokens (sText, bInfo=true) { - let aElem = []; - if (sText !== "") { - for (let oToken of this.oTokenizer.genTokens(sText)) { - if (bInfo) { - let aRes = this.getInfoForToken(oToken); - if (aRes) { - aElem.push(aRes); - } - } else if (oToken.sType !== "SPACE") { - aElem.push(oToken); - } - } - } - return aElem; - } - - * generateInfoForTokenList (lToken) { - for (let oToken of lToken) { - let aRes = this.getInfoForToken(oToken); - if (aRes) { - yield aRes; - } - } - } - - getListOfTokensReduc (sText, bInfo=true) { - let lToken = this.getListOfTokens(sText.replace("'", "’").trim(), false); - let iToken = 0; - let aElem = []; - if (lToken.length == 0) { - return aElem; - } - do { - let oToken = lToken[iToken]; - let sMorphLoc = ''; - let aTokenTempList = [oToken]; - if (oToken.sType == "WORD" || oToken.sType == "WORD_ELIDED"){ - let iLocEnd = iToken + 1; - let oLocNode = this.oLocGraph[oToken.sValue.toLowerCase()]; - while (oLocNode) { - let oTokenNext = lToken[iLocEnd]; - iLocEnd++; - if (oTokenNext) { - oLocNode = oLocNode[oTokenNext.sValue.toLowerCase()]; - } - if (oLocNode && iLocEnd <= lToken.length) { - sMorphLoc = oLocNode["_:_"]; - aTokenTempList.push(oTokenNext); - } else { - break; - } - } - } - - if (sMorphLoc) { - // we have a locution - let sValue = ''; - for (let oTokenWord of aTokenTempList) { - sValue += oTokenWord.sValue+' '; - } - let oTokenLocution = { - 'nStart': aTokenTempList[0].nStart, - 'nEnd': aTokenTempList[aTokenTempList.length-1].nEnd, - 'sType': "LOC", - 'sValue': sValue.replace('’ ','’').trim(), - 
'aSubToken': aTokenTempList - }; - if (bInfo) { - let aSubElem = null; - if (sMorphLoc.startsWith("*|")) { - // cette suite de tokens n’est une locution que dans certains cas minoritaires - oTokenLocution.sType = "LOCP"; - for (let oElem of this.generateInfoForTokenList(aTokenTempList)) { - aElem.push(oElem); - } - sMorphLoc = sMorphLoc.slice(2); - } else { - aSubElem = [...this.generateInfoForTokenList(aTokenTempList)]; - } - // cette suite de tokens est la plupart du temps une locution - let aFormatedTag = []; - for (let sTagLoc of sMorphLoc.split('|') ){ - aFormatedTag.push(this._formatTagsLoc(sTagLoc)); - } - aElem.push({ - sType: oTokenLocution.sType, - sValue: oTokenLocution.sValue, - aLabel: aFormatedTag, - aSubElem: aSubElem - }); - } else { - aElem.push(oTokenLocution); - } - iToken = iToken + aTokenTempList.length; - } else { - // No locution, we just add information - if (bInfo) { - let aRes = this.getInfoForToken(oToken); - if (aRes) { - aElem.push(aRes); - } - } else { - aElem.push(oToken); - } - iToken++; - } - } while (iToken < lToken.length); - return aElem; - } -} - - -if (typeof(exports) !== 'undefined') { - exports.Lexicographe = Lexicographe; -} DELETED gc_lang/fr/modules/lexicographe.py Index: gc_lang/fr/modules/lexicographe.py ================================================================== --- gc_lang/fr/modules/lexicographe.py +++ /dev/null @@ -1,234 +0,0 @@ -""" -Grammalecte - Lexicographe -""" - -# License: MPL 2 - - -import re -import traceback - - -_dTAGS = { - ':N': (" nom,", "Nom"), - ':A': (" adjectif,", "Adjectif"), - ':M1': (" prénom,", "Prénom"), - ':M2': (" patronyme,", "Patronyme, matronyme, nom de famille…"), - ':MP': (" nom propre,", "Nom propre"), - ':W': (" adverbe,", "Adverbe"), - ':J': (" interjection,", "Interjection"), - ':B': (" nombre,", "Nombre"), - ':T': (" titre,", "Titre de civilité"), - - ':e': (" épicène", "épicène"), - ':m': (" masculin", "masculin"), - ':f': (" féminin", "féminin"), - ':s': (" singulier", 
"singulier"), - ':p': (" pluriel", "pluriel"), - ':i': (" invariable", "invariable"), - - ':V1': (" verbe (1ᵉʳ gr.),", "Verbe du 1ᵉʳ groupe"), - ':V2': (" verbe (2ᵉ gr.),", "Verbe du 2ᵉ groupe"), - ':V3': (" verbe (3ᵉ gr.),", "Verbe du 3ᵉ groupe"), - ':V0e': (" verbe,", "Verbe auxiliaire être"), - ':V0a': (" verbe,", "Verbe auxiliaire avoir"), - - ':Y': (" infinitif,", "infinitif"), - ':P': (" participe présent,", "participe présent"), - ':Q': (" participe passé,", "participe passé"), - ':Ip': (" présent,", "indicatif présent"), - ':Iq': (" imparfait,", "indicatif imparfait"), - ':Is': (" passé simple,", "indicatif passé simple"), - ':If': (" futur,", "indicatif futur"), - ':K': (" conditionnel présent,", "conditionnel présent"), - ':Sp': (" subjonctif présent,", "subjonctif présent"), - ':Sq': (" subjonctif imparfait,", "subjonctif imparfait"), - ':E': (" impératif,", "impératif"), - - ':1s': (" 1ʳᵉ p. sg.,", "verbe : 1ʳᵉ personne du singulier"), - ':1ŝ': (" présent interr. 1ʳᵉ p. sg.,", "verbe : 1ʳᵉ personne du singulier (présent interrogatif)"), - ':1ś': (" présent interr. 1ʳᵉ p. sg.,", "verbe : 1ʳᵉ personne du singulier (présent interrogatif)"), - ':2s': (" 2ᵉ p. sg.,", "verbe : 2ᵉ personne du singulier"), - ':3s': (" 3ᵉ p. sg.,", "verbe : 3ᵉ personne du singulier"), - ':1p': (" 1ʳᵉ p. pl.,", "verbe : 1ʳᵉ personne du pluriel"), - ':2p': (" 2ᵉ p. pl.,", "verbe : 2ᵉ personne du pluriel"), - ':3p': (" 3ᵉ p. pl.,", "verbe : 3ᵉ personne du pluriel"), - ':3p!': (" 3ᵉ p. 
pl.,", "verbe : 3ᵉ personne du pluriel (prononciation distinctive)"), - - ':G': ("", "Mot grammatical"), - ':X': (" adverbe de négation,", "Adverbe de négation"), - ':U': (" adverbe interrogatif,", "Adverbe interrogatif"), - ':R': (" préposition,", "Préposition"), - ':Rv': (" préposition verbale,", "Préposition verbale"), - ':D': (" déterminant,", "Déterminant"), - ':Dd': (" déterminant démonstratif,", "Déterminant démonstratif"), - ':De': (" déterminant exclamatif,", "Déterminant exclamatif"), - ':Dp': (" déterminant possessif,", "Déterminant possessif"), - ':Di': (" déterminant indéfini,", "Déterminant indéfini"), - ':Dn': (" déterminant négatif,", "Déterminant négatif"), - ':Od': (" pronom démonstratif,", "Pronom démonstratif"), - ':Oi': (" pronom indéfini,", "Pronom indéfini"), - ':On': (" pronom indéfini négatif,", "Pronom indéfini négatif"), - ':Ot': (" pronom interrogatif,", "Pronom interrogatif"), - ':Or': (" pronom relatif,", "Pronom relatif"), - ':Ow': (" pronom adverbial,", "Pronom adverbial"), - ':Os': (" pronom personnel sujet,", "Pronom personnel sujet"), - ':Oo': (" pronom personnel objet,", "Pronom personnel objet"), - ':Ov': (" préverbe,", "Préverbe (pronom personnel objet, +ne)"), - ':O1': (" 1ʳᵉ pers.,", "Pronom : 1ʳᵉ personne"), - ':O2': (" 2ᵉ pers.,", "Pronom : 2ᵉ personne"), - ':O3': (" 3ᵉ pers.,", "Pronom : 3ᵉ personne"), - ':C': (" conjonction,", "Conjonction"), - ':Ĉ': (" conjonction (él.),", "Conjonction (élément)"), - ':Cc': (" conjonction de coordination,", "Conjonction de coordination"), - ':Cs': (" conjonction de subordination,", "Conjonction de subordination"), - ':Ĉs': (" conjonction de subordination (él.),", "Conjonction de subordination (élément)"), - - ':Ñ': (" locution nominale (él.),", "Locution nominale (élément)"), - ':Â': (" locution adjectivale (él.),", "Locution adjectivale (élément)"), - ':Ṽ': (" locution verbale (él.),", "Locution verbale (élément)"), - ':Ŵ': (" locution adverbiale (él.),", "Locution adverbiale 
(élément)"), - ':Ŕ': (" locution prépositive (él.),", "Locution prépositive (élément)"), - ':Ĵ': (" locution interjective (él.),", "Locution interjective (élément)"), - - ':Zp': (" préfixe,", "Préfixe"), - ':Zs': (" suffixe,", "Suffixe"), - - ':H': ("", ""), - - ':@': ("", ""), - ':@p': ("signe de ponctuation", "Signe de ponctuation"), - ':@s': ("signe", "Signe divers"), - - ';S': (" : symbole (unité de mesure)", "Symbole (unité de mesure)"), - - '/*': ("", "Sous-dictionnaire "), - '/C': (" ", "Sous-dictionnaire "), - '/M': ("", "Sous-dictionnaire "), - '/R': (" ", "Sous-dictionnaire "), - '/A': ("", "Sous-dictionnaire "), - '/X': ("", "Sous-dictionnaire ") -} - -_dPFX = { - 'd': "(de), déterminant épicène invariable", - 'l': "(le/la), déterminant masculin/féminin singulier", - 'j': "(je), pronom personnel sujet, 1ʳᵉ pers., épicène singulier", - 'm': "(me), pronom personnel objet, 1ʳᵉ pers., épicène singulier", - 't': "(te), pronom personnel objet, 2ᵉ pers., épicène singulier", - 's': "(se), pronom personnel objet, 3ᵉ pers., épicène singulier/pluriel", - 'n': "(ne), adverbe de négation", - 'c': "(ce), pronom démonstratif, masculin singulier/pluriel", - 'ç': "(ça), pronom démonstratif, masculin singulier", - 'qu': "(que), conjonction de subordination", - 'lorsqu': "(lorsque), conjonction de subordination", - 'puisqu': "(puisque), conjonction de subordination", - 'quoiqu': "(quoique), conjonction de subordination", - 'jusqu': "(jusque), préposition", -} - -_dAD = { - 'je': " pronom personnel sujet, 1ʳᵉ pers. sing.", - 'tu': " pronom personnel sujet, 2ᵉ pers. sing.", - 'il': " pronom personnel sujet, 3ᵉ pers. masc. sing.", - 'on': " pronom personnel sujet, 3ᵉ pers. sing. ou plur.", - 'elle': " pronom personnel sujet, 3ᵉ pers. fém. sing.", - 'nous': " pronom personnel sujet/objet, 1ʳᵉ pers. plur.", - 'vous': " pronom personnel sujet/objet, 2ᵉ pers. plur.", - 'ils': " pronom personnel sujet, 3ᵉ pers. masc. plur.", - 'elles': " pronom personnel sujet, 3ᵉ pers. masc. 
plur.", - - "là": " particule démonstrative", - "ci": " particule démonstrative", - - 'le': " COD, masc. sing.", - 'la': " COD, fém. sing.", - 'les': " COD, plur.", - - 'moi': " COI (à moi), sing.", - 'toi': " COI (à toi), sing.", - 'lui': " COI (à lui ou à elle), sing.", - 'nous2': " COI (à nous), plur.", - 'vous2': " COI (à vous), plur.", - 'leur': " COI (à eux ou à elles), plur.", - - 'y': " pronom adverbial", - "m'y": " (me) pronom personnel objet + (y) pronom adverbial", - "t'y": " (te) pronom personnel objet + (y) pronom adverbial", - "s'y": " (se) pronom personnel objet + (y) pronom adverbial", - - 'en': " pronom adverbial", - "m'en": " (me) pronom personnel objet + (en) pronom adverbial", - "t'en": " (te) pronom personnel objet + (en) pronom adverbial", - "s'en": " (se) pronom personnel objet + (en) pronom adverbial", -} - - -class Lexicographe: - "Lexicographer - word analyzer" - - def __init__ (self, oSpellChecker): - self.oSpellChecker = oSpellChecker - self._zElidedPrefix = re.compile("(?i)^([dljmtsncç]|quoiqu|lorsqu|jusqu|puisqu|qu)['’](.+)") - self._zCompoundWord = re.compile("(?i)(\\w+)-((?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts][’'](?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous)$") - self._zTag = re.compile("[:;/][\\w*][^:;/]*") - - def analyzeWord (self, sWord): - "returns a tuple (a list of morphologies, a set of verb at infinitive form)" - try: - if not sWord: - return (None, None) - if sWord.count("-") > 4: - return (["élément complexe indéterminé"], None) - if sWord.isdigit(): - return (["nombre"], None) - - aMorph = [] - # préfixes élidés - m = self._zElidedPrefix.match(sWord) - if m: - sWord = m.group(2) - aMorph.append( "{}’ : {}".format(m.group(1), _dPFX.get(m.group(1).lower(), "[?]")) ) - # mots composés - m2 = self._zCompoundWord.match(sWord) - if m2: - sWord = m2.group(1) - # Morphologies - lMorph = self.oSpellChecker.getMorph(sWord) - if len(lMorph) > 1: - # sublist - aMorph.append( (sWord, [ 
self.formatTags(s) for s in lMorph if ":" in s ]) ) - elif len(lMorph) == 1: - aMorph.append( "{} : {}".format(sWord, self.formatTags(lMorph[0])) ) - else: - aMorph.append( "{} : inconnu du dictionnaire".format(sWord) ) - # suffixe d’un mot composé - if m2: - aMorph.append( "-{} : {}".format(m2.group(2), self._formatSuffix(m2.group(2).lower())) ) - # Verbes - aVerb = { s[1:s.find("/")] for s in lMorph if ":V" in s } - return (aMorph, aVerb) - except (IndexError, TypeError): - traceback.print_exc() - return (["#erreur"], None) - - def formatTags (self, sTags): - "returns string: readable tags" - sRes = "" - sTags = re.sub("(?<=V[1-3])[itpqnmr_eaxz]+", "", sTags) - sTags = re.sub("(?<=V0[ea])[itpqnmr_eaxz]+", "", sTags) - for m in self._zTag.finditer(sTags): - sRes += _dTAGS.get(m.group(0), " [{}]".format(m.group(0)))[0] - if sRes.startswith(" verbe") and not sRes.endswith("infinitif"): - sRes += " [{}]".format(sTags[1:sTags.find("/")]) - return sRes.rstrip(",") - - def _formatSuffix (self, s): - if s.startswith("t-"): - return "“t” euphonique +" + _dAD.get(s[2:], "[?]") - if not "-" in s: - return _dAD.get(s.replace("’", "'"), "[?]") - if s.endswith("ous"): - s += '2' - nPos = s.find("-") - return "%s +%s" % (_dAD.get(s[:nPos], "[?]"), _dAD.get(s[nPos+1:], "[?]")) Index: gc_lang/fr/modules/tests.py ================================================================== --- gc_lang/fr/modules/tests.py +++ gc_lang/fr/modules/tests.py @@ -10,11 +10,11 @@ import time from contextlib import contextmanager from ..graphspell.ibdawg import IBDAWG from ..graphspell.echo import echo -from . import gc_engine as gce +from . import gc_engine from . import conj from . import phonet from . 
import mfsp @@ -32,20 +32,20 @@ def perf (sVersion, hDst=None): "performance tests" print("\nPerformance tests") - gce.load() - gce.parse("Texte sans importance… utile pour la compilation des règles avant le calcul des perfs.") + gc_engine.load() + gc_engine.parse("Texte sans importance… utile pour la compilation des règles avant le calcul des perfs.") spHere, _ = os.path.split(__file__) with open(os.path.join(spHere, "perf.txt"), "r", encoding="utf-8") as hSrc: if hDst: hDst.write("{:<12}{:<20}".format(sVersion, time.strftime("%Y.%m.%d %H:%M"))) for sText in ( s.strip() for s in hSrc if not s.startswith("#") and s.strip() ): with timeblock(sText[:sText.find(".")], hDst): - gce.parse(sText) + gc_engine.parse(sText) if hDst: hDst.write("\n") def _fuckBackslashUTF8 (s): @@ -157,11 +157,11 @@ class TestGrammarChecking (unittest.TestCase): "Tests du correcteur grammatical" @classmethod def setUpClass (cls): - gce.load() + gc_engine.load() cls._zError = re.compile(r"\{\{.*?\}\}") cls._aTestedRules = set() def test_parse (self): zOption = re.compile("^__([a-zA-Z0-9]+)__ ") @@ -205,11 +205,11 @@ if nError: print("Unexpected errors:", nError) # untested rules i = 0 echo("Untested rules:") - for _, sOpt, sLineId, sRuleId in gce.listRules(): + for _, sOpt, sLineId, sRuleId in gc_engine.listRules(): if sOpt != "@@@@" and sRuleId not in self._aTestedRules and not re.search("^[0-9]+[sp]$|^[pd]_", sRuleId): echo(sLineId + "/" + sRuleId) i += 1 echo("[{} untested rules]".format(i)) @@ -217,15 +217,15 @@ sText, sSugg = sLine.split("->>") return (sText.strip(), sSugg.strip()) def _getFoundErrors (self, sLine, sOption): if sOption: - gce.setOption(sOption, True) - aErrs = gce.parse(sLine) - gce.setOption(sOption, False) + gc_engine.gc_options.setOption(sOption, True) + aErrs = gc_engine.parse(sLine) + gc_engine.gc_options.setOption(sOption, False) else: - aErrs = gce.parse(sLine) + aErrs = gc_engine.parse(sLine) sRes = " " * len(sLine) sListErr = "" lAllSugg = [] for dErr in aErrs: 
sRes = sRes[:dErr["nStart"]] + "~" * (dErr["nEnd"] - dErr["nStart"]) + sRes[dErr["nEnd"]:] Index: gc_lang/fr/modules/textformatter.py ================================================================== --- gc_lang/fr/modules/textformatter.py +++ gc_lang/fr/modules/textformatter.py @@ -242,24 +242,32 @@ "ma_1letter_lowercase": False, "ma_1letter_uppercase": False } -class TextFormatter: - "Text Formatter: purge typographic mistakes from text" - - def __init__ (self): - for _, lTup in dReplTable.items(): - for i, t in enumerate(lTup): - lTup[i] = (re.compile(t[0]), t[1]) - - def formatText (self, sText): - "returns formatted text" - for sOptName, bVal in dDefaultOptions.items(): - if bVal: - for zRgx, sRep in dReplTable[sOptName]: - sText = zRgx.sub(sRep, sText) - return sText - - def getDefaultOptions (self): - "returns default options" - return dDefaultOptions.copy() +_bCompiled = False + +def _compileRegex(): + global _bCompiled + for _, lTup in dReplTable.items(): + for i, t in enumerate(lTup): + lTup[i] = (re.compile(t[0]), t[1]) + _bCompiled = True + + +def formatText (sText, dOpt=None): + "returns formatted text" + if not _bCompiled: + _compileRegex() + dOptions = getDefaultOptions() + if dOpt: + dOptions.update(dOpt) + for sOptName, bVal in dOptions.items(): + if bVal: + for zRgx, sRep in dReplTable[sOptName]: + sText = zRgx.sub(sRep, sText) + return sText + + +def getDefaultOptions (): + "returns default options" + return dDefaultOptions.copy() Index: gc_lang/fr/oxt/ContextMenu/ContextMenu.py ================================================================== --- gc_lang/fr/oxt/ContextMenu/ContextMenu.py +++ gc_lang/fr/oxt/ContextMenu/ContextMenu.py @@ -8,30 +8,28 @@ from com.sun.star.task import XJob from com.sun.star.ui import XContextMenuInterceptor #from com.sun.star.ui.ContextMenuInterceptorAction import IGNORED #from com.sun.star.ui.ContextMenuInterceptorAction import EXECUTE_MODIFIED -import grammalecte.fr.lexicographe as lxg from 
grammalecte.graphspell.spellchecker import SpellChecker from grammalecte.graphspell.echo import echo import helpers xDesktop = None oSpellChecker = None -oLexicographe = None class MyContextMenuInterceptor (XContextMenuInterceptor, unohelper.Base): def __init__ (self, ctx): self.ctx = ctx def notifyContextMenuExecute (self, xEvent): sWord = self._getWord() try: - aItem, aVerb = oLexicographe.analyzeWord(sWord) - if not aItem: + lWordAndMorph = oSpellChecker.analyze(sWord) + if not lWordAndMorph: return uno.Enum("com.sun.star.ui.ContextMenuInterceptorAction", "IGNORED") # don’t work on AOO, have to import the value #return IGNORED xContextMenu = xEvent.ActionTriggerContainer if xContextMenu: # entries index @@ -38,30 +36,31 @@ i = xContextMenu.Count nUnoConstantLine = uno.getConstantByName("com.sun.star.ui.ActionTriggerSeparatorType.LINE") # word analysis i = self._addItemToContextMenu(xContextMenu, i, "ActionTriggerSeparator", SeparatorType=nUnoConstantLine) - for item in aItem: - if isinstance(item, str): - i = self._addItemToContextMenu(xContextMenu, i, "ActionTrigger", Text=item, CommandURL="service:net.grammalecte.AppLauncher?None") - elif isinstance(item, tuple): - sRoot, lMorph = item + for sWord, lMorph in lWordAndMorph: + if len(lMorph) == 1: + sMorph, sReadableMorph = lMorph[0] + i = self._addItemToContextMenu(xContextMenu, i, "ActionTrigger", Text=sWord + " : " + sReadableMorph, CommandURL="service:net.grammalecte.AppLauncher?None") + elif len(lMorph) >= 1: # submenu xSubMenuContainer = xContextMenu.createInstance("com.sun.star.ui.ActionTriggerContainer") - for j, s in enumerate(lMorph): - self._addItemToContextMenu(xSubMenuContainer, j, "ActionTrigger", Text=s, CommandURL="service:net.grammalecte.AppLauncher?None") + for j, (sMorph, sReadableMorph) in enumerate(lMorph): + self._addItemToContextMenu(xSubMenuContainer, j, "ActionTrigger", Text=sReadableMorph, CommandURL="service:net.grammalecte.AppLauncher?None") # create root menu entry - i = 
self._addItemToContextMenu(xContextMenu, i, "ActionTrigger", Text=sRoot, SubContainer=xSubMenuContainer) + i = self._addItemToContextMenu(xContextMenu, i, "ActionTrigger", Text=sWord, SubContainer=xSubMenuContainer) else: - i = self._addItemToContextMenu(xContextMenu, i, "ActionTrigger", Text="# erreur : {}".format(item)) + i = self._addItemToContextMenu(xContextMenu, i, "ActionTrigger", Text=sWord + " : [erreur] aucun résultat trouvé.") # Links to Conjugueur + aVerb = { sMorph[1:sMorph.find("/")] for sMorph in oSpellChecker.getMorph(sWord) if ":V" in sMorph } if aVerb: i = self._addItemToContextMenu(xContextMenu, i, "ActionTriggerSeparator", SeparatorType=nUnoConstantLine) for sVerb in aVerb: - i = self._addItemToContextMenu(xContextMenu, i, "ActionTrigger", Text="Conjuguer “{}”…".format(sVerb), - CommandURL="service:net.grammalecte.AppLauncher?CJ/"+sVerb) + i = self._addItemToContextMenu(xContextMenu, i, "ActionTrigger", Text="Conjuguer “{}”…".format(sVerb), \ + CommandURL="service:net.grammalecte.AppLauncher?CJ/"+sVerb) # Search xDoc = xDesktop.getCurrentComponent() xViewCursor = xDoc.CurrentController.ViewCursor if not xViewCursor.isCollapsed(): @@ -116,11 +115,10 @@ class JobExecutor (XJob, unohelper.Base): def __init__ (self, ctx): self.ctx = ctx global xDesktop global oSpellChecker - global oLexicographe try: if not xDesktop: xDesktop = self.ctx.getServiceManager().createInstanceWithContext('com.sun.star.frame.Desktop', self.ctx) if not oSpellChecker: xCurCtx = uno.getComponentContext() @@ -128,12 +126,10 @@ if hasattr(oGC, "getSpellChecker"): # https://bugs.documentfoundation.org/show_bug.cgi?id=97790 oSpellChecker = oGC.getSpellChecker() else: oSpellChecker = SpellChecker("${lang}", "fr-allvars.bdic") - if not oLexicographe: - oLexicographe = lxg.Lexicographe(oSpellChecker) except: traceback.print_exc() def execute (self, args): if not args: Index: gc_lang/fr/webext/gce_worker.js ================================================================== --- 
gc_lang/fr/webext/gce_worker.js +++ gc_lang/fr/webext/gce_worker.js @@ -33,11 +33,11 @@ //console.log(self); importScripts("grammalecte/graphspell/helpers.js"); importScripts("grammalecte/graphspell/str_transform.js"); importScripts("grammalecte/graphspell/char_player.js"); -importScripts("grammalecte/graphspell/suggest.js"); +importScripts("grammalecte/graphspell/lexgraph_fr.js"); importScripts("grammalecte/graphspell/ibdawg.js"); importScripts("grammalecte/graphspell/spellchecker.js"); importScripts("grammalecte/text.js"); importScripts("grammalecte/graphspell/tokenizer.js"); importScripts("grammalecte/fr/conj.js"); @@ -45,12 +45,12 @@ importScripts("grammalecte/fr/phonet.js"); importScripts("grammalecte/fr/cregex.js"); importScripts("grammalecte/fr/gc_options.js"); importScripts("grammalecte/fr/gc_rules.js"); importScripts("grammalecte/fr/gc_rules_graph.js"); +importScripts("grammalecte/fr/gc_engine_func.js"); importScripts("grammalecte/fr/gc_engine.js"); -importScripts("grammalecte/fr/lexicographe.js"); importScripts("grammalecte/tests.js"); /* Warning. Initialization can’t be completed at startup of the worker, for we need the path of the extension to load data stored in JSON files. 
@@ -152,11 +152,10 @@ let bInitDone = false; let oSpellChecker = null; let oTokenizer = null; -let oLxg = null; let oTest = null; let oLocution = null; /* @@ -176,27 +175,27 @@ phonet.init(helpers.loadFile(sExtensionPath + "/grammalecte/fr/phonet_data.json")); mfsp.init(helpers.loadFile(sExtensionPath + "/grammalecte/fr/mfsp_data.json")); //console.log("[Worker] Modules have been initialized…"); gc_engine.load(sContext, "aHSL", sExtensionPath+"grammalecte/graphspell/_dictionaries"); oSpellChecker = gc_engine.getSpellChecker(); - oTest = new TestGrammarChecking(gc_engine, sExtensionPath+"/grammalecte/fr/tests_data.json"); + oTest = new TestGrammarChecking(gc_engine, gc_options, sExtensionPath+"/grammalecte/fr/tests_data.json"); oTokenizer = new Tokenizer("fr"); oLocution = helpers.loadFile(sExtensionPath + "/grammalecte/fr/locutions_data.json"); - oLxg = new Lexicographe(oSpellChecker, oTokenizer, oLocution); + lexgraph_fr.load(oSpellChecker, oTokenizer, oLocution); if (dOptions !== null) { if (!(dOptions instanceof Map)) { dOptions = helpers.objectToMap(dOptions); } - gc_engine.setOptions(dOptions); + gc_options.setOptions(dOptions); } //tests(); bInitDone = true; } else { console.log("[Worker] Already initialized…") } - // we always retrieve options from the gc_engine, for setOptions filters obsolete options - dOptions = helpers.mapToObject(gc_engine.getOptions()); + // we always retrieve options from the gc_options, for setOptions filters obsolete options + dOptions = helpers.mapToObject(gc_options.getOptions()); postMessage(createResponse("init", dOptions, oInfo, true)); } catch (e) { console.error(e); postMessage(createResponse("init", createErrorResult(e, "init failed"), oInfo, true, true)); @@ -248,11 +247,11 @@ // lexicographer try { sText = sText.replace(/­/g, "").normalize("NFC"); for (let sParagraph of text.getParagraph(sText)) { if (sParagraph.trim() !== "") { - postMessage(createResponse("getListOfTokens", oLxg.getListOfTokensReduc(sParagraph, true), 
oInfo, false)); + postMessage(createResponse("getListOfTokens", lexgraph_fr.getListOfTokensReduc(sParagraph, true), oInfo, false)); } } postMessage(createResponse("getListOfTokens", null, oInfo, true)); } catch (e) { @@ -260,40 +259,40 @@ postMessage(createResponse("getListOfTokens", createErrorResult(e, "no tokens"), oInfo, true, true)); } } function getOptions (oInfo={}) { - let dOptions = helpers.mapToObject(gc_engine.getOptions()); + let dOptions = helpers.mapToObject(gc_options.getOptions()); postMessage(createResponse("getOptions", dOptions, oInfo, true)); } function getDefaultOptions (oInfo={}) { - let dOptions = helpers.mapToObject(gc_engine.getDefaultOptions()); + let dOptions = helpers.mapToObject(gc_options.getDefaultOptions()); postMessage(createResponse("getDefaultOptions", dOptions, oInfo, true)); } function setOptions (dOptions, oInfo={}) { if (!(dOptions instanceof Map)) { dOptions = helpers.objectToMap(dOptions); } - gc_engine.setOptions(dOptions); - dOptions = helpers.mapToObject(gc_engine.getOptions()); + gc_options.setOptions(dOptions); + dOptions = helpers.mapToObject(gc_options.getOptions()); postMessage(createResponse("setOptions", dOptions, oInfo, true)); } function setOption (sOptName, bValue, oInfo={}) { console.log(sOptName+": "+bValue); if (sOptName) { - gc_engine.setOption(sOptName, bValue); - let dOptions = helpers.mapToObject(gc_engine.getOptions()); + gc_options.setOption(sOptName, bValue); + let dOptions = helpers.mapToObject(gc_options.getOptions()); postMessage(createResponse("setOption", dOptions, oInfo, true)); } } function resetOptions (oInfo={}) { - gc_engine.resetOptions(); - let dOptions = helpers.mapToObject(gc_engine.getOptions()); + gc_options.resetOptions(); + let dOptions = helpers.mapToObject(gc_options.getOptions()); postMessage(createResponse("resetOptions", dOptions, oInfo, true)); } function tests () { console.log(conj.getConj("devenir", ":E", ":2s")); @@ -326,23 +325,23 @@ function fullTests (oInfo={}) { if 
(!gc_engine) { postMessage(createResponse("fullTests", "# Grammar checker not loaded.", oInfo, true)); return; } - let dMemoOptions = gc_engine.getOptions(); - let dTestOptions = gc_engine.getDefaultOptions(); + let dMemoOptions = gc_options.getOptions(); + let dTestOptions = gc_options.getDefaultOptions(); dTestOptions.set("nbsp", true); dTestOptions.set("esp", true); dTestOptions.set("unit", true); dTestOptions.set("num", true); - gc_engine.setOptions(dTestOptions); + gc_options.setOptions(dTestOptions); let sMsg = ""; for (let sRes of oTest.testParse()) { sMsg += sRes + "\n"; console.log(sRes); } - gc_engine.setOptions(dMemoOptions); + gc_options.setOptions(dMemoOptions); postMessage(createResponse("fullTests", sMsg, oInfo, true)); } // SpellChecker Index: grammalecte-cli.py ================================================================== --- grammalecte-cli.py +++ grammalecte-cli.py @@ -11,10 +11,11 @@ import re import traceback import grammalecte import grammalecte.text as txt +import grammalecte.fr.textformatter as tf from grammalecte.graphspell.echo import echo _EXAMPLE = "Quoi ? Racontes ! Racontes-moi ! Bon sangg, parles ! Oui. Il y a des menteur partout. " \ "Je suit sidéré par la brutales arrogance de cette homme-là. Quelle salopard ! Un escrocs de la pire espece. 
" \ @@ -149,29 +150,27 @@ xParser.add_argument("-off", "--opt_off", nargs="+", help="deactivate options") xParser.add_argument("-roff", "--rule_off", nargs="+", help="deactivate rules") xParser.add_argument("-d", "--debug", help="debugging mode (only in interactive mode)", action="store_true") xArgs = xParser.parse_args() - oGrammarChecker = grammalecte.GrammarChecker("fr") - oSpellChecker = oGrammarChecker.getSpellChecker() - oLexicographer = oGrammarChecker.getLexicographer() - oTextFormatter = oGrammarChecker.getTextFormatter() + grammalecte.load() + oSpellChecker = grammalecte.getSpellChecker() if xArgs.personal_dict: oJSON = loadDictionary(xArgs.personal_dict) if oJSON: oSpellChecker.setPersonalDictionary(oJSON) if not xArgs.json: echo("Python v" + sys.version) - echo("Grammalecte v{}".format(oGrammarChecker.gce.version)) + echo("Grammalecte v{}".format(grammalecte.version)) # list options or rules if xArgs.list_options or xArgs.list_rules: if xArgs.list_options: - oGrammarChecker.gce.displayOptions("fr") + grammalecte.gc_options.displayOptions() if xArgs.list_rules: - oGrammarChecker.gce.displayRules(None if xArgs.list_rules == "*" else xArgs.list_rules) + grammalecte.displayRules(None if xArgs.list_rules == "*" else xArgs.list_rules) exit() # spell suggestions if xArgs.suggest: for lSugg in oSpellChecker.suggest(xArgs.suggest): @@ -187,40 +186,40 @@ xArgs.context = False if xArgs.concat_lines: xArgs.textformatter = False # grammar options - oGrammarChecker.gce.setOptions({"html": True, "latex": True}) + grammalecte.gc_options.setOptions({"html": True, "latex": True}) if xArgs.opt_on: - oGrammarChecker.gce.setOptions({ opt:True for opt in xArgs.opt_on }) + grammalecte.gc_options.setOptions({ opt:True for opt in xArgs.opt_on }) if xArgs.opt_off: - oGrammarChecker.gce.setOptions({ opt:False for opt in xArgs.opt_off }) + grammalecte.gc_options.setOptions({ opt:False for opt in xArgs.opt_off }) # disable grammar rules if xArgs.rule_off: for sRule in 
xArgs.rule_off: - oGrammarChecker.gce.ignoreRule(sRule) + grammalecte.ignoreRule(sRule) if xArgs.file or xArgs.file_to_file: # file processing sFile = xArgs.file or xArgs.file_to_file hDst = open(sFile[:sFile.rfind(".")]+".res.txt", "w", encoding="utf-8", newline="\n") if xArgs.file_to_file or sys.platform == "win32" else None bComma = False if xArgs.json: - output('{ "grammalecte": "'+oGrammarChecker.gce.version+'", "lang": "'+oGrammarChecker.gce.lang+'", "data" : [\n', hDst) + output('{ "grammalecte": "'+grammalecte.version+'", "lang": "'+grammalecte.lang+'", "data" : [\n', hDst) for i, sText, lLineSet in generateParagraphFromFile(sFile, xArgs.concat_lines): if xArgs.textformatter or xArgs.textformatteronly: - sText = oTextFormatter.formatText(sText) + sText = tf.formatText(sText) if xArgs.textformatteronly: output(sText, hDst) continue if xArgs.json: - sText = oGrammarChecker.getParagraphErrorsAsJSON(i, sText, bContext=xArgs.context, bEmptyIfNoErrors=xArgs.only_when_errors, \ + sText = grammalecte.getParagraphErrorsAsJSON(i, sText, bContext=xArgs.context, bEmptyIfNoErrors=xArgs.only_when_errors, \ bSpellSugg=xArgs.with_spell_sugg, bReturnText=xArgs.textformatter, lLineSet=lLineSet) else: - sText, _ = oGrammarChecker.getParagraphWithErrors(sText, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSugg=xArgs.with_spell_sugg, nWidth=xArgs.width) + sText, _ = grammalecte.getParagraphWithErrors(sText, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSugg=xArgs.with_spell_sugg, nWidth=xArgs.width) if sText: if xArgs.json and bComma: output(",\n", hDst) output(sText, hDst) bComma = True @@ -232,13 +231,13 @@ # file processing: interactive mode sFile = xArgs.interactive_file_to_file hDst = open(sFile[:sFile.rfind(".")]+".res.txt", "w", encoding="utf-8", newline="\n") for i, sText, lLineSet in generateParagraphFromFile(sFile, xArgs.concat_lines): if xArgs.textformatter: - sText = oTextFormatter.formatText(sText) + sText = tf.formatText(sText) while True: - sResult, lErrors = 
oGrammarChecker.getParagraphWithErrors(sText, bEmptyIfNoErrors=False, bSpellSugg=True, nWidth=xArgs.width) + sResult, lErrors = grammalecte.getParagraphWithErrors(sText, bEmptyIfNoErrors=False, bSpellSugg=True, nWidth=xArgs.width) print("\n\n============================== Paragraph " + str(i) + " ==============================\n") echo(sResult) print("\n") vCommand = getCommand() if vCommand == "q": @@ -269,12 +268,14 @@ while True: if sText.startswith("?"): for sWord in sText[1:].strip().split(): if sWord: echo("* " + sWord) - for sMorph in oSpellChecker.getMorph(sWord): - echo(" {:<32} {}".format(sMorph, oLexicographer.formatTags(sMorph))) + for sElem, aRes in oSpellChecker.analyze(sWord): + echo(" - " + sElem) + for sMorph, sMeaning in aRes: + echo(" {:<40} {}".format(sMorph, sMeaning)) elif sText.startswith("!"): for sWord in sText[1:].strip().split(): if sWord: for lSugg in oSpellChecker.suggest(sWord): echo(" | ".join(lSugg)) @@ -290,47 +291,47 @@ sFlexPattern = sSearch sTagsPattern = "" for aRes in oSpellChecker.select(sFlexPattern, sTagsPattern): echo("{:<30} {:<30} {}".format(*aRes)) elif sText.startswith("/o+ "): - oGrammarChecker.gce.setOptions({ opt:True for opt in sText[3:].strip().split() if opt in oGrammarChecker.gce.getOptions() }) + grammalecte.gc_options.setOptions({ opt:True for opt in sText[3:].strip().split() if opt in grammalecte.gc_options.dOptions }) echo("done") elif sText.startswith("/o- "): - oGrammarChecker.gce.setOptions({ opt:False for opt in sText[3:].strip().split() if opt in oGrammarChecker.gce.getOptions() }) + grammalecte.gc_options.setOptions({ opt:False for opt in sText[3:].strip().split() if opt in grammalecte.gc_options.dOptions }) echo("done") elif sText.startswith("/r- "): for sRule in sText[3:].strip().split(): - oGrammarChecker.gce.ignoreRule(sRule) + grammalecte.ignoreRule(sRule) echo("done") elif sText.startswith("/r+ "): for sRule in sText[3:].strip().split(): - oGrammarChecker.gce.reactivateRule(sRule) + 
grammalecte.reactivateRule(sRule) echo("done") elif sText in ("/debug", "/d"): xArgs.debug = not xArgs.debug echo("debug mode on" if xArgs.debug else "debug mode off") elif sText in ("/textformatter", "/tf"): xArgs.textformatter = not xArgs.textformatter - echo("textformatter on" if xArgs.debug else "textformatter off") + echo("textformatter on" if xArgs.textformatter else "textformatter off") elif sText in ("/help", "/h"): echo(_HELP) elif sText in ("/lopt", "/lo"): - oGrammarChecker.gce.displayOptions("fr") + grammalecte.gc_options.displayOptions() elif sText.startswith("/lr"): sText = sText.strip() sFilter = sText[sText.find(" "):].strip() if " " in sText else None - oGrammarChecker.gce.displayRules(sFilter) + grammalecte.displayRules(sFilter) elif sText in ("/quit", "/q"): break elif sText.startswith("/rl"): # reload (todo) pass elif sText.startswith("$"): for sParagraph in txt.getParagraph(sText[1:]): if xArgs.textformatter: - sParagraph = oTextFormatter.formatText(sParagraph) - lParagraphErrors, lSentences = oGrammarChecker.gce.parse(sParagraph, bDebug=xArgs.debug, bFullInfo=True) + sParagraph = tf.formatText(sParagraph) + lParagraphErrors, lSentences = grammalecte.parse(sParagraph, bDebug=xArgs.debug, bFullInfo=True) echo(txt.getReadableErrors(lParagraphErrors, xArgs.width)) for dSentence in lSentences: echo("{nStart}:{nEnd}".format(**dSentence)) echo(" <" + dSentence["sSentence"]+">") for dToken in dSentence["lToken"]: @@ -341,16 +342,16 @@ "·".join(dToken.get("aTags", "")) ) ) echo(txt.getReadableErrors(dSentence["lGrammarErrors"], xArgs.width)) else: for sParagraph in txt.getParagraph(sText): if xArgs.textformatter: - sParagraph = oTextFormatter.formatText(sParagraph) - sRes, _ = oGrammarChecker.getParagraphWithErrors(sParagraph, bEmptyIfNoErrors=xArgs.only_when_errors, nWidth=xArgs.width, bDebug=xArgs.debug) + sParagraph = tf.formatText(sParagraph) + sRes, _ = grammalecte.getParagraphWithErrors(sParagraph, bEmptyIfNoErrors=xArgs.only_when_errors, 
nWidth=xArgs.width, bDebug=xArgs.debug) if sRes: echo("\n" + sRes) else: echo("\nNo error found.") sText = _getText(sInputText) if __name__ == '__main__': main() Index: grammalecte-server.py ================================================================== --- grammalecte-server.py +++ grammalecte-server.py @@ -14,29 +14,28 @@ from grammalecte.bottle import Bottle, run, request, response #, template, static_file import grammalecte import grammalecte.text as txt +import grammalecte.fr.textformatter as tf from grammalecte.graphspell.echo import echo #### GRAMMAR CHECKER #### -oGrammarChecker = grammalecte.GrammarChecker("fr", "Server") -oSpellChecker = oGrammarChecker.getSpellChecker() -oTextFormatter = oGrammarChecker.getTextFormatter() -oGCE = oGrammarChecker.getGCEngine() +grammalecte.load("Server") +oSpellChecker = grammalecte.getSpellChecker() def parseText (sText, dOptions=None, bFormatText=False, sError=""): "parse and return errors in a JSON format" - sJSON = '{ "program": "grammalecte-fr", "version": "'+oGCE.version+'", "lang": "'+oGCE.lang+'", "error": "'+sError+'", "data" : [\n' + sJSON = '{ "program": "grammalecte-fr", "version": "'+grammalecte.version+'", "lang": "'+grammalecte.lang+'", "error": "'+sError+'", "data" : [\n' sDataJSON = "" for i, sParagraph in enumerate(txt.getParagraph(sText), 1): if bFormatText: - sParagraph = oTextFormatter.formatText(sParagraph) - sResult = oGrammarChecker.getParagraphErrorsAsJSON(i, sParagraph, dOptions=dOptions, bEmptyIfNoErrors=True, bReturnText=bFormatText) + sParagraph = tf.formatText(sParagraph) + sResult = grammalecte.getParagraphErrorsAsJSON(i, sParagraph, dOptions=dOptions, bEmptyIfNoErrors=True, bReturnText=bFormatText) if sResult: if sDataJSON: sDataJSON += ",\n" sDataJSON += sResult sJSON += sDataJSON + "\n]}\n" @@ -185,13 +184,13 @@ @app.route("/get_options/fr") def listOptions (): "returns grammar options in a text JSON format" sUserId = request.cookies.user_id - dOptions = dUser[sUserId]["gc_options"] 
if sUserId and sUserId in dUser else oGCE.getOptions() + dOptions = dUser[sUserId]["gc_options"] if sUserId and sUserId in dUser else grammalecte.gc_options.getOptions() response.set_header("Content-Type", "application/json; charset=UTF-8") - return '{ "values": ' + json.dumps(dOptions, ensure_ascii=False) + ', "labels": ' + json.dumps(oGCE.getOptionsLabels("fr"), ensure_ascii=False) + ' }' + return '{ "values": ' + json.dumps(dOptions, ensure_ascii=False) + ', "labels": ' + json.dumps(grammalecte.gc_options.getOptionsLabels("fr"), ensure_ascii=False) + ' }' @app.route("/suggest/fr/") def suggestGet (token): response.set_header("Content-Type", "application/json; charset=UTF-8") try: @@ -217,11 +216,11 @@ response.set_cookie("user_id", request.cookies.user_id, path="/", max_age=86400) # we renew cookie for 24h else: response.delete_cookie("user_id", path="/") if request.forms.options: try: - dUserOptions = dict(oGCE.getOptions()) if not dUserOptions else dict(dUserOptions) + dUserOptions = grammalecte.gc_options.getOptions() if not dUserOptions else dict(dUserOptions) dUserOptions.update(json.loads(request.forms.options)) except (TypeError, json.JSONDecodeError): sError = "Request options not used." 
response.set_header("Content-Type", "application/json; charset=UTF-8") try: @@ -237,11 +236,11 @@ def setOptions (): "set grammar options for current user" response.set_header("Content-Type", "application/json; charset=UTF-8") if request.forms.options: sUserId = request.cookies.user_id if request.cookies.user_id else next(userGenerator) - dOptions = dUser[sUserId]["gc_options"] if sUserId in dUser else dict(oGCE.getOptions()) + dOptions = dUser[sUserId]["gc_options"] if sUserId in dUser else grammalecte.gc_options.getOptions() try: dOptions.update(json.loads(request.forms.options)) dUser[sUserId] = { "time": int(time.time()), "gc_options": dOptions } response.set_cookie("user_id", sUserId, path="/", max_age=86400) # 24h return json.dumps(dUser[sUserId]["gc_options"], ensure_ascii=False) @@ -262,11 +261,11 @@ return '{"message" : "Done."}' @app.route("/format_text/fr", method="POST") def formatText (): "apply the text formatter and returns text" - return oTextFormatter.formatText(request.forms.text) + return tf.formatText(request.forms.text) #@app.route('/static/') #def server_static (filepath): # return static_file(filepath, root='./views/static') @@ -314,20 +313,20 @@ if bTestPage: TESTPAGE = True HOMEPAGE = HOMEPAGE.replace("{SERVER_PORT}", str(nPort)) if dOptions: - oGCE.setOptions(dOptions) + grammalecte.gc_options.setOptions(dOptions) # Python version print("Python: " + sys.version) if sys.version < "3.7": print("Python 3.7+ required") return # Grammalecte - echo("Grammalecte v{}".format(oGCE.version)) - oGCE.displayOptions() + echo("Grammalecte v{}".format(grammalecte.version)) + grammalecte.gc_options.displayOptions() # Process Pool Executor initExecutor(nMultiCPU) # Server (Bottle) run(app, host=sHost, port=nPort) ADDED graphspell-js/lexgraph_fr.js Index: graphspell-js/lexgraph_fr.js ================================================================== --- /dev/null +++ graphspell-js/lexgraph_fr.js @@ -0,0 +1,726 @@ +// JavaScript + +"use strict"; + + 
+${string} +${map} + + +//// Default Suggestions + +const _dSugg = new Map ([ + ["bcp", "beaucoup"], + ["ca", "ça"], + ["cad", "c’est-à-dire"], + ["cb", "combien|CB"], + ["cdlt", "cordialement"], + ["construirent", "construire|construisirent|construisent|construiront"], + ["càd", "c’est-à-dire"], + ["chai", "j’sais|je sais"], + ["chais", "j’sais|je sais"], + ["chui", "j’suis|je suis"], + ["chuis", "j’suis|je suis"], + ["dc", "de|donc"], + ["done", "donc|donne"], + ["email", "courriel|e-mail|émail"], + ["emails", "courriels|e-mails"], + ["ete", "êtes|été"], + ["Etes-vous", "Êtes-vous"], + ["Etiez-vous", "Étiez-vous"], + ["Etions-vous", "Étions-nous"], + ["loins", "loin"], + ["mn", "min"], + ["mns", "min"], + ["parce-que", "parce que"], + ["pcq", "parce que"], + ["pd", "pendant|pédé"], + ["pdq", "pendant que"], + ["pdt", "pendant"], + ["pdtq", "pendant que"], + ["pécunier", "pécuniaire"], + ["pécuniers", "pécuniaires"], + ["pk", "pourquoi"], + ["pkoi", "pourquoi"], + ["pq", "pourquoi|PQ"], + ["prq", "presque"], + ["prsq", "presque"], + ["qcq", "quiconque"], + ["qd", "quand"], + ["qq", "quelque"], + ["qqch", "quelque chose"], + ["qqn", "quelqu’un"], + ["qqne", "quelqu’une"], + ["qqs", "quelques"], + ["qqunes", "quelques-unes"], + ["qquns", "quelques-uns"], + ["tdq", "tandis que"], + ["tj", "toujours"], + ["tjs", "toujours"], + ["tq", "tant que|tandis que"], + ["ts", "tous"], + ["tt", "tant|tout"], + ["tte", "toute"], + ["ttes", "toutes"], + + ["Iier", "Iᵉʳ"], + ["Iière", "Iʳᵉ"], + ["IIième", "IIᵉ"], + ["IIIième", "IIIᵉ"], + ["IVième", "IVᵉ"], + ["Vième", "Vᵉ"], + ["VIième", "VIᵉ"], + ["VIIième", "VIIᵉ"], + ["VIIIième", "VIIIᵉ"], + ["IXième", "IXᵉ"], + ["Xième", "Xᵉ"], + ["XIième", "XIᵉ"], + ["XIIième", "XIIᵉ"], + ["XIIIième", "XIIIᵉ"], + ["XIVième", "XIVᵉ"], + ["XVième", "XVᵉ"], + ["XVIième", "XVIᵉ"], + ["XVIIième", "XVIIᵉ"], + ["XVIIIième", "XVIIIᵉ"], + ["XIXième", "XIXᵉ"], + ["XXième", "XXᵉ"], + ["XXIième", "XXIᵉ"], + ["XXIIième", "XXIIᵉ"], + ["XXIIIième", 
"XXIIIᵉ"], + ["XXIVième", "XXIVᵉ"], + ["XXVième", "XXVᵉ"], + ["XXVIième", "XXVIᵉ"], + ["XXVIIième", "XXVIIᵉ"], + ["XXVIIIième", "XXVIIIᵉ"], + ["XXIXième", "XXIXᵉ"], + ["XXXième", "XXXᵉ"], + ["Ier", "Iᵉʳ"], + ["Ière", "Iʳᵉ"], + ["IIème", "IIᵉ"], + ["IIIème", "IIIᵉ"], + ["IVème", "IVᵉ"], + ["Vème", "Vᵉ"], + ["VIème", "VIᵉ"], + ["VIIème", "VIIᵉ"], + ["VIIIème", "VIIIᵉ"], + ["IXème", "IXᵉ"], + ["Xème", "Xᵉ"], + ["XIème", "XIᵉ"], + ["XIIème", "XIIᵉ"], + ["XIIIème", "XIIIᵉ"], + ["XIVème", "XIVᵉ"], + ["XVème", "XVᵉ"], + ["XVIème", "XVIᵉ"], + ["XVIIème", "XVIIᵉ"], + ["XVIIIème", "XVIIIᵉ"], + ["XIXème", "XIXᵉ"], + ["XXème", "XXᵉ"], + ["XXIème", "XXIᵉ"], + ["XXIIème", "XXIIᵉ"], + ["XXIIIème", "XXIIIᵉ"], + ["XXIVème", "XXIVᵉ"], + ["XXVème", "XXVᵉ"], + ["XXVIème", "XXVIᵉ"], + ["XXVIIème", "XXVIIᵉ"], + ["XXVIIIème", "XXVIIIᵉ"], + ["XXIXème", "XXIXᵉ"], + ["XXXème", "XXXᵉ"] +]); + + + +//// Lexicographer + +var lexgraph_fr = { + + dSugg: _dSugg, + + dTag: new Map([ + [':N', [" nom,", "Nom"]], + [':A', [" adjectif,", "Adjectif"]], + [':M1', [" prénom,", "Prénom"]], + [':M2', [" patronyme,", "Patronyme, matronyme, nom de famille…"]], + [':MP', [" nom propre,", "Nom propre"]], + [':W', [" adverbe,", "Adverbe"]], + [':J', [" interjection,", "Interjection"]], + [':B', [" nombre,", "Nombre"]], + [':T', [" titre,", "Titre de civilité"]], + + [':e', [" épicène", "épicène"]], + [':m', [" masculin", "masculin"]], + [':f', [" féminin", "féminin"]], + [':s', [" singulier", "singulier"]], + [':p', [" pluriel", "pluriel"]], + [':i', [" invariable", "invariable"]], + + [':V1', [" verbe (1ᵉʳ gr.),", "Verbe du 1ᵉʳ groupe"]], + [':V2', [" verbe (2ᵉ gr.),", "Verbe du 2ᵉ groupe"]], + [':V3', [" verbe (3ᵉ gr.),", "Verbe du 3ᵉ groupe"]], + [':V0e', [" verbe,", "Verbe auxiliaire être"]], + [':V0a', [" verbe,", "Verbe auxiliaire avoir"]], + + [':Y', [" infinitif,", "infinitif"]], + [':P', [" participe présent,", "participe présent"]], + [':Q', [" participe passé,", "participe passé"]], + [':Ip', [" 
présent,", "indicatif présent"]], + [':Iq', [" imparfait,", "indicatif imparfait"]], + [':Is', [" passé simple,", "indicatif passé simple"]], + [':If', [" futur,", "indicatif futur"]], + [':K', [" conditionnel présent,", "conditionnel présent"]], + [':Sp', [" subjonctif présent,", "subjonctif présent"]], + [':Sq', [" subjonctif imparfait,", "subjonctif imparfait"]], + [':E', [" impératif,", "impératif"]], + + [':1s', [" 1ʳᵉ p. sg.,", "verbe : 1ʳᵉ personne du singulier"]], + [':1ŝ', [" présent interr. 1ʳᵉ p. sg.,", "verbe : 1ʳᵉ personne du singulier (présent interrogatif)"]], + [':1ś', [" présent interr. 1ʳᵉ p. sg.,", "verbe : 1ʳᵉ personne du singulier (présent interrogatif)"]], + [':2s', [" 2ᵉ p. sg.,", "verbe : 2ᵉ personne du singulier"]], + [':3s', [" 3ᵉ p. sg.,", "verbe : 3ᵉ personne du singulier"]], + [':1p', [" 1ʳᵉ p. pl.,", "verbe : 1ʳᵉ personne du pluriel"]], + [':2p', [" 2ᵉ p. pl.,", "verbe : 2ᵉ personne du pluriel"]], + [':3p', [" 3ᵉ p. pl.,", "verbe : 3ᵉ personne du pluriel"]], + [':3p!', [" 3ᵉ p. 
pl.,", "verbe : 3ᵉ personne du pluriel (prononciation distinctive)"]], + + [':G', ["[mot grammatical]", "Mot grammatical"]], + [':X', [" adverbe de négation,", "Adverbe de négation"]], + [':U', [" adverbe interrogatif,", "Adverbe interrogatif"]], + [':R', [" préposition,", "Préposition"]], + [':Rv', [" préposition verbale,", "Préposition verbale"]], + [':D', [" déterminant,", "Déterminant"]], + [':Dd', [" déterminant démonstratif,", "Déterminant démonstratif"]], + [':De', [" déterminant exclamatif,", "Déterminant exclamatif"]], + [':Dp', [" déterminant possessif,", "Déterminant possessif"]], + [':Di', [" déterminant indéfini,", "Déterminant indéfini"]], + [':Dn', [" déterminant négatif,", "Déterminant négatif"]], + [':Od', [" pronom démonstratif,", "Pronom démonstratif"]], + [':Oi', [" pronom indéfini,", "Pronom indéfini"]], + [':On', [" pronom indéfini négatif,", "Pronom indéfini négatif"]], + [':Ot', [" pronom interrogatif,", "Pronom interrogatif"]], + [':Or', [" pronom relatif,", "Pronom relatif"]], + [':Ow', [" pronom adverbial,", "Pronom adverbial"]], + [':Os', [" pronom personnel sujet,", "Pronom personnel sujet"]], + [':Oo', [" pronom personnel objet,", "Pronom personnel objet"]], + [':Ov', [" préverbe,", "Préverbe (pronom personnel objet, +ne)"]], + [':O1', [" 1ʳᵉ pers.,", "Pronom : 1ʳᵉ personne"]], + [':O2', [" 2ᵉ pers.,", "Pronom : 2ᵉ personne"]], + [':O3', [" 3ᵉ pers.,", "Pronom : 3ᵉ personne"]], + [':C', [" conjonction,", "Conjonction"]], + [':Ĉ', [" conjonction (él.),", "Conjonction (élément)"]], + [':Cc', [" conjonction de coordination,", "Conjonction de coordination"]], + [':Cs', [" conjonction de subordination,", "Conjonction de subordination"]], + [':Ĉs', [" conjonction de subordination (él.),", "Conjonction de subordination (élément)"]], + + [':Ñ', [" locution nominale (él.),", "Locution nominale (élément)"]], + [':Â', [" locution adjectivale (él.),", "Locution adjectivale (élément)"]], + [':Ṽ', [" locution verbale (él.),", "Locution verbale 
(élément)"]], + [':Ŵ', [" locution adverbiale (él.),", "Locution adverbiale (élément)"]], + [':Ŕ', [" locution prépositive (él.),", "Locution prépositive (élément)"]], + [':Ĵ', [" locution interjective (él.),", "Locution interjective (élément)"]], + + [':Zp', [" préfixe,", "Préfixe"]], + [':Zs', [" suffixe,", "Suffixe"]], + + [':H', ["", ""]], + + [':@', ["", ""]], + [':@p', ["signe de ponctuation", "Signe de ponctuation"]], + [':@s', ["signe", "Signe divers"]], + + [';S', [" : symbole (unité de mesure)", "Symbole (unité de mesure)"]], + + ['/*', ["", "Sous-dictionnaire "]], + ['/C', [" ", "Sous-dictionnaire "]], + ['/M', ["", "Sous-dictionnaire "]], + ['/R', [" ", "Sous-dictionnaire "]], + ['/A', ["", "Sous-dictionnaire "]], + ['/X', ["", "Sous-dictionnaire "]] + ]), + + dLocTag: new Map([ + [':L', "locution"], + [':LN', "locution nominale"], + [':LA', "locution adjectivale"], + [':LV', "locution verbale"], + [':LW', "locution adverbiale"], + [':LR', "locution prépositive"], + [':LRv', "locution prépositive verbale"], + [':LO', "locution pronominale"], + [':LC', "locution conjonctive"], + [':LJ', "locution interjective"], + + [':B', " cardinale"], + [':e', " épicène"], + [':m', " masculine"], + [':f', " féminine"], + [':s', " singulière"], + [':p', " plurielle"], + [':i', " invariable"], + ['/L', " (latin)"] + ]), + + dLocVerb: new Map([ + ['i', " intransitive"], + ['n', " transitive indirecte"], + ['t', " transitive directe"], + ['p', " pronominale"], + ['m', " impersonnelle"], + ]), + + dElidedPrefix: new Map([ + ['d', "(de), déterminant épicène invariable"], + ['l', "(le/la), déterminant masculin/féminin singulier"], + ['j', "(je), pronom personnel sujet, 1ʳᵉ pers., épicène singulier"], + ['m', "(me), pronom personnel objet, 1ʳᵉ pers., épicène singulier"], + ['t', "(te), pronom personnel objet, 2ᵉ pers., épicène singulier"], + ['s', "(se), pronom personnel objet, 3ᵉ pers., épicène singulier/pluriel"], + ['n', "(ne), adverbe de négation"], + ['c', "(ce), pronom 
démonstratif, masculin singulier/pluriel"], + ['ç', "(ça), pronom démonstratif, masculin singulier"], + ['qu', "(que), conjonction de subordination"], + ['lorsqu', "(lorsque), conjonction de subordination"], + ['puisqu', "(puisque), conjonction de subordination"], + ['quoiqu', "(quoique), conjonction de subordination"], + ['jusqu', "(jusque), préposition"] + ]), + + dPronoms: new Map([ + ['je', " pronom personnel sujet, 1ʳᵉ pers. sing."], + ['tu', " pronom personnel sujet, 2ᵉ pers. sing."], + ['il', " pronom personnel sujet, 3ᵉ pers. masc. sing."], + ['on', " pronom personnel sujet, 3ᵉ pers. sing. ou plur."], + ['elle', " pronom personnel sujet, 3ᵉ pers. fém. sing."], + ['nous', " pronom personnel sujet/objet, 1ʳᵉ pers. plur."], + ['vous', " pronom personnel sujet/objet, 2ᵉ pers. plur."], + ['ils', " pronom personnel sujet, 3ᵉ pers. masc. plur."], + ['elles', " pronom personnel sujet, 3ᵉ pers. fém. plur."], + + ["là", " particule démonstrative"], + ["ci", " particule démonstrative"], + + ['le', " COD, masc. sing."], + ['la', " COD, fém. 
sing."], + ['les', " COD, plur."], + + ['moi', " COI (à moi), sing."], + ['toi', " COI (à toi), sing."], + ['lui', " COI (à lui ou à elle), sing."], + ['nous2', " COI (à nous), plur."], + ['vous2', " COI (à vous), plur."], + ['leur', " COI (à eux ou à elles), plur."], + + ['y', " pronom adverbial"], + ["m'y", " (me) pronom personnel objet + (y) pronom adverbial"], + ["t'y", " (te) pronom personnel objet + (y) pronom adverbial"], + ["s'y", " (se) pronom personnel objet + (y) pronom adverbial"], + + ['en', " pronom adverbial"], + ["m'en", " (me) pronom personnel objet + (en) pronom adverbial"], + ["t'en", " (te) pronom personnel objet + (en) pronom adverbial"], + ["s'en", " (se) pronom personnel objet + (en) pronom adverbial"] + ]), + + dChar: new Map([ + ['.', "point"], + ['·', "point médian"], + ['…', "points de suspension"], + [':', "deux-points"], + [';', "point-virgule"], + [',', "virgule"], + ['?', "point d’interrogation"], + ['!', "point d’exclamation"], + ['(', "parenthèse ouvrante"], + [')', "parenthèse fermante"], + ['[', "crochet ouvrant"], + [']', "crochet fermant"], + ['{', "accolade ouvrante"], + ['}', "accolade fermante"], + ['-', "tiret"], + ['—', "tiret cadratin"], + ['–', "tiret demi-cadratin"], + ['«', "guillemet ouvrant (chevrons)"], + ['»', "guillemet fermant (chevrons)"], + ['“', "guillemet ouvrant double"], + ['”', "guillemet fermant double"], + ['‘', "guillemet ouvrant"], + ['’', "guillemet fermant"], + ['"', "guillemets droits (déconseillé en typographie)"], + ['/', "signe de la division"], + ['+', "signe de l’addition"], + ['*', "signe de la multiplication"], + ['=', "signe de l’égalité"], + ['<', "inférieur à"], + ['>', "supérieur à"], + ['⩽', "inférieur ou égal à"], + ['⩾', "supérieur ou égal à"], + ['%', "signe de pourcentage"], + ['‰', "signe pour mille"], + ]), + + oSpellChecker: null, + oTokenizer: null, + oLocGraph: null, + + _zPartDemForm: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)-(là|ci)$", "i"), + _aPartDemExceptList: new 
Set(["celui", "celle", "ceux", "celles", "de", "jusque", "par", "marie-couche-toi"]), + _zInterroVerb: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)-(t-(?:il|elle|on)|je|tu|ils?|elles?|on|[nv]ous)$", "i"), + _zImperatifVerb: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)-((?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|y|en|[mts][’'](?:y|en)|les?|la|[mt]oi|leur|lui)$", "i"), + _zTag: new RegExp("[:;/][a-zA-Z0-9ÑÂĴĈŔÔṼŴ!][^:;/]*", "g"), + + + load: function (oSpellChecker, oTokenizer, oLocGraph) { + this.oSpellChecker = oSpellChecker; + this.oTokenizer = oTokenizer; + this.oLocGraph = JSON.parse(oLocGraph); + }, + + getInfoForToken: function (oToken) { + // Token: .sType, .sValue, .nStart, .nEnd + // return a object {sType, sValue, aLabel} + let m = null; + try { + switch (oToken.sType) { + case 'PUNC': + case 'SIGN': + return { + sType: oToken.sType, + sValue: oToken.sValue, + aLabel: [this.dChar.gl_get(oToken.sValue, "caractère indéterminé")] + }; + break; + case 'NUM': + return { + sType: oToken.sType, + sValue: oToken.sValue, + aLabel: ["nombre"] + }; + break; + case 'LINK': + return { + sType: oToken.sType, + sValue: oToken.sValue.slice(0, 40) + "…", + aLabel: ["hyperlien"] + }; + break; + case 'TAG': + return { + sType: oToken.sType, + sValue: oToken.sValue, + aLabel: ["étiquette (hashtag)"] + }; + break; + case 'HTML': + return { + sType: oToken.sType, + sValue: oToken.sValue.slice(0, 40) + "…", + aLabel: ["balise HTML"] + }; + break; + case 'PSEUDOHTML': + return { + sType: oToken.sType, + sValue: oToken.sValue, + aLabel: ["balise pseudo-HTML"] + }; + break; + case 'HTMLENTITY': + return { + sType: oToken.sType, + sValue: oToken.sValue, + aLabel: ["entité caractère XML/HTML"] + }; + break; + case 'HOUR': + return { + sType: oToken.sType, + sValue: oToken.sValue, + aLabel: ["heure"] + }; + break; + case 'WORD_ELIDED': + let sTemp = oToken.sValue.replace("’", "").replace("'", "").replace("`", "").toLowerCase(); + return { + sType: oToken.sType, + sValue: oToken.sValue, + 
aLabel: [this.dElidedPrefix.gl_get(sTemp, "préfixe élidé inconnu")] + }; + break; + case 'WORD_ORDINAL': + return { + sType: oToken.sType, + sValue: oToken.sValue, + aLabel: ["nombre ordinal"] + }; + break; + case 'FOLDERUNIX': + return { + sType: oToken.sType, + sValue: oToken.sValue.slice(0, 40) + "…", + aLabel: ["dossier UNIX (et dérivés)"] + }; + break; + case 'FOLDERWIN': + return { + sType: oToken.sType, + sValue: oToken.sValue.slice(0, 40) + "…", + aLabel: ["dossier Windows"] + }; + break; + case 'WORD_ACRONYM': + return { + sType: oToken.sType, + sValue: oToken.sValue, + aLabel: ["Sigle ou acronyme"] + }; + break; + case 'WORD': + if (oToken.sValue.gl_count("-") > 4) { + return { + sType: "COMPLEX", + sValue: oToken.sValue, + aLabel: ["élément complexe indéterminé"] + }; + } else if (m = this._zPartDemForm.exec(oToken.sValue)) { + // mots avec particules démonstratives + if (this._aPartDemExceptList.has(m[1].toLowerCase())) { + return { + sType: "WORD", + sValue: oToken.sValue, + aLabel: this._getMorph(oToken.sValue) + }; + } + return { + sType: oToken.sType, + sValue: oToken.sValue, + aLabel: ["mot avec particule démonstrative"], + aSubElem: [ + { sType: oToken.sType, sValue: m[1], aLabel: this._getMorph(m[1]) }, + { sType: oToken.sType, sValue: "-" + m[2], aLabel: [this._formatSuffix(m[2].toLowerCase())] } + ] + }; + } else if (m = this._zImperatifVerb.exec(oToken.sValue)) { + // imperative verb forms + return { + sType: oToken.sType, + sValue: oToken.sValue, + aLabel: ["forme verbale impérative"], + aSubElem: [ + { sType: oToken.sType, sValue: m[1], aLabel: this._getMorph(m[1]) }, + { sType: oToken.sType, sValue: "-" + m[2], aLabel: [this._formatSuffix(m[2].toLowerCase())] } + ] + }; + } else if (m = this._zInterroVerb.exec(oToken.sValue)) { + // formes interrogatives + return { + sType: oToken.sType, + sValue: oToken.sValue, + aLabel: ["forme verbale interrogative"], + aSubElem: [ + { sType: oToken.sType, sValue: m[1], aLabel: this._getMorph(m[1]) }, + 
{ sType: oToken.sType, sValue: "-" + m[2], aLabel: [this._formatSuffix(m[2].toLowerCase())] } + ] + }; + } else if (this.oSpellChecker.isValidToken(oToken.sValue)) { + return { + sType: oToken.sType, + sValue: oToken.sValue, + aLabel: this._getMorph(oToken.sValue) + }; + } else { + return { + sType: "UNKNOWN_WORD", + sValue: oToken.sValue, + aLabel: ["mot inconnu du dictionnaire"] + }; + } + break; + default: + return { + sType: oToken.sType, + sValue: oToken.sValue, + aLabel: ["token inconnu"] + } + } + } catch (e) { + console.error(e); + } + return null; + }, + + _getMorph (sWord) { + let aElem = []; + for (let s of this.oSpellChecker.getMorph(sWord)) { + if (s.includes(":")) aElem.push(this._formatTags(s)); + } + if (aElem.length == 0) { + aElem.push("mot inconnu du dictionnaire"); + } + return aElem; + }, + + _formatTags (sTags) { + let sRes = ""; + sTags = sTags.replace(/V([0-3][ea]?)[itpqnmr_eaxz]+/, "V$1"); + let m; + while ((m = this._zTag.exec(sTags)) !== null) { + sRes += this.dTag.get(m[0])[0]; + } + if (sRes.startsWith(" verbe") && !sRes.includes("infinitif")) { + sRes += " [" + sTags.slice(1, sTags.indexOf("/")) + "]"; + } + if (!sRes) { + return "#Erreur. Étiquette inconnue : [" + sTags + "]"; + } + return sRes.gl_trimRight(","); + }, + + _formatTagsLoc (sTags) { + let sRes = ""; + let m; + while ((m = this._zTag.exec(sTags)) !== null) { + if (m[0].startsWith(":LV")) { + sRes += this.dLocTag.get(":LV"); + for (let c of m[0].slice(3)) { + sRes += this.dLocVerb.get(c); + } + } else { + sRes += this.dLocTag.get(m[0]); + } + } + if (!sRes) { + return "#Erreur. 
Étiquette inconnue : [" + sTags + "]"; + } + return sRes.gl_trimRight(","); + }, + + _formatSuffix (s) { + if (s.startsWith("t-")) { + return "“t” euphonique +" + this.dPronoms.get(s.slice(2)); + } + if (!s.includes("-")) { + return this.dPronoms.get(s.replace("’", "'")); + } + if (s.endsWith("ous")) { + s += '2'; + } + let nPos = s.indexOf("-"); + return this.dPronoms.get(s.slice(0, nPos)) + " +" + this.dPronoms.get(s.slice(nPos + 1)); + }, + + getListOfTokens (sText, bInfo=true) { + let aElem = []; + if (sText !== "") { + for (let oToken of this.oTokenizer.genTokens(sText)) { + if (bInfo) { + let aRes = this.getInfoForToken(oToken); + if (aRes) { + aElem.push(aRes); + } + } else if (oToken.sType !== "SPACE") { + aElem.push(oToken); + } + } + } + return aElem; + }, + + * generateInfoForTokenList (lToken) { + for (let oToken of lToken) { + let aRes = this.getInfoForToken(oToken); + if (aRes) { + yield aRes; + } + } + }, + + getListOfTokensReduc (sText, bInfo=true) { + let lToken = this.getListOfTokens(sText.replace("'", "’").trim(), false); + let iToken = 0; + let aElem = []; + if (lToken.length == 0) { + return aElem; + } + do { + let oToken = lToken[iToken]; + let sMorphLoc = ''; + let aTokenTempList = [oToken]; + if (oToken.sType == "WORD" || oToken.sType == "WORD_ELIDED"){ + let iLocEnd = iToken + 1; + let oLocNode = this.oLocGraph[oToken.sValue.toLowerCase()]; + while (oLocNode) { + let oTokenNext = lToken[iLocEnd]; + iLocEnd++; + if (oTokenNext) { + oLocNode = oLocNode[oTokenNext.sValue.toLowerCase()]; + } + if (oLocNode && iLocEnd <= lToken.length) { + sMorphLoc = oLocNode["_:_"]; + aTokenTempList.push(oTokenNext); + } else { + break; + } + } + } + + if (sMorphLoc) { + // we have a locution + let sValue = ''; + for (let oTokenWord of aTokenTempList) { + sValue += oTokenWord.sValue+' '; + } + let oTokenLocution = { + 'nStart': aTokenTempList[0].nStart, + 'nEnd': aTokenTempList[aTokenTempList.length-1].nEnd, + 'sType': "LOC", + 'sValue': sValue.replace('’ 
','’').trim(), + 'aSubToken': aTokenTempList + }; + if (bInfo) { + let aSubElem = null; + if (sMorphLoc.startsWith("*|")) { + // cette suite de tokens n’est une locution que dans certains cas minoritaires + oTokenLocution.sType = "LOCP"; + for (let oElem of this.generateInfoForTokenList(aTokenTempList)) { + aElem.push(oElem); + } + sMorphLoc = sMorphLoc.slice(2); + } else { + aSubElem = [...this.generateInfoForTokenList(aTokenTempList)]; + } + // cette suite de tokens est la plupart du temps une locution + let aFormatedTag = []; + for (let sTagLoc of sMorphLoc.split('|') ){ + aFormatedTag.push(this._formatTagsLoc(sTagLoc)); + } + aElem.push({ + sType: oTokenLocution.sType, + sValue: oTokenLocution.sValue, + aLabel: aFormatedTag, + aSubElem: aSubElem + }); + } else { + aElem.push(oTokenLocution); + } + iToken = iToken + aTokenTempList.length; + } + else { + // No locution, we just add information + if (bInfo) { + let aRes = this.getInfoForToken(oToken); + if (aRes) { + aElem.push(aRes); + } + } else { + aElem.push(oToken); + } + iToken++; + } + } while (iToken < lToken.length); + return aElem; + } +} + + + +if (typeof(exports) !== 'undefined') { + exports.lexgraph_fr = lexgraph_fr; +} Index: graphspell-js/spellchecker.js ================================================================== --- graphspell-js/spellchecker.js +++ graphspell-js/spellchecker.js @@ -11,18 +11,17 @@ /* jslint esversion:6 */ /* global require, exports, console, IBDAWG, Tokenizer */ "use strict"; -if(typeof(process) !== 'undefined') { +if (typeof(process) !== 'undefined') { var ibdawg = require("./ibdawg.js"); var tokenizer = require("./tokenizer.js"); - var suggest = require("./suggest.js"); -} else if (typeof(require) !== 'undefined') { +} +else if (typeof(require) !== 'undefined') { var ibdawg = require("resource://grammalecte/graphspell/ibdawg.js"); var tokenizer = require("resource://grammalecte/graphspell/tokenizer.js"); - var suggest = 
require("resource://grammalecte/graphspell/suggest.js"); } ${map} @@ -44,13 +43,13 @@ this.oCommunityDic = this._loadDictionary(communityDic, sPath); this.oPersonalDic = this._loadDictionary(personalDic, sPath); this.bCommunityDic = Boolean(this.oCommunityDic); this.bPersonalDic = Boolean(this.oPersonalDic); this.oTokenizer = null; - // Default suggestions - this.dDefaultSugg = null; - this.loadSuggestions(sLangCode) + // Lexicographer + this.lexicographer = null; + this.loadLexicographer(sLangCode) // storage this.bStorage = false; this._dMorphologies = new Map(); // key: flexion, value: list of morphologies this._dLemmas = new Map(); // key: flexion, value: list of lemmas } @@ -129,22 +128,23 @@ deactivatePersonalDictionary () { this.bPersonalDic = false; } - // Default suggestions + // Lexicographer - loadSuggestions (sLangCode) { + loadLexicographer (sLangCode) { // load default suggestion module for - // When “import” works everywhere, do like with Python - try { - if (typeof(suggest) !== 'undefined') { - this.dDefaultSugg = suggest[sLangCode]; - } - } - catch (e) { - console.error(e); + if (typeof(process) !== 'undefined') { + this.lexicographer = require(`./lexgraph_${sLangCode}.js`); + } + else if (typeof(require) !== 'undefined') { + this.lexicographer = require(`resource://grammalecte/graphspell/lexgraph_${sLangCode}.js`); + } + else if (self.hasOwnProperty("lexgraph_"+sLangCode)) { + console.log(self); + this.lexicographer = self["lexgraph_"+sLangCode]; } } // Storage @@ -253,15 +253,15 @@ return Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf("/")); }))); } * suggest (sWord, nSuggLimit=10) { // generator: returns 1, 2 or 3 lists of suggestions - if (this.dDefaultSugg) { - if (this.dDefaultSugg.has(sWord)) { - yield this.dDefaultSugg.get(sWord).split("|"); - } else if (sWord.gl_isTitle() && this.dDefaultSugg.has(sWord.toLowerCase())) { - let lRes = this.dDefaultSugg.get(sWord.toLowerCase()).split("|"); + if 
(this.lexicographer) { + if (this.lexicographer.dSugg.has(sWord)) { + yield this.lexicographer.dSugg.get(sWord).split("|"); + } else if (sWord.gl_isTitle() && this.lexicographer.dSugg.has(sWord.toLowerCase())) { + let lRes = this.lexicographer.dSugg.get(sWord.toLowerCase()).split("|"); yield lRes.map((sSugg) => { return sSugg.slice(0,1).toUpperCase() + sSugg.slice(1); }); } else { yield this.oMainDic.suggest(sWord, nSuggLimit, true); } } else { DELETED graphspell-js/suggest.js Index: graphspell-js/suggest.js ================================================================== --- graphspell-js/suggest.js +++ /dev/null @@ -1,128 +0,0 @@ -// JavaScript - -"use strict"; - -var suggest = { - fr: new Map ([ - ["bcp", "beaucoup"], - ["ca", "ça"], - ["cad", "c’est-à-dire"], - ["cb", "combien|CB"], - ["cdlt", "cordialement"], - ["construirent", "construire|construisirent|construisent|construiront"], - ["càd", "c’est-à-dire"], - ["chai", "j’sais|je sais"], - ["chais", "j’sais|je sais"], - ["chui", "j’suis|je suis"], - ["chuis", "j’suis|je suis"], - ["dc", "de|donc"], - ["done", "donc|donne"], - ["email", "courriel|e-mail|émail"], - ["emails", "courriels|e-mails"], - ["ete", "êtes|été"], - ["Etes-vous", "Êtes-vous"], - ["Etiez-vous", "Étiez-vous"], - ["Etions-vous", "Étions-nous"], - ["loins", "loin"], - ["mn", "min"], - ["mns", "min"], - ["parce-que", "parce que"], - ["pcq", "parce que"], - ["pd", "pendant|pédé"], - ["pdq", "pendant que"], - ["pdt", "pendant"], - ["pdtq", "pendant que"], - ["pécunier", "pécuniaire"], - ["pécuniers", "pécuniaires"], - ["pk", "pourquoi"], - ["pkoi", "pourquoi"], - ["pq", "pourquoi|PQ"], - ["prq", "presque"], - ["prsq", "presque"], - ["qcq", "quiconque"], - ["qd", "quand"], - ["qq", "quelque"], - ["qqch", "quelque chose"], - ["qqn", "quelqu’un"], - ["qqne", "quelqu’une"], - ["qqs", "quelques"], - ["qqunes", "quelques-unes"], - ["qquns", "quelques-uns"], - ["tdq", "tandis que"], - ["tj", "toujours"], - ["tjs", "toujours"], - ["tq", "tant 
que|tandis que"], - ["ts", "tous"], - ["tt", "tant|tout"], - ["tte", "toute"], - ["ttes", "toutes"], - - ["Iier", "Iᵉʳ"], - ["Iière", "Iʳᵉ"], - ["IIième", "IIᵉ"], - ["IIIième", "IIIᵉ"], - ["IVième", "IVᵉ"], - ["Vième", "Vᵉ"], - ["VIième", "VIᵉ"], - ["VIIième", "VIIᵉ"], - ["VIIIième", "VIIIᵉ"], - ["IXième", "IXᵉ"], - ["Xième", "Xᵉ"], - ["XIième", "XIᵉ"], - ["XIIième", "XIIᵉ"], - ["XIIIième", "XIIIᵉ"], - ["XIVième", "XIVᵉ"], - ["XVième", "XVᵉ"], - ["XVIième", "XVIᵉ"], - ["XVIIième", "XVIIᵉ"], - ["XVIIIième", "XVIIIᵉ"], - ["XIXième", "XIXᵉ"], - ["XXième", "XXᵉ"], - ["XXIième", "XXIᵉ"], - ["XXIIième", "XXIIᵉ"], - ["XXIIIième", "XXIIIᵉ"], - ["XXIVième", "XXIVᵉ"], - ["XXVième", "XXVᵉ"], - ["XXVIième", "XXVIᵉ"], - ["XXVIIième", "XXVIIᵉ"], - ["XXVIIIième", "XXVIIIᵉ"], - ["XXIXième", "XXIXᵉ"], - ["XXXième", "XXXᵉ"], - ["Ier", "Iᵉʳ"], - ["Ière", "Iʳᵉ"], - ["IIème", "IIᵉ"], - ["IIIème", "IIIᵉ"], - ["IVème", "IVᵉ"], - ["Vème", "Vᵉ"], - ["VIème", "VIᵉ"], - ["VIIème", "VIIᵉ"], - ["VIIIème", "VIIIᵉ"], - ["IXème", "IXᵉ"], - ["Xème", "Xᵉ"], - ["XIème", "XIᵉ"], - ["XIIème", "XIIᵉ"], - ["XIIIème", "XIIIᵉ"], - ["XIVème", "XIVᵉ"], - ["XVème", "XVᵉ"], - ["XVIème", "XVIᵉ"], - ["XVIIème", "XVIIᵉ"], - ["XVIIIème", "XVIIIᵉ"], - ["XIXème", "XIXᵉ"], - ["XXème", "XXᵉ"], - ["XXIème", "XXIᵉ"], - ["XXIIème", "XXIIᵉ"], - ["XXIIIème", "XXIIIᵉ"], - ["XXIVème", "XXIVᵉ"], - ["XXVème", "XXVᵉ"], - ["XXVIème", "XXVIᵉ"], - ["XXVIIème", "XXVIIᵉ"], - ["XXVIIIème", "XXVIIIᵉ"], - ["XXIXème", "XXIXᵉ"], - ["XXXème", "XXXᵉ"] - ]) -}; - - -if (typeof(exports) !== 'undefined') { - exports.fr = suggest.fr; -} DELETED graphspell/fr.py Index: graphspell/fr.py ================================================================== --- graphspell/fr.py +++ /dev/null @@ -1,122 +0,0 @@ -""" -Default suggestion for French language -""" - -dSugg = { - "bcp": "beaucoup", - "ca": "ça", - "cad": "c’est-à-dire", - "cb": "combien|CB", - "cdlt": "cordialement", - "construirent": "construire|construisirent|construisent|construiront", 
- "càd": "c’est-à-dire", - "chai": "j’sais|je sais", - "chais": "j’sais|je sais", - "chui": "j’suis|je suis", - "chuis": "j’suis|je suis", - "done": "donc|donne", - "dc": "de|donc", - "email": "courriel|e-mail|émail", - "emails": "courriels|e-mails", - "ete": "êtes|été", - "Etes-vous": "Êtes-vous", - "Etiez-vous": "Étiez-vous", - "Etions-nous": "Étions-nous", - "loins": "loin", - "mn": "min", - "mns": "min", - "parce-que": "parce que", - "pcq": "parce que", - "pd": "pendant", - "pdq": "pendant que", - "pdt": "pendant", - "pdtq": "pendant que", - "pécunier": "pécuniaire", - "pécuniers": "pécuniaires", - "pk": "pourquoi", - "pkoi": "pourquoi", - "pq": "pourquoi|PQ", - "prq": "presque", - "prsq": "presque", - "qcq": "quiconque", - "qd": "quand", - "qq": "quelque", - "qqch": "quelque chose", - "qqn": "quelqu’un", - "qqne": "quelqu’une", - "qqs": "quelques", - "qqunes": "quelques-unes", - "qquns": "quelques-uns", - "tdq": "tandis que", - "tj": "toujours", - "tjs": "toujours", - "tq": "tant que|tandis que", - "ts": "tous", - "tt": "tant|tout", - "tte": "toute", - "ttes": "toutes", - "y’a": "y a", - - "Iier": "Iᵉʳ", - "Iière": "Iʳᵉ", - "IIième": "IIᵉ", - "IIIième": "IIIᵉ", - "IVième": "IVᵉ", - "Vième": "Vᵉ", - "VIième": "VIᵉ", - "VIIième": "VIIᵉ", - "VIIIième": "VIIIᵉ", - "IXième": "IXᵉ", - "Xième": "Xᵉ", - "XIième": "XIᵉ", - "XIIième": "XIIᵉ", - "XIIIième": "XIIIᵉ", - "XIVième": "XIVᵉ", - "XVième": "XVᵉ", - "XVIième": "XVIᵉ", - "XVIIième": "XVIIᵉ", - "XVIIIième": "XVIIIᵉ", - "XIXième": "XIXᵉ", - "XXième": "XXᵉ", - "XXIième": "XXIᵉ", - "XXIIième": "XXIIᵉ", - "XXIIIième": "XXIIIᵉ", - "XXIVième": "XXIVᵉ", - "XXVième": "XXVᵉ", - "XXVIième": "XXVIᵉ", - "XXVIIième": "XXVIIᵉ", - "XXVIIIième": "XXVIIIᵉ", - "XXIXième": "XXIXᵉ", - "XXXième": "XXXᵉ", - "Ier": "Iᵉʳ", - "Ière": "Iʳᵉ", - "IIème": "IIᵉ", - "IIIème": "IIIᵉ", - "IVème": "IVᵉ", - "Vème": "Vᵉ", - "VIème": "VIᵉ", - "VIIème": "VIIᵉ", - "VIIIème": "VIIIᵉ", - "IXème": "IXᵉ", - "Xème": "Xᵉ", - "XIème": "XIᵉ", - "XIIème": 
"XIIᵉ", - "XIIIème": "XIIIᵉ", - "XIVème": "XIVᵉ", - "XVème": "XVᵉ", - "XVIème": "XVIᵉ", - "XVIIème": "XVIIᵉ", - "XVIIIème": "XVIIIᵉ", - "XIXème": "XIXᵉ", - "XXème": "XXᵉ", - "XXIème": "XXIᵉ", - "XXIIème": "XXIIᵉ", - "XXIIIème": "XXIIIᵉ", - "XXIVème": "XXIVᵉ", - "XXVème": "XXVᵉ", - "XXVIème": "XXVIᵉ", - "XXVIIème": "XXVIIᵉ", - "XXVIIIème": "XXVIIIᵉ", - "XXIXème": "XXIXᵉ", - "XXXème": "XXXᵉ" -} ADDED graphspell/lexgraph_fr.py Index: graphspell/lexgraph_fr.py ================================================================== --- /dev/null +++ graphspell/lexgraph_fr.py @@ -0,0 +1,358 @@ +""" +Lexicographer for the French language +""" + +# Note: +# This mode must contains at least: +# : a dictionary for default suggestions. +# : a boolean False +# if the boolean is True, 3 functions are required: +# split(sWord) -> returns a list of string (that will be analyzed) +# analyze(sWord) -> returns a string with the meaning of word +# formatTags(sTags) -> returns a string with the meaning of tags + + +import re + +#### Suggestions + +dSugg = { + "bcp": "beaucoup", + "ca": "ça", + "cad": "c’est-à-dire", + "cb": "combien|CB", + "cdlt": "cordialement", + "construirent": "construire|construisirent|construisent|construiront", + "càd": "c’est-à-dire", + "chai": "j’sais|je sais", + "chais": "j’sais|je sais", + "chui": "j’suis|je suis", + "chuis": "j’suis|je suis", + "done": "donc|donne", + "dc": "de|donc", + "email": "courriel|e-mail|émail", + "emails": "courriels|e-mails", + "ete": "êtes|été", + "Etes-vous": "Êtes-vous", + "Etiez-vous": "Étiez-vous", + "Etions-nous": "Étions-nous", + "loins": "loin", + "mn": "min", + "mns": "min", + "parce-que": "parce que", + "pcq": "parce que", + "pd": "pendant", + "pdq": "pendant que", + "pdt": "pendant", + "pdtq": "pendant que", + "pécunier": "pécuniaire", + "pécuniers": "pécuniaires", + "pk": "pourquoi", + "pkoi": "pourquoi", + "pq": "pourquoi|PQ", + "prq": "presque", + "prsq": "presque", + "qcq": "quiconque", + "qd": "quand", + "qq": 
"quelque", + "qqch": "quelque chose", + "qqn": "quelqu’un", + "qqne": "quelqu’une", + "qqs": "quelques", + "qqunes": "quelques-unes", + "qquns": "quelques-uns", + "tdq": "tandis que", + "tj": "toujours", + "tjs": "toujours", + "tq": "tant que|tandis que", + "ts": "tous", + "tt": "tant|tout", + "tte": "toute", + "ttes": "toutes", + "y’a": "y a", + + "Iier": "Iᵉʳ", + "Iière": "Iʳᵉ", + "IIième": "IIᵉ", + "IIIième": "IIIᵉ", + "IVième": "IVᵉ", + "Vième": "Vᵉ", + "VIième": "VIᵉ", + "VIIième": "VIIᵉ", + "VIIIième": "VIIIᵉ", + "IXième": "IXᵉ", + "Xième": "Xᵉ", + "XIième": "XIᵉ", + "XIIième": "XIIᵉ", + "XIIIième": "XIIIᵉ", + "XIVième": "XIVᵉ", + "XVième": "XVᵉ", + "XVIième": "XVIᵉ", + "XVIIième": "XVIIᵉ", + "XVIIIième": "XVIIIᵉ", + "XIXième": "XIXᵉ", + "XXième": "XXᵉ", + "XXIième": "XXIᵉ", + "XXIIième": "XXIIᵉ", + "XXIIIième": "XXIIIᵉ", + "XXIVième": "XXIVᵉ", + "XXVième": "XXVᵉ", + "XXVIième": "XXVIᵉ", + "XXVIIième": "XXVIIᵉ", + "XXVIIIième": "XXVIIIᵉ", + "XXIXième": "XXIXᵉ", + "XXXième": "XXXᵉ", + + "Ier": "Iᵉʳ", + "Ière": "Iʳᵉ", + "IIème": "IIᵉ", + "IIIème": "IIIᵉ", + "IVème": "IVᵉ", + "Vème": "Vᵉ", + "VIème": "VIᵉ", + "VIIème": "VIIᵉ", + "VIIIème": "VIIIᵉ", + "IXème": "IXᵉ", + "Xème": "Xᵉ", + "XIème": "XIᵉ", + "XIIème": "XIIᵉ", + "XIIIème": "XIIIᵉ", + "XIVème": "XIVᵉ", + "XVème": "XVᵉ", + "XVIème": "XVIᵉ", + "XVIIème": "XVIIᵉ", + "XVIIIème": "XVIIIᵉ", + "XIXème": "XIXᵉ", + "XXème": "XXᵉ", + "XXIème": "XXIᵉ", + "XXIIème": "XXIIᵉ", + "XXIIIème": "XXIIIᵉ", + "XXIVème": "XXIVᵉ", + "XXVème": "XXVᵉ", + "XXVIème": "XXVIᵉ", + "XXVIIème": "XXVIIᵉ", + "XXVIIIème": "XXVIIIᵉ", + "XXIXème": "XXIXᵉ", + "XXXème": "XXXᵉ" +} + + +#### Lexicographer + +bLexicographer = True + +_dTAGS = { + ':N': (" nom,", "Nom"), + ':A': (" adjectif,", "Adjectif"), + ':M1': (" prénom,", "Prénom"), + ':M2': (" patronyme,", "Patronyme, matronyme, nom de famille…"), + ':MP': (" nom propre,", "Nom propre"), + ':W': (" adverbe,", "Adverbe"), + ':J': (" interjection,", "Interjection"), + ':B': (" nombre,", 
"Nombre"), + ':T': (" titre,", "Titre de civilité"), + + ':e': (" épicène", "épicène"), + ':m': (" masculin", "masculin"), + ':f': (" féminin", "féminin"), + ':s': (" singulier", "singulier"), + ':p': (" pluriel", "pluriel"), + ':i': (" invariable", "invariable"), + + ':V1': (" verbe (1ᵉʳ gr.),", "Verbe du 1ᵉʳ groupe"), + ':V2': (" verbe (2ᵉ gr.),", "Verbe du 2ᵉ groupe"), + ':V3': (" verbe (3ᵉ gr.),", "Verbe du 3ᵉ groupe"), + ':V0e': (" verbe,", "Verbe auxiliaire être"), + ':V0a': (" verbe,", "Verbe auxiliaire avoir"), + + ':Y': (" infinitif,", "infinitif"), + ':P': (" participe présent,", "participe présent"), + ':Q': (" participe passé,", "participe passé"), + ':Ip': (" présent,", "indicatif présent"), + ':Iq': (" imparfait,", "indicatif imparfait"), + ':Is': (" passé simple,", "indicatif passé simple"), + ':If': (" futur,", "indicatif futur"), + ':K': (" conditionnel présent,", "conditionnel présent"), + ':Sp': (" subjonctif présent,", "subjonctif présent"), + ':Sq': (" subjonctif imparfait,", "subjonctif imparfait"), + ':E': (" impératif,", "impératif"), + + ':1s': (" 1ʳᵉ p. sg.,", "verbe : 1ʳᵉ personne du singulier"), + ':1ŝ': (" présent interr. 1ʳᵉ p. sg.,", "verbe : 1ʳᵉ personne du singulier (présent interrogatif)"), + ':1ś': (" présent interr. 1ʳᵉ p. sg.,", "verbe : 1ʳᵉ personne du singulier (présent interrogatif)"), + ':2s': (" 2ᵉ p. sg.,", "verbe : 2ᵉ personne du singulier"), + ':3s': (" 3ᵉ p. sg.,", "verbe : 3ᵉ personne du singulier"), + ':1p': (" 1ʳᵉ p. pl.,", "verbe : 1ʳᵉ personne du pluriel"), + ':2p': (" 2ᵉ p. pl.,", "verbe : 2ᵉ personne du pluriel"), + ':3p': (" 3ᵉ p. pl.,", "verbe : 3ᵉ personne du pluriel"), + ':3p!': (" 3ᵉ p. 
pl.,", "verbe : 3ᵉ personne du pluriel (prononciation distinctive)"), + + ':G': ("", "Mot grammatical"), + ':X': (" adverbe de négation,", "Adverbe de négation"), + ':U': (" adverbe interrogatif,", "Adverbe interrogatif"), + ':R': (" préposition,", "Préposition"), + ':Rv': (" préposition verbale,", "Préposition verbale"), + ':D': (" déterminant,", "Déterminant"), + ':Dd': (" déterminant démonstratif,", "Déterminant démonstratif"), + ':De': (" déterminant exclamatif,", "Déterminant exclamatif"), + ':Dp': (" déterminant possessif,", "Déterminant possessif"), + ':Di': (" déterminant indéfini,", "Déterminant indéfini"), + ':Dn': (" déterminant négatif,", "Déterminant négatif"), + ':Od': (" pronom démonstratif,", "Pronom démonstratif"), + ':Oi': (" pronom indéfini,", "Pronom indéfini"), + ':On': (" pronom indéfini négatif,", "Pronom indéfini négatif"), + ':Ot': (" pronom interrogatif,", "Pronom interrogatif"), + ':Or': (" pronom relatif,", "Pronom relatif"), + ':Ow': (" pronom adverbial,", "Pronom adverbial"), + ':Os': (" pronom personnel sujet,", "Pronom personnel sujet"), + ':Oo': (" pronom personnel objet,", "Pronom personnel objet"), + ':Ov': (" préverbe,", "Préverbe (pronom personnel objet, +ne)"), + ':O1': (" 1ʳᵉ pers.,", "Pronom : 1ʳᵉ personne"), + ':O2': (" 2ᵉ pers.,", "Pronom : 2ᵉ personne"), + ':O3': (" 3ᵉ pers.,", "Pronom : 3ᵉ personne"), + ':C': (" conjonction,", "Conjonction"), + ':Ĉ': (" conjonction (él.),", "Conjonction (élément)"), + ':Cc': (" conjonction de coordination,", "Conjonction de coordination"), + ':Cs': (" conjonction de subordination,", "Conjonction de subordination"), + ':Ĉs': (" conjonction de subordination (él.),", "Conjonction de subordination (élément)"), + + ':Ñ': (" locution nominale (él.),", "Locution nominale (élément)"), + ':Â': (" locution adjectivale (él.),", "Locution adjectivale (élément)"), + ':Ṽ': (" locution verbale (él.),", "Locution verbale (élément)"), + ':Ŵ': (" locution adverbiale (él.),", "Locution adverbiale 
(élément)"), + ':Ŕ': (" locution prépositive (él.),", "Locution prépositive (élément)"), + ':Ĵ': (" locution interjective (él.),", "Locution interjective (élément)"), + + ':Zp': (" préfixe,", "Préfixe"), + ':Zs': (" suffixe,", "Suffixe"), + + ':H': ("", ""), + + ':@': ("", ""), + ':@p': ("signe de ponctuation", "Signe de ponctuation"), + ':@s': ("signe", "Signe divers"), + + ';S': (" : symbole (unité de mesure)", "Symbole (unité de mesure)"), + + '/*': ("", "Sous-dictionnaire "), + '/C': (" ", "Sous-dictionnaire "), + '/M': ("", "Sous-dictionnaire "), + '/R': (" ", "Sous-dictionnaire "), + '/A': ("", "Sous-dictionnaire "), + '/X': ("", "Sous-dictionnaire ") +} + +_dValues = { + 'd’': "(de), préposition ou déterminant épicène invariable", + 'l’': "(le/la), déterminant ou pronom personnel objet, masculin/féminin singulier", + 'j’': "(je), pronom personnel sujet, 1ʳᵉ pers., épicène singulier", + 'm’': "(me), pronom personnel objet, 1ʳᵉ pers., épicène singulier", + 't’': "(te), pronom personnel objet, 2ᵉ pers., épicène singulier", + 's’': "(se), pronom personnel objet, 3ᵉ pers., épicène singulier/pluriel", + 'n’': "(ne), adverbe de négation", + 'c’': "(ce), pronom démonstratif, masculin singulier/pluriel", + 'ç’': "(ça), pronom démonstratif, masculin singulier", + 'qu’': "(que), conjonction de subordination", + 'lorsqu’': "(lorsque), conjonction de subordination", + 'puisqu’': "(puisque), conjonction de subordination", + 'quoiqu’': "(quoique), conjonction de subordination", + 'jusqu’': "(jusque), préposition", + + '-je': " pronom personnel sujet, 1ʳᵉ pers. sing.", + '-tu': " pronom personnel sujet, 2ᵉ pers. sing.", + '-il': " pronom personnel sujet, 3ᵉ pers. masc. sing.", + '-on': " pronom personnel sujet, 3ᵉ pers. sing. ou plur.", + '-elle': " pronom personnel sujet, 3ᵉ pers. fém. sing.", + '-t-il': " “t” euphonique + pronom personnel sujet, 3ᵉ pers. masc. sing.", + '-t-on': " “t” euphonique + pronom personnel sujet, 3ᵉ pers. sing. 
ou plur.",
+    '-t-elle': " “t” euphonique + pronom personnel sujet, 3ᵉ pers. fém. sing.",
+    '-nous': " pronom personnel sujet/objet, 1ʳᵉ pers. plur. ou COI (à nous), plur.",
+    '-vous': " pronom personnel sujet/objet, 2ᵉ pers. plur. ou COI (à vous), plur.",
+    '-ils': " pronom personnel sujet, 3ᵉ pers. masc. plur.",
+    '-elles': " pronom personnel sujet, 3ᵉ pers. fém. plur.",
+
+    "-là": " particule démonstrative",
+    "-ci": " particule démonstrative",
+
+    '-le': " COD, masc. sing.",
+    '-la': " COD, fém. sing.",
+    '-les': " COD, plur.",
+
+    '-moi': " COI (à moi), sing.",
+    '-toi': " COI (à toi), sing.",
+    '-lui': " COI (à lui ou à elle), sing.",
+    '-leur': " COI (à eux ou à elles), plur.",
+
+    '-le-moi': " COD, masc. sing. + COI (à moi), sing.",
+    '-le-toi': " COD, masc. sing. + COI (à toi), sing.",
+    '-le-lui': " COD, masc. sing. + COI (à lui ou à elle), sing.",
+    '-le-nous': " COD, masc. sing. + COI (à nous), plur.",
+    '-le-vous': " COD, masc. sing. + COI (à vous), plur.",
+    '-le-leur': " COD, masc. sing. + COI (à eux ou à elles), plur.",
+
+    '-la-moi': " COD, fém. sing. + COI (à moi), sing.",
+    '-la-toi': " COD, fém. sing. + COI (à toi), sing.",
+    '-la-lui': " COD, fém. sing. + COI (à lui ou à elle), sing.",
+    '-la-nous': " COD, fém. sing. + COI (à nous), plur.",
+    '-la-vous': " COD, fém. sing. + COI (à vous), plur.",
+    '-la-leur': " COD, fém. sing. + COI (à eux ou à elles), plur.",
+
+    '-les-moi': " COD, plur. + COI (à moi), sing.",
+    '-les-toi': " COD, plur. + COI (à toi), sing.",
+    '-les-lui': " COD, plur. + COI (à lui ou à elle), sing.",
+    '-les-nous': " COD, plur. + COI (à nous), plur.",
+    '-les-vous': " COD, plur. + COI (à vous), plur.",
+    '-les-leur': " COD, plur. 
+ COI (à eux ou à elles), plur.",
+
+    '-y': " pronom adverbial",
+    "-m’y": " (me) pronom personnel objet + (y) pronom adverbial",
+    "-t’y": " (te) pronom personnel objet + (y) pronom adverbial",
+    "-s’y": " (se) pronom personnel objet + (y) pronom adverbial",
+
+    '-en': " pronom adverbial",
+    "-m’en": " (me) pronom personnel objet + (en) pronom adverbial",
+    "-t’en": " (te) pronom personnel objet + (en) pronom adverbial",
+    "-s’en": " (se) pronom personnel objet + (en) pronom adverbial",
+}
+
+
+_zElidedPrefix = re.compile("(?i)^((?:[dljmtsncç]|quoiqu|lorsqu|jusqu|puisqu|qu)’)(.+)")
+_zCompoundWord = re.compile("(?i)(\\w+)(-(?:(?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts]’(?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous))$")
+_zTag = re.compile("[:;/][\\w*][^:;/]*")
+
+def split (sWord):
+    "split <sWord> in 3 parts and return them as a tuple: (elided prefix, root, hyphenated suffix); a missing part is an empty string"
+    sWord = sWord.replace("'", "’")
+    sPrefix = ""
+    sSuffix = ""
+    # elided prefix (d’, l’, qu’, lorsqu’, …)
+    m = _zElidedPrefix.match(sWord)
+    if m:
+        sPrefix = m.group(1)
+        sWord = m.group(2)
+    # compound words: trailing hyphenated pronoun/particle (-je, -t-il, -le-moi, …)
+    m = _zCompoundWord.match(sWord)
+    if m:
+        sWord = m.group(1)
+        sSuffix = m.group(2)
+    return sPrefix, sWord, sSuffix
+
+
+def analyze (sWord):
+    "return meaning of <sWord> (lowercased lookup in _dValues) if found else an empty string"
+    sWord = sWord.lower()
+    if sWord in _dValues:
+        return _dValues[sWord]
+    return ""
+
+
+def formatTags (sTags):
+    "returns string: readable tags (short labels of _dTAGS, unknown tags shown as [tag])"
+    sRes = ""
+    sTags = re.sub("(?<=V[1-3])[itpqnmr_eaxz]+", "", sTags)
+    sTags = re.sub("(?<=V0[ea])[itpqnmr_eaxz]+", "", sTags)
+    for m in _zTag.finditer(sTags):
+        sRes += _dTAGS.get(m.group(0), (" [{}]".format(m.group(0)), ""))[0]  # fallback must be a tuple like the _dTAGS values, else [0] yields only " "
+    if sRes.startswith(" verbe") and not sRes.endswith("infinitif"):
+        sRes += " [{}]".format(sTags[1:sTags.find("/")])
+    return sRes.rstrip(",")

Index: graphspell/spellchecker.py
==================================================================
--- graphspell/spellchecker.py
+++ graphspell/spellchecker.py
@@ -34,12 +34,12 @@
         self.oPersonalDic = 
self._loadDictionary(sfPersonalDic) self.bCommunityDic = bool(self.oCommunityDic) self.bPersonalDic = bool(self.oPersonalDic) self.oTokenizer = None # Default suggestions - self.dDefaultSugg = None - self.loadSuggestions(sLangCode) + self.lexicographer = None + self.loadLexicographer(sLangCode) # storage self.bStorage = False self._dMorphologies = {} # key: flexion, value: list of morphologies self._dLemmas = {} # key: flexion, value: list of lemmas @@ -100,18 +100,34 @@ self.bPersonalDic = False # Default suggestions - def loadSuggestions (self, sLangCode): + def loadLexicographer (self, sLangCode): "load default suggestion module for " try: - suggest = importlib.import_module("."+sLangCode, "grammalecte.graphspell") + self.lexicographer = importlib.import_module(".lexgraph_"+sLangCode, "grammalecte.graphspell") except ImportError: print("No suggestion module for language <"+sLangCode+">") return - self.dDefaultSugg = suggest.dSugg + + def analyze (self, sWord): + "returns a list of words and their morphologies" + if not self.lexicographer: + return [] + lWordAndMorph = [] + for sElem in self.lexicographer.split(sWord): + if sElem: + lMorph = self.getMorph(sElem) + sLex = self.lexicographer.analyze(sElem) + if sLex: + aRes = [ (" | ".join(lMorph), sLex) ] + else: + aRes = [ (sMorph, self.lexicographer.formatTags(sMorph)) for sMorph in lMorph ] + if aRes: + lWordAndMorph.append((sElem, aRes)) + return lWordAndMorph # Storage def activateStorage (self): @@ -159,10 +175,11 @@ dWord[dToken['sValue']] = dWord.get(dToken['sValue'], 0) + 1 else: for sLemma in self.getLemma(dToken['sValue']): dWord[sLemma] = dWord.get(sLemma, 0) + 1 return dWord + # IBDAWG functions def isValidToken (self, sToken): "checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)" @@ -216,15 +233,15 @@ return self._dLemmas[sWord] return { s[1:s.find("/")] for s in self.getMorph(sWord) } def suggest (self, sWord, nSuggLimit=10): "generator: returns 1, 2 or 3 
lists of suggestions"
-        if self.dDefaultSugg:
-            if sWord in self.dDefaultSugg:
-                yield self.dDefaultSugg[sWord].split("|")
-            elif sWord.istitle() and sWord.lower() in self.dDefaultSugg:
-                lRes = self.dDefaultSugg[sWord.lower()].split("|")
+        if getattr(self.lexicographer, "dSugg", None):  # lexicographer is None when its module failed to import
+            if sWord in self.lexicographer.dSugg:
+                yield self.lexicographer.dSugg[sWord].split("|")
+            elif sWord.istitle() and sWord.lower() in self.lexicographer.dSugg:
+                lRes = self.lexicographer.dSugg[sWord.lower()].split("|")
                 yield list(map(lambda sSugg: sSugg[0:1].upper()+sSugg[1:], lRes))
             else:
                 yield self.oMainDic.suggest(sWord, nSuggLimit, True)
         else:
             yield self.oMainDic.suggest(sWord, nSuggLimit, True)