Index: compile_rules.py ================================================================== --- compile_rules.py +++ compile_rules.py @@ -54,11 +54,11 @@ s = re.sub(r"after0_chk1\(\s*", 'look_chk1(dDA, sx[m.end():], m.end(), ', s) # after0_chk1(s) s = re.sub(r"textarea0_chk1\(\s*", 'look_chk1(dDA, sx, 0, ', s) # textarea0_chk1(s) s = re.sub(r"isEndOfNG\(\s*\)", 'isEndOfNG(dDA, s[m.end():], m.end())', s) # isEndOfNG(s) s = re.sub(r"isNextNotCOD\(\s*\)", 'isNextNotCOD(dDA, s[m.end():], m.end())', s) # isNextNotCOD(s) s = re.sub(r"isNextVerb\(\s*\)", 'isNextVerb(dDA, s[m.end():], m.end())', s) # isNextVerb(s) - s = re.sub(r"\bspell *[(]", '_oDict.isValid(', s) + s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s) s = re.sub(r"[\\](\d+)", 'm.group(\\1)', s) return s def uppercase (s, sLang): Index: gc_core/js/lang_core/gc_engine.js ================================================================== --- gc_core/js/lang_core/gc_engine.js +++ gc_core/js/lang_core/gc_engine.js @@ -36,11 +36,11 @@ // data let _sAppContext = ""; // what software is running let _dOptions = null; let _aIgnoredRules = new Set(); -let _oDict = null; +let _oSpellChecker = null; let _dAnalyses = new Map(); // cache for data from dictionary var gc_engine = { @@ -320,25 +320,25 @@ //// Initialization load: function (sContext="JavaScript", sPath="") { try { if (typeof(require) !== 'undefined') { - var ibdawg = require("resource://grammalecte/graphspell/ibdawg.js"); - _oDict = new ibdawg.IBDAWG("${dic_filename}.json"); + var spellchecker = require("resource://grammalecte/graphspell/spellchecker.js"); + _oSpellChecker = new spellchecker.SpellChecker("${lang}", "", "${dic_filename}.json"); } else { - _oDict = new IBDAWG("${dic_filename}.json", sPath); + _oSpellChecker = new SpellChecker("${lang}", sPath, "${dic_filename}.json"); } _sAppContext = sContext; _dOptions = gc_options.getOptions(sContext).gl_shallowCopy(); // duplication necessary, to be able to reset to default } catch (e) { helpers.logerror(e); } }, - getDictionary: function () { - return _oDict; + getSpellChecker: function () { + return _oSpellChecker; }, //// Options setOption: function (sOpt, bVal) { @@ -388,13 +388,13 @@ helpers.echo("FSA: " + _dAnalyses.get(aWord[1])); return true; } function _storeMorphFromFSA (sWord) { - // retrieves morphologies list from _oDict -> _dAnalyses - //helpers.echo("register: "+sWord + " " + _oDict.getMorph(sWord).toString()) - _dAnalyses.set(sWord, _oDict.getMorph(sWord)); + // retrieves morphologies list from _oSpellChecker -> _dAnalyses + //helpers.echo("register: "+sWord + " " + _oSpellChecker.getMorph(sWord).toString()) + _dAnalyses.set(sWord, _oSpellChecker.getMorph(sWord)); return !!_dAnalyses.get(sWord); } function morph (dDA, aWord, sPattern, bStrict=true, bNoWord=false) { // analyse a tuple (position, word), return true if sPattern in morphologies (disambiguation on) @@ -642,12 +642,12 @@ exports.resetIgnoreRules = gc_engine.resetIgnoreRules; exports.reactivateRule = gc_engine.reactivateRule; exports.listRules = gc_engine.listRules; exports._getRules = gc_engine._getRules; exports.load = gc_engine.load; - exports.getDictionary = gc_engine.getDictionary; + exports.getSpellChecker = gc_engine.getSpellChecker; exports.setOption = gc_engine.setOption; exports.setOptions = gc_engine.setOptions; exports.getOptions = gc_engine.getOptions; exports.getDefaultOptions = gc_engine.getDefaultOptions; exports.resetOptions = gc_engine.resetOptions; } Index: gc_core/py/lang_core/gc_engine.py ================================================================== --- gc_core/py/lang_core/gc_engine.py +++ gc_core/py/lang_core/gc_engine.py @@ -6,17 +6,17 @@ import os import traceback #import unicodedata from itertools import chain -from ..graphspell.ibdawg import IBDAWG +from ..graphspell.spellchecker import SpellChecker from ..graphspell.echo import echo from . import gc_options __all__ = [ "lang", "locales", "pkg", "name", "version", "author", \ - "load", "parse", "getDictionary", \ + "load", "parse", "getSpellChecker", \ "setOption", "setOptions", "getOptions", "getDefaultOptions", "getOptionsLabels", "resetOptions", "displayOptions", \ "ignoreRule", "resetIgnoreRules", "reactivateRule", "listRules", "displayRules" ] __version__ = "${version}" @@ -32,11 +32,11 @@ # data _sAppContext = "" # what software is running _dOptions = None _aIgnoredRules = set() -_oDict = None +_oSpellChecker = None _dAnalyses = {} # cache for data from dictionary #### Parsing @@ -286,15 +286,15 @@ except ImportError: _createError = _createDictError def load (sContext="Python"): - global _oDict + global _oSpellChecker global _sAppContext global _dOptions try: - _oDict = IBDAWG("${dic_filename}.bdic") + _oSpellChecker = SpellChecker("${lang}", "${dic_filename}.bdic") _sAppContext = sContext _dOptions = dict(gc_options.getOptions(sContext)) # duplication necessary, to be able to reset to default except: traceback.print_exc() @@ -331,12 +331,12 @@ def resetOptions (): global _dOptions _dOptions = dict(gc_options.getOptions(_sAppContext)) -def getDictionary (): - return _oDict +def getSpellChecker (): + return _oSpellChecker def _getRules (bParagraph): try: if not bParagraph: @@ -396,13 +396,13 @@ echo("FSA: " + str(_dAnalyses[tWord[1]])) return True def _storeMorphFromFSA (sWord): - "retrieves morphologies list from _oDict -> _dAnalyses" + "retrieves morphologies list from _oSpellChecker -> _dAnalyses" global _dAnalyses - _dAnalyses[sWord] = _oDict.getMorph(sWord) + _dAnalyses[sWord] = _oSpellChecker.getMorph(sWord) return True if _dAnalyses[sWord] else False def morph (dDA, tWord, sPattern, bStrict=True, bNoWord=False): "analyse a tuple (position, word), return True if sPattern in morphologies (disambiguation on)" Index: gc_lang/fr/modules-js/gce_suggestions.js ================================================================== --- gc_lang/fr/modules-js/gce_suggestions.js +++ gc_lang/fr/modules-js/gce_suggestions.js @@ -208,21 +208,21 @@ } } let aSugg = new Set(); if (!sFlex.includes("-")) { if (sFlex.endsWith("l")) { - if (sFlex.endsWith("al") && sFlex.length > 2 && _oDict.isValid(sFlex.slice(0,-1)+"ux")) { + if (sFlex.endsWith("al") && sFlex.length > 2 && _oSpellChecker.isValid(sFlex.slice(0,-1)+"ux")) { aSugg.add(sFlex.slice(0,-1)+"ux"); } - if (sFlex.endsWith("ail") && sFlex.length > 3 && _oDict.isValid(sFlex.slice(0,-2)+"ux")) { + if (sFlex.endsWith("ail") && sFlex.length > 3 && _oSpellChecker.isValid(sFlex.slice(0,-2)+"ux")) { aSugg.add(sFlex.slice(0,-2)+"ux"); } } - if (_oDict.isValid(sFlex+"s")) { + if (_oSpellChecker.isValid(sFlex+"s")) { aSugg.add(sFlex+"s"); } - if (_oDict.isValid(sFlex+"x")) { + if (_oSpellChecker.isValid(sFlex+"x")) { aSugg.add(sFlex+"x"); } } if (mfsp.hasMiscPlural(sFlex)) { mfsp.getMiscPlural(sFlex).forEach(function(x) { aSugg.add(x); }); @@ -238,18 +238,18 @@ if (sFlex.includes("-")) { return ""; } let aSugg = new Set(); if (sFlex.endsWith("ux")) { - if (_oDict.isValid(sFlex.slice(0,-2)+"l")) { + if (_oSpellChecker.isValid(sFlex.slice(0,-2)+"l")) { aSugg.add(sFlex.slice(0,-2)+"l"); } - if (_oDict.isValid(sFlex.slice(0,-2)+"il")) { + if (_oSpellChecker.isValid(sFlex.slice(0,-2)+"il")) { aSugg.add(sFlex.slice(0,-2)+"il"); } } - if (_oDict.isValid(sFlex.slice(0,-1))) { + if (_oSpellChecker.isValid(sFlex.slice(0,-1))) { aSugg.add(sFlex.slice(0,-1)); } if (aSugg.size > 0) { return Array.from(aSugg).join("|"); } Index: gc_lang/fr/modules-js/lexicographe.js ================================================================== --- gc_lang/fr/modules-js/lexicographe.js +++ gc_lang/fr/modules-js/lexicographe.js @@ -224,12 +224,12 @@ ]); class Lexicographe { - constructor (oDict, oTokenizer, oLocGraph) { - this.oDict = oDict; + constructor (oSpellChecker, oTokenizer, oLocGraph) { + this.oSpellChecker = oSpellChecker; this.oTokenizer = oTokenizer; this.oLocGraph = JSON.parse(oLocGraph); this._zPartDemForm = new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)-(là|ci)$", "i"); this._aPartDemExceptList = new Set(["celui", "celle", "ceux", "celles", "de", "jusque", "par", "marie-couche-toi"]); @@ -339,11 +339,11 @@ aSubElem: [ { sType: oToken.sType, sValue: m[1], aLabel: this._getMorph(m[1]) }, { sType: oToken.sType, sValue: "-" + m[2], aLabel: [this._formatSuffix(m[2].toLowerCase())] } ] }; - } else if (this.oDict.isValidToken(oToken.sValue)) { + } else if (this.oSpellChecker.isValidToken(oToken.sValue)) { return { sType: oToken.sType, sValue: oToken.sValue, aLabel: this._getMorph(oToken.sValue) }; @@ -362,11 +362,11 @@ return null; } _getMorph (sWord) { let aElem = []; - for (let s of this.oDict.getMorph(sWord)) { + for (let s of this.oSpellChecker.getMorph(sWord)) { if (s.includes(":")) aElem.push(this._formatTags(s)); } if (aElem.length == 0) { aElem.push("mot inconnu du dictionnaire"); } Index: gc_lang/fr/modules/gce_suggestions.py ================================================================== --- gc_lang/fr/modules/gce_suggestions.py +++ gc_lang/fr/modules/gce_suggestions.py @@ -157,17 +157,17 @@ elif sGender == ":f": return suggFemPlur(sFlex) aSugg = set() if "-" not in sFlex: if sFlex.endswith("l"): - if sFlex.endswith("al") and len(sFlex) > 2 and _oDict.isValid(sFlex[:-1]+"ux"): + if sFlex.endswith("al") and len(sFlex) > 2 and _oSpellChecker.isValid(sFlex[:-1]+"ux"): aSugg.add(sFlex[:-1]+"ux") - if sFlex.endswith("ail") and len(sFlex) > 3 and _oDict.isValid(sFlex[:-2]+"ux"): + if sFlex.endswith("ail") and len(sFlex) > 3 and _oSpellChecker.isValid(sFlex[:-2]+"ux"): aSugg.add(sFlex[:-2]+"ux") - if _oDict.isValid(sFlex+"s"): + if _oSpellChecker.isValid(sFlex+"s"): aSugg.add(sFlex+"s") - if _oDict.isValid(sFlex+"x"): + if _oSpellChecker.isValid(sFlex+"x"): aSugg.add(sFlex+"x") if mfsp.hasMiscPlural(sFlex): aSugg.update(mfsp.getMiscPlural(sFlex)) if aSugg: return "|".join(aSugg) @@ -178,15 +178,15 @@ "returns singular forms assuming sFlex is plural" if "-" in sFlex: return "" aSugg = set() if sFlex.endswith("ux"): - if _oDict.isValid(sFlex[:-2]+"l"): + if _oSpellChecker.isValid(sFlex[:-2]+"l"): aSugg.add(sFlex[:-2]+"l") - if _oDict.isValid(sFlex[:-2]+"il"): + if _oSpellChecker.isValid(sFlex[:-2]+"il"): aSugg.add(sFlex[:-2]+"il") - if _oDict.isValid(sFlex[:-1]): + if _oSpellChecker.isValid(sFlex[:-1]): aSugg.add(sFlex[:-1]) if aSugg: return "|".join(aSugg) return "" Index: gc_lang/fr/modules/lexicographe.py ================================================================== --- gc_lang/fr/modules/lexicographe.py +++ gc_lang/fr/modules/lexicographe.py @@ -155,12 +155,12 @@ } class Lexicographe: - def __init__ (self, oDict): - self.oDict = oDict + def __init__ (self, oSpellChecker): + self.oSpellChecker = oSpellChecker self._zElidedPrefix = re.compile("(?i)^([dljmtsncç]|quoiqu|lorsqu|jusqu|puisqu|qu)['’](.+)") self._zCompoundWord = re.compile("(?i)(\\w+)-((?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts][’'](?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous)$") self._zTag = re.compile("[:;/][\\w*][^:;/]*") def analyzeWord (self, sWord): @@ -181,11 +181,11 @@ # mots composés m2 = self._zCompoundWord.match(sWord) if m2: sWord = m2.group(1) # Morphologies - lMorph = self.oDict.getMorph(sWord) + lMorph = self.oSpellChecker.getMorph(sWord) if len(lMorph) > 1: # sublist aMorph.append( (sWord, [ self.formatTags(s) for s in lMorph if ":" in s ]) ) elif len(lMorph) == 1: aMorph.append( "{} : {}".format(sWord, self.formatTags(lMorph[0])) ) Index: gc_lang/fr/webext/gce_worker.js ================================================================== --- gc_lang/fr/webext/gce_worker.js +++ gc_lang/fr/webext/gce_worker.js @@ -34,10 +34,11 @@ importScripts("grammalecte/graphspell/helpers.js"); importScripts("grammalecte/graphspell/str_transform.js"); importScripts("grammalecte/graphspell/char_player.js"); importScripts("grammalecte/graphspell/ibdawg.js"); +importScripts("grammalecte/graphspell/spellchecker.js"); importScripts("grammalecte/text.js"); importScripts("grammalecte/graphspell/tokenizer.js"); importScripts("grammalecte/fr/conj.js"); importScripts("grammalecte/fr/mfsp.js"); importScripts("grammalecte/fr/phonet.js"); @@ -134,11 +135,11 @@ let bInitDone = false; -let oDict = null; +let oSpellChecker = null; let oTokenizer = null; let oLxg = null; let oTest = null; let oLocution = null; @@ -159,17 +160,17 @@ conj.init(helpers.loadFile(sExtensionPath + "/grammalecte/fr/conj_data.json")); phonet.init(helpers.loadFile(sExtensionPath + "/grammalecte/fr/phonet_data.json")); mfsp.init(helpers.loadFile(sExtensionPath + "/grammalecte/fr/mfsp_data.json")); //console.log("[Worker] Modules have been initialized…"); gc_engine.load(sContext, sExtensionPath+"grammalecte/graphspell/_dictionaries"); - oDict = gc_engine.getDictionary(); + oSpellChecker = gc_engine.getSpellChecker(); oTest = new TestGrammarChecking(gc_engine, sExtensionPath+"/grammalecte/fr/tests_data.json"); oTokenizer = new Tokenizer("fr"); oLocution = helpers.loadFile(sExtensionPath + "/grammalecte/fr/locutions_data.json"); - oLxg = new Lexicographe(oDict, oTokenizer, oLocution); + oLxg = new Lexicographe(oSpellChecker, oTokenizer, oLocution); if (dOptions !== null) { gc_engine.setOptions(dOptions); } //tests(); bInitDone = true; @@ -198,21 +199,21 @@ function parseAndSpellcheck (sText, sCountry, bDebug, bContext, dInfo={}) { let i = 0; sText = sText.replace(/­/g, "").normalize("NFC"); for (let sParagraph of text.getParagraph(sText)) { let aGrammErr = gc_engine.parse(sParagraph, sCountry, bDebug, bContext); - let aSpellErr = oTokenizer.getSpellingErrors(sParagraph, oDict); + let aSpellErr = oTokenizer.getSpellingErrors(sParagraph, oSpellChecker); postMessage(createResponse("parseAndSpellcheck", {sParagraph: sParagraph, iParaNum: i, aGrammErr: aGrammErr, aSpellErr: aSpellErr}, dInfo, false)); i += 1; } postMessage(createResponse("parseAndSpellcheck", null, dInfo, true)); } function parseAndSpellcheck1 (sParagraph, sCountry, bDebug, bContext, dInfo={}) { sParagraph = sParagraph.replace(/­/g, "").normalize("NFC"); let aGrammErr = gc_engine.parse(sParagraph, sCountry, bDebug, bContext); - let aSpellErr = oTokenizer.getSpellingErrors(sParagraph, oDict); + let aSpellErr = oTokenizer.getSpellingErrors(sParagraph, oSpellChecker); postMessage(createResponse("parseAndSpellcheck1", {sParagraph: sParagraph, aGrammErr: aGrammErr, aSpellErr: aSpellErr}, dInfo, true)); } function getOptions (dInfo={}) { postMessage(createResponse("getOptions", gc_engine.getOptions(), dInfo, true)); @@ -289,16 +290,19 @@ // Spellchecker function getSpellSuggestions (sWord, dInfo) { - if (!oDict) { + if (!oSpellChecker) { postMessage(createResponse("getSpellSuggestions", "# Error. Dictionary not loaded.", dInfo, true)); return; } - let aSugg = oDict.suggest(sWord); - postMessage(createResponse("getSpellSuggestions", {sWord: sWord, aSugg: aSugg}, dInfo, true)); + let i = 1; + for (let aSugg of oSpellChecker.suggest(sWord)) { + postMessage(createResponse("getSpellSuggestions", {sWord: sWord, aSugg: aSugg, iSugg: i}, dInfo, true)); + i += 1; + } } // Lexicographer Index: grammalecte-cli.py ================================================================== --- grammalecte-cli.py +++ grammalecte-cli.py @@ -42,31 +42,33 @@ # So we reverse it to avoid many useless warnings. sText = sText.replace("'", "’") return sText -def _getErrors (sText, oTokenizer, oDict, bContext=False, bSpellSugg=False, bDebug=False): +def _getErrors (sText, oTokenizer, oSpellChecker, bContext=False, bSpellSugg=False, bDebug=False): "returns a tuple: (grammar errors, spelling errors)" aGrammErrs = gce.parse(sText, "FR", bDebug=bDebug, bContext=bContext) aSpellErrs = [] for dToken in oTokenizer.genTokens(sText): - if dToken['sType'] == "WORD" and not oDict.isValidToken(dToken['sValue']): + if dToken['sType'] == "WORD" and not oSpellChecker.isValidToken(dToken['sValue']): if bSpellSugg: - dToken['aSuggestions'] = oDict.suggest(dToken['sValue']) + dToken['aSuggestions'] = [] + for lSugg in oSpellChecker.suggest(dToken['sValue']): + dToken['aSuggestions'].extend(lSugg) aSpellErrs.append(dToken) return aGrammErrs, aSpellErrs -def generateText (sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=False, bSpellSugg=False, nWidth=100): - aGrammErrs, aSpellErrs = _getErrors(sText, oTokenizer, oDict, False, bSpellSugg, bDebug) +def generateText (sText, oTokenizer, oSpellChecker, bDebug=False, bEmptyIfNoErrors=False, bSpellSugg=False, nWidth=100): + aGrammErrs, aSpellErrs = _getErrors(sText, oTokenizer, oSpellChecker, False, bSpellSugg, bDebug) if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs: return "" return txt.generateParagraph(sText, aGrammErrs, aSpellErrs, nWidth) -def generateJSON (iIndex, sText, oTokenizer, oDict, bContext=False, bDebug=False, bEmptyIfNoErrors=False, bSpellSugg=False, lLineSet=None, bReturnText=False): - aGrammErrs, aSpellErrs = _getErrors(sText, oTokenizer, oDict, bContext, bSpellSugg, bDebug) +def generateJSON (iIndex, sText, oTokenizer, oSpellChecker, bContext=False, bDebug=False, bEmptyIfNoErrors=False, bSpellSugg=False, lLineSet=None, bReturnText=False): + aGrammErrs, aSpellErrs = _getErrors(sText, oTokenizer, oSpellChecker, bContext, bSpellSugg, bDebug) aGrammErrs = list(aGrammErrs) if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs: return "" if lLineSet: aGrammErrs, aSpellErrs = txt.convertToXY(aGrammErrs, aSpellErrs, lLineSet) @@ -128,13 +130,13 @@ xArgs = xParser.parse_args() gce.load() if not xArgs.json: echo("Grammalecte v{}".format(gce.version)) - oDict = gce.getDictionary() + oSpellChecker = gce.getSpellChecker() oTokenizer = tkz.Tokenizer("fr") - oLexGraphe = lxg.Lexicographe(oDict) + oLexGraphe = lxg.Lexicographe(oSpellChecker) if xArgs.textformatter or xArgs.textformatteronly: oTF = tf.TextFormatter() if xArgs.list_options or xArgs.list_rules: if xArgs.list_options: @@ -142,16 +144,16 @@ if xArgs.list_rules: gce.displayRules(None if xArgs.list_rules == "*" else xArgs.list_rules) exit() if xArgs.suggest: - lSugg = oDict.suggest(xArgs.suggest) - if xArgs.json: - sText = json.dumps({ "aSuggestions": lSugg }, ensure_ascii=False) - else: - sText = "Suggestions : " + " | ".join(lSugg) - echo(sText) + for lSugg in oSpellChecker.suggest(xArgs.suggest): + if xArgs.json: + sText = json.dumps({ "aSuggestions": lSugg }, ensure_ascii=False) + else: + sText = "Suggestions : " + " | ".join(lSugg) + echo(sText) exit() if not xArgs.json: xArgs.context = False @@ -179,13 +181,13 @@ sText = oTF.formatText(sText) if xArgs.textformatteronly: output(sText, hDst) else: if xArgs.json: - sText = generateJSON(i, sText, oTokenizer, oDict, bContext=xArgs.context, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSugg=xArgs.with_spell_sugg, bReturnText=xArgs.textformatter) + sText = generateJSON(i, sText, oTokenizer, oSpellChecker, bContext=xArgs.context, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSugg=xArgs.with_spell_sugg, bReturnText=xArgs.textformatter) else: - sText = generateText(sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSugg=xArgs.with_spell_sugg, nWidth=xArgs.width) + sText = generateText(sText, oTokenizer, oSpellChecker, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSugg=xArgs.with_spell_sugg, nWidth=xArgs.width) if sText: if xArgs.json and bComma: output(",\n", hDst) output(sText, hDst) bComma = True @@ -194,13 +196,13 @@ else: # concaténation des lignes non séparées par une ligne vide for i, lLine in enumerate(readfileAndConcatLines(sFile), 1): sText, lLineSet = txt.createParagraphWithLines(lLine) if xArgs.json: - sText = generateJSON(i, sText, oTokenizer, oDict, bContext=xArgs.context, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSugg=xArgs.with_spell_sugg, lLineSet=lLineSet) + sText = generateJSON(i, sText, oTokenizer, oSpellChecker, bContext=xArgs.context, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSugg=xArgs.with_spell_sugg, lLineSet=lLineSet) else: - sText = generateText(sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSugg=xArgs.with_spell_sugg, nWidth=xArgs.width) + sText = generateText(sText, oTokenizer, oSpellChecker, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSugg=xArgs.with_spell_sugg, nWidth=xArgs.width) if sText: if xArgs.json and bComma: output(",\n", hDst) output(sText, hDst) bComma = True @@ -215,21 +217,21 @@ while True: if sText.startswith("?"): for sWord in sText[1:].strip().split(): if sWord: echo("* " + sWord) - for sMorph in oDict.getMorph(sWord): + for sMorph in oSpellChecker.getMorph(sWord): echo(" {:<32} {}".format(sMorph, oLexGraphe.formatTags(sMorph))) elif sText.startswith("!"): for sWord in sText[1:].strip().split(): if sWord: - echo(" | ".join(oDict.suggest(sWord))) - #echo(" | ".join(oDict.suggest2(sWord))) + for lSugg in oSpellChecker.suggest(sWord): + echo(" | ".join(lSugg)) elif sText.startswith(">"): - oDict.drawPath(sText[1:].strip()) + oSpellChecker.drawPath(sText[1:].strip()) elif sText.startswith("="): - for sRes in oDict.select(sText[1:].strip()): + for sRes in oSpellChecker.select(sText[1:].strip()): echo(sRes) elif sText.startswith("/+ "): gce.setOptions({ opt:True for opt in sText[3:].strip().split() if opt in gce.getOptions() }) echo("done") elif sText.startswith("/- "): @@ -264,15 +266,15 @@ pass else: for sParagraph in txt.getParagraph(sText): if xArgs.textformatter: sText = oTF.formatText(sText) - sRes = generateText(sText, oTokenizer, oDict, bDebug=xArgs.debug, bEmptyIfNoErrors=xArgs.only_when_errors, nWidth=xArgs.width) + sRes = generateText(sText, oTokenizer, oSpellChecker, bDebug=xArgs.debug, bEmptyIfNoErrors=xArgs.only_when_errors, nWidth=xArgs.width) if sRes: echo("\n" + sRes) else: echo("\nNo error found.") sText = _getText(sInputText) if __name__ == '__main__': main() Index: grammalecte-server.py ================================================================== --- grammalecte-server.py +++ grammalecte-server.py @@ -127,16 +127,16 @@ while True: yield str(i) i += 1 -def parseParagraph (iParagraph, sText, oTokenizer, oDict, dOptions, bDebug=False, bEmptyIfNoErrors=False): +def parseParagraph (iParagraph, sText, oTokenizer, oSpellChecker, dOptions, bDebug=False, bEmptyIfNoErrors=False): aGrammErrs = gce.parse(sText, "FR", bDebug, dOptions) aGrammErrs = list(aGrammErrs) aSpellErrs = [] for dToken in oTokenizer.genTokens(sText): - if dToken['sType'] == "WORD" and not oDict.isValidToken(dToken['sValue']): + if dToken['sType'] == "WORD" and not oSpellChecker.isValidToken(dToken['sValue']): aSpellErrs.append(dToken) if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs: return "" return " " + json.dumps({ "iParagraph": iParagraph, "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False) @@ -149,11 +149,11 @@ dGCOptions = getConfigOptions("fr") if dGCOptions: gce.setOptions(dGCOptions) dServerGCOptions = gce.getOptions() echo("Grammar options:\n" + " | ".join([ k + ": " + str(v) for k, v in sorted(dServerGCOptions.items()) ])) - oDict = gce.getDictionary() + oSpellChecker = gce.getSpellChecker() oTokenizer = tkz.Tokenizer("fr") oTF = tf.TextFormatter() dUser = {} userGenerator = genUserId() @@ -197,11 +197,11 @@ sError = "request options not used" sJSON = '{ "program": "grammalecte-fr", "version": "'+gce.version+'", "lang": "'+gce.lang+'", "error": "'+sError+'", "data" : [\n' for i, sText in enumerate(txt.getParagraph(request.forms.text), 1): if bTF: sText = oTF.formatText(sText) - sText = parseParagraph(i, sText, oTokenizer, oDict, dOptions, bEmptyIfNoErrors=True) + sText = parseParagraph(i, sText, oTokenizer, oSpellChecker, dOptions, bEmptyIfNoErrors=True) if sText: if bComma: sJSON += ",\n" sJSON += sText bComma = True Index: graphspell-js/ibdawg.js ================================================================== --- graphspell-js/ibdawg.js +++ graphspell-js/ibdawg.js @@ -87,12 +87,11 @@ constructor (param1, sPath="") { // param1 can be a filename or a object with all the necessary data. try { let oData = null; if (typeof(param1) == "string") { - let sDicName = param1; - let sURL = (sPath !== "") ? sPath + "/" + sDicName : "resource://grammalecte/graphspell/_dictionaries/"+sDicName; + let sURL = (sPath !== "") ? sPath + "/" + param1 : "resource://grammalecte/graphspell/_dictionaries/"+param1; oData = JSON.parse(helpers.loadFile(sURL)); } else { oData = param1; } Object.assign(this, oData); Index: graphspell-js/spellchecker.js ================================================================== --- graphspell-js/spellchecker.js +++ graphspell-js/spellchecker.js @@ -23,17 +23,15 @@ ["fr", "fr.json"], ["en", "en.json"] ]); -class Spellchecker { +class SpellChecker { - constructor (sLangCode, mainDic=null, extentedDic=null, personalDic=null, sPath="") { + constructor (sLangCode, sPath="", mainDic=null, extentedDic=null, personalDic=null) { // returns true if the main dictionary is loaded this.sLangCode = sLangCode; - console.log(sLangCode); - console.log(mainDic); if (mainDic === null) { mainDic = dDefaultDictionaries.gl_get(sLangCode, ""); } this.oMainDic = this._loadDictionary(mainDic, sPath, true); this.oExtendedDic = this._loadDictionary(extentedDic, sPath); @@ -44,23 +42,21 @@ // returns an IBDAWG object if (dictionary === null) { return null; } try { - if (typeof(require) !== 'undefined') { - console.log(">>>> "); + if (typeof(require) !== 'undefined') { return new ibdawg.IBDAWG(dictionary); // dictionary can be a filename or a JSON object } else { - console.log(">>>> no "); return new IBDAWG(dictionary, sPath); // dictionary can be a filename or a JSON object } } catch (e) { - if (bNecessary) { - throw e.message; - } console.log(e.message); + if (bNecessary) { + throw e.message; + } return null; } } setMainDictionary (dictionary) { @@ -97,44 +93,44 @@ return false; } isValid (sWord) { // checks if sWord is valid (different casing tested if the first letter is a capital) - if (this.oMainDic.isValid(sToken)) { + if (this.oMainDic.isValid(sWord)) { return true; } - if (this.oExtendedDic && this.oExtendedDic.isValid(sToken)) { + if (this.oExtendedDic && this.oExtendedDic.isValid(sWord)) { return true; } - if (this.oPersonalDic && this.oPersonalDic.isValid(sToken)) { + if (this.oPersonalDic && this.oPersonalDic.isValid(sWord)) { return true; } return false; } lookup (sWord) { // checks if sWord is in dictionary as is (strict verification) - if (this.oMainDic.lookup(sToken)) { + if (this.oMainDic.lookup(sWord)) { return true; } - if (this.oExtendedDic && this.oExtendedDic.lookup(sToken)) { + if (this.oExtendedDic && this.oExtendedDic.lookup(sWord)) { return true; } - if (this.oPersonalDic && this.oPersonalDic.lookup(sToken)) { + if (this.oPersonalDic && this.oPersonalDic.lookup(sWord)) { return true; } return false; } getMorph (sWord) { // retrieves morphologies list, different casing allowed - let lResult = this.oMainDic.getMorph(sToken); + let lResult = this.oMainDic.getMorph(sWord); if (this.oExtendedDic) { - lResult.extends(this.oExtendedDic.getMorph(sToken)); + lResult.extends(this.oExtendedDic.getMorph(sWord)); } if (this.oPersonalDic) { - lResult.extends(this.oPersonalDic.getMorph(sToken)); + lResult.extends(this.oPersonalDic.getMorph(sWord)); } return lResult; } * suggest (sWord, nSuggLimit=10) { Index: graphspell-js/tokenizer.js ================================================================== --- graphspell-js/tokenizer.js +++ graphspell-js/tokenizer.js @@ -86,14 +86,14 @@ i += nCut; sText = sText.slice(nCut); } } - getSpellingErrors (sText, oDict) { + getSpellingErrors (sText, oSpellChecker) { let aSpellErr = []; for (let oToken of this.genTokens(sText)) { - if (oToken.sType === 'WORD' && !oDict.isValidToken(oToken.sValue)) { + if (oToken.sType === 'WORD' && !oSpellChecker.isValidToken(oToken.sValue)) { aSpellErr.push(oToken); } } return aSpellErr; } Index: graphspell/spellchecker.py ================================================================== --- graphspell/spellchecker.py +++ graphspell/spellchecker.py @@ -17,21 +17,20 @@ "fr": "fr.bdic", "en": "en.bdic" } -class Spellchecker (): +class SpellChecker (): def __init__ (self, sLangCode, sfMainDic="", sfExtendedDic="", sfPersonalDic=""): "returns True if the main dictionary is loaded" self.sLangCode = sLangCode if not sfMainDic: sfMainDic = dDefaultDictionaries.get(sLangCode, "") self.oMainDic = self._loadDictionary(sfMainDic) self.oExtendedDic = self._loadDictionary(sfExtendedDic) self.oPersonalDic = self._loadDictionary(sfPersonalDic) - return bool(self.oMainDic) def _loadDictionary (self, sfDictionary): "returns an IBDAWG object" if not sfDictionary: return None @@ -70,35 +69,35 @@ return True return False def isValid (self, sWord): "checks if sWord is valid (different casing tested if the first letter is a capital)" - if self.oMainDic.isValid(sToken): + if self.oMainDic.isValid(sWord): return True - if self.oExtendedDic and self.oExtendedDic.isValid(sToken): + if self.oExtendedDic and self.oExtendedDic.isValid(sWord): return True - if self.oPersonalDic and self.oPersonalDic.isValid(sToken): + if self.oPersonalDic and self.oPersonalDic.isValid(sWord): return True return False def lookup (self, sWord): "checks if sWord is in dictionary as is (strict verification)" - if self.oMainDic.lookup(sToken): + if self.oMainDic.lookup(sWord): return True - if self.oExtendedDic and self.oExtendedDic.lookup(sToken): + if self.oExtendedDic and self.oExtendedDic.lookup(sWord): return True - if self.oPersonalDic and self.oPersonalDic.lookup(sToken): + if self.oPersonalDic and self.oPersonalDic.lookup(sWord): return True return False def getMorph (self, sWord): "retrieves morphologies list, different casing allowed" - lResult = self.oMainDic.getMorph(sToken) + lResult = self.oMainDic.getMorph(sWord) if self.oExtendedDic: - lResult.extends(self.oExtendedDic.getMorph(sToken)) + lResult.extends(self.oExtendedDic.getMorph(sWord)) if self.oPersonalDic: - lResult.extends(self.oPersonalDic.getMorph(sToken)) + lResult.extends(self.oPersonalDic.getMorph(sWord)) return lResult def suggest (self, sWord, nSuggLimit=10): "generator: returns 1,2 or 3 lists of suggestions" yield self.oMainDic.suggest(sWord, nSuggLimit) @@ -112,5 +111,14 @@ yield from self.oMainDic.select(sPattern) if self.oExtendedDic: yield from self.oExtendedDic.select(sPattern) if self.oPersonalDic: yield from self.oPersonalDic.select(sPattern) + + def drawPath (self, sWord): + self.oMainDic.drawPath(sWord) + if self.oExtendedDic: + print("-----") + self.oExtendedDic.drawPath(sWord) + if self.oPersonalDic: + print("-----") + self.oPersonalDic.drawPath(sWord)