Index: gc_lang/fr/webext/gce_worker.js ================================================================== --- gc_lang/fr/webext/gce_worker.js +++ gc_lang/fr/webext/gce_worker.js @@ -202,21 +202,21 @@ function parseAndSpellcheck (sText, sCountry, bDebug, bContext, dInfo={}) { let i = 0; sText = sText.replace(/­/g, "").normalize("NFC"); for (let sParagraph of text.getParagraph(sText)) { let aGrammErr = gc_engine.parse(sParagraph, sCountry, bDebug, bContext); - let aSpellErr = oTokenizer.getSpellingErrors(sParagraph, oSpellChecker); + let aSpellErr = oSpellChecker.parseParagraph(sParagraph); postMessage(createResponse("parseAndSpellcheck", {sParagraph: sParagraph, iParaNum: i, aGrammErr: aGrammErr, aSpellErr: aSpellErr}, dInfo, false)); i += 1; } postMessage(createResponse("parseAndSpellcheck", null, dInfo, true)); } function parseAndSpellcheck1 (sParagraph, sCountry, bDebug, bContext, dInfo={}) { sParagraph = sParagraph.replace(/­/g, "").normalize("NFC"); let aGrammErr = gc_engine.parse(sParagraph, sCountry, bDebug, bContext); - let aSpellErr = oTokenizer.getSpellingErrors(sParagraph, oSpellChecker); + let aSpellErr = oSpellChecker.parseParagraph(sParagraph); postMessage(createResponse("parseAndSpellcheck1", {sParagraph: sParagraph, aGrammErr: aGrammErr, aSpellErr: aSpellErr}, dInfo, true)); } function getOptions (dInfo={}) { postMessage(createResponse("getOptions", gc_engine.getOptions(), dInfo, true)); Index: graphspell-js/spellchecker.js ================================================================== --- graphspell-js/spellchecker.js +++ graphspell-js/spellchecker.js @@ -11,10 +11,11 @@ "use strict"; if (typeof(require) !== 'undefined') { var ibdawg = require("resource://grammalecte/graphspell/ibdawg.js"); + var tokenizer = require("resource://grammalecte/graphspell/tokenizer.js"); } ${map} @@ -34,19 +35,20 @@ mainDic = dDefaultDictionaries.gl_get(sLangCode, ""); } this.oMainDic = this._loadDictionary(mainDic, sPath, true); this.oExtendedDic = this._loadDictionary(extentedDic, sPath); this.oPersonalDic = this._loadDictionary(personalDic, sPath); + this.oTokenizer = null; } _loadDictionary (dictionary, sPath, bNecessary=false) { // returns an IBDAWG object if (!dictionary) { return null; } try { - if (typeof(require) !== 'undefined') { + if (typeof(ibdawg) !== 'undefined') { return new ibdawg.IBDAWG(dictionary); // dictionary can be a filename or a JSON object } else { return new IBDAWG(dictionary, sPath); // dictionary can be a filename or a JSON object } } @@ -58,10 +60,18 @@ console.log("Error: <" + sfDictionary + "> not loaded.") console.log(e.message); return null; } } + + loadTokenizer () { + if (typeof(tokenizer) !== 'undefined') { + this.oTokenizer = new tokenizer.Tokenizer(this.sLangCode); + } else { + this.oTokenizer = new Tokenizer(this.sLangCode); + } + } setMainDictionary (dictionary) { // returns true if the dictionary is loaded this.oMainDic = this._loadDictionary(dictionary); return Boolean(this.oMainDic); @@ -76,10 +86,25 @@ setPersonalDictionary (dictionary) { // returns true if the dictionary is loaded this.oPersonalDic = this._loadDictionary(dictionary); return Boolean(this.oPersonalDic); } + + // parse text functions + + parseParagraph (sText) { + if (!this.oTokenizer) { + this.loadTokenizer(); + } + let aSpellErr = []; + for (let oToken of this.oTokenizer.genTokens(sText)) { + if (oToken.sType === 'WORD' && !this.isValidToken(oToken.sValue)) { + aSpellErr.push(oToken); + } + } + return aSpellErr; + } // IBDAWG functions isValidToken (sToken) { // checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked) Index: graphspell-js/tokenizer.js ================================================================== --- graphspell-js/tokenizer.js +++ graphspell-js/tokenizer.js @@ -85,21 +85,11 @@ } i += nCut; sText = sText.slice(nCut); } } - - getSpellingErrors (sText, oSpellChecker) { - let aSpellErr = []; - for (let oToken of this.genTokens(sText)) { - if (oToken.sType === 'WORD' && !oSpellChecker.isValidToken(oToken.sValue)) { - aSpellErr.push(oToken); - } - } - return aSpellErr; - } } if (typeof(exports) !== 'undefined') { exports.Tokenizer = Tokenizer; } Index: graphspell/spellchecker.py ================================================================== --- graphspell/spellchecker.py +++ graphspell/spellchecker.py @@ -9,10 +9,11 @@ import traceback from . import ibdawg +from . import tokenizer dDefaultDictionaries = { "fr": "fr.bdic", "en": "en.bdic" @@ -27,10 +28,11 @@ if not sfMainDic: sfMainDic = dDefaultDictionaries.get(sLangCode, "") self.oMainDic = self._loadDictionary(sfMainDic, True) self.oExtendedDic = self._loadDictionary(sfExtendedDic) self.oPersonalDic = self._loadDictionary(sfPersonalDic) + self.oTokenizer = None def _loadDictionary (self, sfDictionary, bNecessary=False): "returns an IBDAWG object" if not sfDictionary: return None @@ -41,10 +43,13 @@ raise Exception(str(e), "Error: <" + sfDictionary + "> not loaded.") print("Error: <" + sfDictionary + "> not loaded.") traceback.print_exc() return None + def loadTokenizer (self): + self.oTokenizer = tokenizer.Tokenizer(self.sLangCode) + def setMainDictionary (self, sfDictionary): "returns True if the dictionary is loaded" self.oMainDic = self._loadDictionary(sfDictionary) return bool(self.oMainDic) @@ -56,10 +61,24 @@ def setPersonalDictionary (self, sfDictionary): "returns True if the dictionary is loaded" self.oPersonalDic = self._loadDictionary(sfDictionary) return bool(self.oPersonalDic) + # parse text functions + + def parseParagraph (self, sText, bSpellSugg=False): + if not self.oTokenizer: + self.loadTokenizer() + aSpellErrs = [] + for dToken in self.oTokenizer.genTokens(sText): + if dToken['sType'] == "WORD" and not self.isValidToken(dToken['sValue']): + if bSpellSugg: + dToken['aSuggestions'] = [] + for lSugg in self.suggest(dToken['sValue']): + dToken['aSuggestions'].extend(lSugg) + aSpellErrs.append(dToken) + return aSpellErrs # IBDAWG functions def isValidToken (self, sToken): "checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)"