Overview
Comment: | [graphspell] spellchecker: add parseParagraph() |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | graphspell |
Files: | files | file ages | folders |
SHA3-256: |
7616aa7ef9d3cc203b118f77e0749b0b |
User & Date: | olr on 2018-02-20 08:40:03 |
Other Links: | manifest | tags |
Context
2018-02-20
| ||
12:06 | [fr][tests] Update: Le Horla check-in: bf58e39b3f user: olr tags: trunk, fr | |
08:40 | [graphspell] spellchecker: add parseParagraph() check-in: 7616aa7ef9 user: olr tags: trunk, graphspell | |
2018-02-19
| ||
18:08 | [fr] new performance test (better when the processor isn’t converting a video!) check-in: b34690f0d8 user: olr tags: trunk, fr | |
Changes
Modified gc_lang/fr/webext/gce_worker.js from [c20f81d8f3] to [efd11a103b].
︙ | ︙ | |||
200 201 202 203 204 205 206 | } function parseAndSpellcheck (sText, sCountry, bDebug, bContext, dInfo={}) { let i = 0; sText = sText.replace(/\u00AD/g, "").normalize("NFC"); for (let sParagraph of text.getParagraph(sText)) { let aGrammErr = gc_engine.parse(sParagraph, sCountry, bDebug, bContext); | | | | 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 | } function parseAndSpellcheck (sText, sCountry, bDebug, bContext, dInfo={}) { let i = 0; sText = sText.replace(/\u00AD/g, "").normalize("NFC"); for (let sParagraph of text.getParagraph(sText)) { let aGrammErr = gc_engine.parse(sParagraph, sCountry, bDebug, bContext); let aSpellErr = oSpellChecker.parseParagraph(sParagraph); postMessage(createResponse("parseAndSpellcheck", {sParagraph: sParagraph, iParaNum: i, aGrammErr: aGrammErr, aSpellErr: aSpellErr}, dInfo, false)); i += 1; } postMessage(createResponse("parseAndSpellcheck", null, dInfo, true)); } function parseAndSpellcheck1 (sParagraph, sCountry, bDebug, bContext, dInfo={}) { sParagraph = sParagraph.replace(/\u00AD/g, "").normalize("NFC"); let aGrammErr = gc_engine.parse(sParagraph, sCountry, bDebug, bContext); let aSpellErr = oSpellChecker.parseParagraph(sParagraph); postMessage(createResponse("parseAndSpellcheck1", {sParagraph: sParagraph, aGrammErr: aGrammErr, aSpellErr: aSpellErr}, dInfo, true)); } function getOptions (dInfo={}) { postMessage(createResponse("getOptions", gc_engine.getOptions(), dInfo, true)); } |
︙ | ︙ |
Modified graphspell-js/spellchecker.js from [e878cd2181] to [7b8a526c88].
︙ | ︙ | |||
9 10 11 12 13 14 15 16 17 18 19 20 21 22 | "use strict"; if (typeof(require) !== 'undefined') { var ibdawg = require("resource://grammalecte/graphspell/ibdawg.js"); } ${map} const dDefaultDictionaries = new Map([ | > | 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 | "use strict"; if (typeof(require) !== 'undefined') { var ibdawg = require("resource://grammalecte/graphspell/ibdawg.js"); var tokenizer = require("resource://grammalecte/graphspell/tokenizer.js"); } ${map} const dDefaultDictionaries = new Map([ |
︙ | ︙ | |||
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 | this.sLangCode = sLangCode; if (!mainDic) { mainDic = dDefaultDictionaries.gl_get(sLangCode, ""); } this.oMainDic = this._loadDictionary(mainDic, sPath, true); this.oExtendedDic = this._loadDictionary(extentedDic, sPath); this.oPersonalDic = this._loadDictionary(personalDic, sPath); } _loadDictionary (dictionary, sPath, bNecessary=false) { // returns an IBDAWG object if (!dictionary) { return null; } try { | > | > > > > > > > > > > > > > > > > > > > > > > > | 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 | this.sLangCode = sLangCode; if (!mainDic) { mainDic = dDefaultDictionaries.gl_get(sLangCode, ""); } this.oMainDic = this._loadDictionary(mainDic, sPath, true); this.oExtendedDic = this._loadDictionary(extentedDic, sPath); this.oPersonalDic = this._loadDictionary(personalDic, sPath); this.oTokenizer = null; } _loadDictionary (dictionary, sPath, bNecessary=false) { // returns an IBDAWG object if (!dictionary) { return null; } try { if (typeof(ibdawg) !== 'undefined') { return new ibdawg.IBDAWG(dictionary); // dictionary can be a filename or a JSON object } else { return new IBDAWG(dictionary, sPath); // dictionary can be a filename or a JSON object } } catch (e) { let sfDictionary = (typeof(dictionary) == "string") ? dictionary : dictionary.sLangName + "/" + dictionary.sFileName; if (bNecessary) { throw "Error: <" + sfDictionary + "> not loaded. 
" + e.message; } console.log("Error: <" + sfDictionary + "> not loaded.") console.log(e.message); return null; } } loadTokenizer () { if (typeof(tokenizer) !== 'undefined') { this.oTokenizer = new tokenizer.Tokenizer(this.sLangCode); } else { this.oTokenizer = new Tokenizer(this.sLangCode); } } setMainDictionary (dictionary) { // returns true if the dictionary is loaded this.oMainDic = this._loadDictionary(dictionary); return Boolean(this.oMainDic); } setExtendedDictionary (dictionary) { // returns true if the dictionary is loaded this.oExtendedDic = this._loadDictionary(dictionary); return Boolean(this.oExtendedDic); } setPersonalDictionary (dictionary) { // returns true if the dictionary is loaded this.oPersonalDic = this._loadDictionary(dictionary); return Boolean(this.oPersonalDic); } // parse text functions parseParagraph (sText) { if (!this.oTokenizer) { this.loadTokenizer(); } let aSpellErr = []; for (let oToken of this.oTokenizer.genTokens(sText)) { if (oToken.sType === 'WORD' && !this.isValidToken(oToken.sValue)) { aSpellErr.push(oToken); } } return aSpellErr; } // IBDAWG functions isValidToken (sToken) { // checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked) if (this.oMainDic.isValidToken(sToken)) { return true; |
︙ | ︙ |
Modified graphspell-js/tokenizer.js from [c3f0ee8c90] to [bdd895b918].
︙ | ︙ | |||
83 84 85 86 87 88 89 | helpers.logerror(e); } } i += nCut; sText = sText.slice(nCut); } } | < < < < < < < < < < | 83 84 85 86 87 88 89 90 91 92 93 94 95 | helpers.logerror(e); } } i += nCut; sText = sText.slice(nCut); } } } if (typeof(exports) !== 'undefined') { exports.Tokenizer = Tokenizer; } |
Modified graphspell/spellchecker.py from [638f8d8cdf] to [b9fb2c7b70].
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 | # Spellchecker # Wrapper for the IBDAWG class. # Useful to check several dictionaries at once. # To avoid iterating over a pile of dictionaries, it is assumed that 3 are enough: # - the main dictionary, bundled with the package # - the extended dictionary, added by an organization # - the personal dictionary, created by the user for its own convenience import traceback from . import ibdawg dDefaultDictionaries = { "fr": "fr.bdic", "en": "en.bdic" } class SpellChecker (): def __init__ (self, sLangCode, sfMainDic="", sfExtendedDic="", sfPersonalDic=""): "returns True if the main dictionary is loaded" self.sLangCode = sLangCode if not sfMainDic: sfMainDic = dDefaultDictionaries.get(sLangCode, "") self.oMainDic = self._loadDictionary(sfMainDic, True) self.oExtendedDic = self._loadDictionary(sfExtendedDic) self.oPersonalDic = self._loadDictionary(sfPersonalDic) def _loadDictionary (self, sfDictionary, bNecessary=False): "returns an IBDAWG object" if not sfDictionary: return None try: return ibdawg.IBDAWG(sfDictionary) except Exception as e: if bNecessary: raise Exception(str(e), "Error: <" + sfDictionary + "> not loaded.") print("Error: <" + sfDictionary + "> not loaded.") traceback.print_exc() return None def setMainDictionary (self, sfDictionary): "returns True if the dictionary is loaded" self.oMainDic = self._loadDictionary(sfDictionary) return bool(self.oMainDic) def setExtendedDictionary (self, sfDictionary): "returns True if the dictionary is loaded" self.oExtendedDic = self._loadDictionary(sfDictionary) return bool(self.oExtendedDic) def setPersonalDictionary (self, sfDictionary): "returns True if the dictionary is loaded" self.oPersonalDic = self._loadDictionary(sfDictionary) return bool(self.oPersonalDic) # IBDAWG functions def isValidToken (self, sToken): "checks if 
sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)" if self.oMainDic.isValidToken(sToken): return True | > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 | # Spellchecker # Wrapper for the IBDAWG class. # Useful to check several dictionaries at once. # To avoid iterating over a pile of dictionaries, it is assumed that 3 are enough: # - the main dictionary, bundled with the package # - the extended dictionary, added by an organization # - the personal dictionary, created by the user for its own convenience import traceback from . import ibdawg from . import tokenizer dDefaultDictionaries = { "fr": "fr.bdic", "en": "en.bdic" } class SpellChecker (): def __init__ (self, sLangCode, sfMainDic="", sfExtendedDic="", sfPersonalDic=""): "returns True if the main dictionary is loaded" self.sLangCode = sLangCode if not sfMainDic: sfMainDic = dDefaultDictionaries.get(sLangCode, "") self.oMainDic = self._loadDictionary(sfMainDic, True) self.oExtendedDic = self._loadDictionary(sfExtendedDic) self.oPersonalDic = self._loadDictionary(sfPersonalDic) self.oTokenizer = None def _loadDictionary (self, sfDictionary, bNecessary=False): "returns an IBDAWG object" if not sfDictionary: return None try: return ibdawg.IBDAWG(sfDictionary) except Exception as e: if bNecessary: raise Exception(str(e), "Error: <" + sfDictionary + "> not loaded.") print("Error: <" + sfDictionary + "> not loaded.") traceback.print_exc() return None def loadTokenizer (self): self.oTokenizer = tokenizer.Tokenizer(self.sLangCode) def setMainDictionary (self, sfDictionary): "returns True if the dictionary is loaded" self.oMainDic = self._loadDictionary(sfDictionary) return bool(self.oMainDic) def setExtendedDictionary (self, sfDictionary): 
"returns True if the dictionary is loaded" self.oExtendedDic = self._loadDictionary(sfDictionary) return bool(self.oExtendedDic) def setPersonalDictionary (self, sfDictionary): "returns True if the dictionary is loaded" self.oPersonalDic = self._loadDictionary(sfDictionary) return bool(self.oPersonalDic) # parse text functions def parseParagraph (self, sText, bSpellSugg=False): if not self.oTokenizer: self.loadTokenizer() aSpellErrs = [] for dToken in self.oTokenizer.genTokens(sText): if dToken['sType'] == "WORD" and not self.isValidToken(dToken['sValue']): if bSpellSugg: dToken['aSuggestions'] = [] for lSugg in self.suggest(dToken['sValue']): dToken['aSuggestions'].extend(lSugg) aSpellErrs.append(dToken) return aSpellErrs # IBDAWG functions def isValidToken (self, sToken): "checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)" if self.oMainDic.isValidToken(sToken): return True |
︙ | ︙ |