Overview
Comment: | [graphspell] spellchecker: add parseParagraph() |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | graphspell |
Files: | files | file ages | folders |
SHA3-256: |
7616aa7ef9d3cc203b118f77e0749b0b |
User & Date: | olr on 2018-02-20 08:40:03 |
Other Links: | manifest | tags |
Context
2018-02-20
| ||
12:06 | [fr][tests] Update: Le Horla check-in: bf58e39b3f user: olr tags: trunk, fr | |
08:40 | [graphspell] spellchecker: add parseParagraph() check-in: 7616aa7ef9 user: olr tags: trunk, graphspell | |
2018-02-19
| ||
18:08 | [fr] new performance test (better when the processor isn’t converting a video!) check-in: b34690f0d8 user: olr tags: trunk, fr | |
Changes
Modified gc_lang/fr/webext/gce_worker.js from [c20f81d8f3] to [efd11a103b].
︙ | |||
200 201 202 203 204 205 206 | 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 | - + - + | } function parseAndSpellcheck (sText, sCountry, bDebug, bContext, dInfo={}) { let i = 0; sText = sText.replace(/\u00AD/g, "").normalize("NFC"); for (let sParagraph of text.getParagraph(sText)) { let aGrammErr = gc_engine.parse(sParagraph, sCountry, bDebug, bContext); |
︙ |
Modified graphspell-js/spellchecker.js from [e878cd2181] to [7b8a526c88].
︙ | |||
9 10 11 12 13 14 15 16 17 18 19 20 21 22 | 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 | + | "use strict"; if (typeof(require) !== 'undefined') { var ibdawg = require("resource://grammalecte/graphspell/ibdawg.js"); var tokenizer = require("resource://grammalecte/graphspell/tokenizer.js"); } ${map} const dDefaultDictionaries = new Map([ |
︙ | |||
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 | 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 | + - + + + + + + + + + + + + + + + + + + + + + + + + | this.sLangCode = sLangCode; if (!mainDic) { mainDic = dDefaultDictionaries.gl_get(sLangCode, ""); } this.oMainDic = this._loadDictionary(mainDic, sPath, true); this.oExtendedDic = this._loadDictionary(extentedDic, sPath); this.oPersonalDic = this._loadDictionary(personalDic, sPath); this.oTokenizer = null; } _loadDictionary (dictionary, sPath, bNecessary=false) { // returns an IBDAWG object if (!dictionary) { return null; } try { |
︙ |
Modified graphspell-js/tokenizer.js from [c3f0ee8c90] to [bdd895b918].
︙ | |||
83 84 85 86 87 88 89 | 83 84 85 86 87 88 89 90 91 92 93 94 95 | - - - - - - - - - - | helpers.logerror(e); } } i += nCut; sText = sText.slice(nCut); } } |
Modified graphspell/spellchecker.py from [638f8d8cdf] to [b9fb2c7b70].
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 | + + + + + + + + + + + + + + + + + + + | # Spellchecker # Wrapper for the IBDAWG class. # Useful to check several dictionaries at once. # To avoid iterating over a pile of dictionaries, it is assumed that 3 are enough: # - the main dictionary, bundled with the package # - the extended dictionary, added by an organization # - the personal dictionary, created by the user for its own convenience import traceback from . import ibdawg from . import tokenizer dDefaultDictionaries = { "fr": "fr.bdic", "en": "en.bdic" } class SpellChecker (): def __init__ (self, sLangCode, sfMainDic="", sfExtendedDic="", sfPersonalDic=""): "returns True if the main dictionary is loaded" self.sLangCode = sLangCode if not sfMainDic: sfMainDic = dDefaultDictionaries.get(sLangCode, "") self.oMainDic = self._loadDictionary(sfMainDic, True) self.oExtendedDic = self._loadDictionary(sfExtendedDic) self.oPersonalDic = self._loadDictionary(sfPersonalDic) self.oTokenizer = None def _loadDictionary (self, sfDictionary, bNecessary=False): "returns an IBDAWG object" if not sfDictionary: return None try: return ibdawg.IBDAWG(sfDictionary) except Exception as e: if bNecessary: raise Exception(str(e), "Error: <" + sfDictionary + "> not loaded.") print("Error: <" + sfDictionary + "> not loaded.") traceback.print_exc() return None def loadTokenizer (self): self.oTokenizer = tokenizer.Tokenizer(self.sLangCode) def setMainDictionary (self, sfDictionary): "returns True if the dictionary is loaded" self.oMainDic = self._loadDictionary(sfDictionary) return bool(self.oMainDic) 
def setExtendedDictionary (self, sfDictionary): "returns True if the dictionary is loaded" self.oExtendedDic = self._loadDictionary(sfDictionary) return bool(self.oExtendedDic) def setPersonalDictionary (self, sfDictionary): "returns True if the dictionary is loaded" self.oPersonalDic = self._loadDictionary(sfDictionary) return bool(self.oPersonalDic) # parse text functions def parseParagraph (self, sText, bSpellSugg=False): if not self.oTokenizer: self.loadTokenizer() aSpellErrs = [] for dToken in self.oTokenizer.genTokens(sText): if dToken['sType'] == "WORD" and not self.isValidToken(dToken['sValue']): if bSpellSugg: dToken['aSuggestions'] = [] for lSugg in self.suggest(dToken['sValue']): dToken['aSuggestions'].extend(lSugg) aSpellErrs.append(dToken) return aSpellErrs # IBDAWG functions def isValidToken (self, sToken): "checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)" if self.oMainDic.isValidToken(sToken): return True |
︙ |