Index: gc_lang/fr/perf_memo.txt ================================================================== --- gc_lang/fr/perf_memo.txt +++ gc_lang/fr/perf_memo.txt @@ -32,5 +32,6 @@ 1.9.2 2020.05.12 08:43 1.62465 0.398831 0.273012 0.0810811 0.080937 0.0845885 0.204133 0.114146 0.0212864 0.0029547 1.12.2 2020.09.09 13:34 1.50568 0.374504 0.233108 0.0798712 0.0804466 0.0769674 0.171519 0.0945132 0.0165344 0.0019474 1.12.2 2020.09.09 13:35 1.41094 0.359093 0.236443 0.06968 0.0734418 0.0738087 0.169371 0.0946279 0.0167106 0.0019773 1.12.2 2020.09.11 19:16 1.35297 0.330545 0.221731 0.0666998 0.0692539 0.0701707 0.160564 0.0891676 0.015807 0.0045998 1.12.2 2020.09.30 14:50 1.37531 0.330381 0.226012 0.0668063 0.0690574 0.0694727 0.160282 0.0929373 0.0176629 0.0019713 +1.12.2 2020.09.30 17:01 1.37168 0.329009 0.248127 0.0670758 0.0701238 0.0910568 0.170556 0.093876 0.0168925 0.0020051 Index: graphspell-js/ibdawg.js ================================================================== --- graphspell-js/ibdawg.js +++ graphspell-js/ibdawg.js @@ -273,11 +273,11 @@ } isValid (sWord) { // checks if sWord is valid (different casing tested if the first letter is a capital) if (!sWord) { - return null; + return true; } if (sWord.includes("'")) { // ugly hack sWord = sWord.replace("'", "’"); } if (this.lookup(sWord)) { Index: graphspell-js/spellchecker.js ================================================================== --- graphspell-js/spellchecker.js +++ graphspell-js/spellchecker.js @@ -12,10 +12,11 @@ /* global require, exports, console, IBDAWG, Tokenizer */ "use strict"; ${map} +${string} if (typeof(process) !== 'undefined') { var ibdawg = require("./ibdawg.js"); var tokenizer = require("./tokenizer.js"); @@ -227,10 +228,11 @@ // IBDAWG functions isValidToken (sToken) { // checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked) + sToken = sToken.gl_trim("_"); if (this.oMainDic.isValidToken(sToken)) { return true; } if (this.bCommunityDic && this.oCommunityDic.isValidToken(sToken)) { return true; @@ -269,10 +271,11 @@ return false; } getMorph (sWord) { // retrieves morphologies list, different casing allowed + sWord = sWord.gl_trim("_"); if (this.bStorage && this._dMorphologies.has(sWord)) { return this._dMorphologies.get(sWord); } let lMorph = this.oMainDic.getMorph(sWord); if (this.bCommunityDic) { @@ -289,10 +292,11 @@ return lMorph; } getLemma (sWord) { // retrieves lemmas + sWord = sWord.gl_trim("_"); if (this.bStorage) { if (!this._dLemmas.has(sWord)) { this.getMorph(sWord); } return this._dLemmas.get(sWord); @@ -300,10 +304,11 @@ return Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf("/")); }))); } * suggest (sWord, nSuggLimit=10) { // generator: returns 1, 2 or 3 lists of suggestions + sWord = sWord.gl_trim("_"); if (this.lexicographer) { if (this.lexicographer.dSugg.has(sWord)) { yield this.lexicographer.dSugg.get(sWord).split("|"); } else if (sWord.gl_isTitle() && this.lexicographer.dSugg.has(sWord.toLowerCase())) { let lRes = this.lexicographer.dSugg.get(sWord.toLowerCase()).split("|"); Index: graphspell/ibdawg.py ================================================================== --- graphspell/ibdawg.py +++ graphspell/ibdawg.py @@ -276,11 +276,11 @@ return False def isValid (self, sWord): "checks if is valid (different casing tested if the first letter is a capital)" if not sWord: - return None + return True if "'" in sWord: # ugly hack sWord = sWord.replace("'", "’") if self.lookup(sWord): return True if sWord[0:1].isupper(): Index: graphspell/spellchecker.py ================================================================== --- graphspell/spellchecker.py +++ graphspell/spellchecker.py @@ -211,10 +211,11 @@ # IBDAWG functions def isValidToken (self, sToken): "checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)" + sToken = sToken.strip("_") if self.oMainDic.isValidToken(sToken): return True if self.bCommunityDic and self.oCommunityDic.isValidToken(sToken): return True if self.bPersonalDic and self.oPersonalDic.isValidToken(sToken): @@ -241,10 +242,11 @@ return True return False def getMorph (self, sWord): "retrieves morphologies list, different casing allowed" + sWord = sWord.strip("_") if self.bStorage and sWord in self._dMorphologies: return self._dMorphologies[sWord] lMorph = self.oMainDic.getMorph(sWord) if self.bCommunityDic: lMorph.extend(self.oCommunityDic.getMorph(sWord)) @@ -255,18 +257,20 @@ self._dLemmas[sWord] = { s[1:s.find("/")] for s in lMorph } return lMorph def getLemma (self, sWord): "retrieves lemmas" + sWord = sWord.strip("_") if self.bStorage: if sWord not in self._dLemmas: self.getMorph(sWord) return self._dLemmas[sWord] return { s[1:s.find("/")] for s in self.getMorph(sWord) } def suggest (self, sWord, nSuggLimit=10): "generator: returns 1, 2 or 3 lists of suggestions" + sWord = sWord.strip("_") if self.lexicographer: if sWord in self.lexicographer.dSugg: yield self.lexicographer.dSugg[sWord].split("|") elif sWord.istitle() and sWord.lower() in self.lexicographer.dSugg: lRes = self.lexicographer.dSugg[sWord.lower()].split("|")