Index: graphspell-js/ibdawg.js ================================================================== --- graphspell-js/ibdawg.js +++ graphspell-js/ibdawg.js @@ -338,13 +338,10 @@ } this._splitSuggest(oSuggResult, sWord); this._suggest(oSuggResult, sWord); this._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump); let aSugg = oSuggResult.getSuggestions(); - if (this.lexicographer) { - aSugg = this.lexicographer.filterSugg(aSugg); - } if (sSfx || sPfx) { // we add what we removed return aSugg.map( (sSugg) => { return sPfx + sSugg + sSfx; } ); } //console.timeEnd("Suggestions for " + sWord); Index: graphspell-js/lexgraph_fr.js ================================================================== --- graphspell-js/lexgraph_fr.js +++ graphspell-js/lexgraph_fr.js @@ -546,15 +546,33 @@ }, // Other functions - filterSugg: function (aSugg) { - return aSugg.filter((sSugg) => { return !sSugg.endsWith("è") && !sSugg.endsWith("È"); }); + isValidSugg: function (sSugg, oSpellChecker) { + "return True if is valid" + if (sSugg.endsWith("è") || sSugg.endsWith("È")) { + return false; + } + if (sSugg.includes("’")) { + if (sSugg.search(/^[dD]’/) == 0 && !oSpellChecker.morph(sSugg.slice(2), ":[YNAW]")) { + return false; + } + if (sSugg.search(/^[nmtsNMTS]’/) == 0 && !oSpellChecker.morph(sSugg.slice(2), ":V")) { + return false; + } + if (sSugg.search(/^[jJ]’/) == 0 && !oSpellChecker.morph(sSugg.slice(2), ":(?:Y|[123][sp])")) { + return false; + } + if (sSugg.search(/^[cçCÇ]’/) == 0 && !oSpellChecker.morph(sSugg.slice(2), ":3[sp]")) { + return false; + } + } + return true; } } if (typeof(exports) !== 'undefined') { exports.lexgraph_fr = lexgraph_fr; } Index: graphspell-js/spellchecker.js ================================================================== --- graphspell-js/spellchecker.js +++ graphspell-js/spellchecker.js @@ -289,10 +289,32 @@ this._dLemmas.set(sWord, Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf("/")); 
})))); //console.log(sWord, this._dLemmas.get(sWord)); } return lMorph; } + + morph (sWord, sPattern, sNegPattern="") { + // analyse a token, return true if sNegPattern not in morphologies and sPattern in morphologies ("*" means all morphologies must match sPattern) + let lMorph = this.getMorph(sWord); + if (lMorph.length == 0) { + return false; + } + // check negative condition + if (sNegPattern) { + if (sNegPattern == "*") { + // all morph must match sPattern + return lMorph.every(sMorph => (sMorph.search(sPattern) !== -1)); + } + else { + if (lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { + return false; + } + } + } + // search sPattern + return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1)); + } getLemma (sWord) { // retrieves lemmas if (this.bStorage) { if (!this._dLemmas.has(sWord)) { @@ -307,14 +329,16 @@ // generator: returns 1, 2 or 3 lists of suggestions if (this.lexicographer) { if (this.lexicographer.dSugg.has(sWord)) { yield this.lexicographer.dSugg.get(sWord).split("|"); } else if (sWord.gl_isTitle() && this.lexicographer.dSugg.has(sWord.toLowerCase())) { - let lRes = this.lexicographer.dSugg.get(sWord.toLowerCase()).split("|"); - yield lRes.map((sSugg) => { return sSugg.slice(0,1).toUpperCase() + sSugg.slice(1); }); + let lSuggs = this.lexicographer.dSugg.get(sWord.toLowerCase()).split("|"); + yield lSuggs.map((sSugg) => { return sSugg.slice(0,1).toUpperCase() + sSugg.slice(1); }); } else { - yield this.oMainDic.suggest(sWord, nSuggLimit, true); + let lSuggs = this.oMainDic.suggest(sWord, nSuggLimit, true); + lSuggs = lSuggs.filter((sSugg) => this.lexicographer.isValidSugg(sSugg, this)); + yield lSuggs; } } else { yield this.oMainDic.suggest(sWord, nSuggLimit, true); } if (this.bCommunityDic) { Index: graphspell/ibdawg.py ================================================================== --- graphspell/ibdawg.py +++ graphspell/ibdawg.py @@ -242,12 +242,10 @@ self._splitTrailingNumbers(oSuggResult, sWord) self._splitSuggest(oSuggResult, sWord) self._suggest(oSuggResult, sWord)
self._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump) aSugg = oSuggResult.getSuggestions() - if self.lexicographer: - aSugg = self.lexicographer.filterSugg(aSugg) if sSfx or sPfx: # we add what we removed return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg)) return aSugg Index: graphspell/lexgraph_fr.py ================================================================== --- graphspell/lexgraph_fr.py +++ graphspell/lexgraph_fr.py @@ -9,11 +9,11 @@ # if the boolean is True, 4 functions are required: # split(sWord) -> returns a list of string (that will be analyzed) # analyze(sWord) -> returns a string with the meaning of word # readableMorph(sMorph) -> returns a string with the meaning of tags # setLabelsOnToken(dToken) -> adds readable information on token -# filterSugg(aWord) -> returns a filtered list of suggestions +# isValidSugg(sWord, oSpellChecker) -> returns True if the suggestion is valid import re #### Suggestions @@ -508,9 +508,19 @@ return # Other functions -def filterSugg (aSuggs): - "exclude suggestions" - return [ sSugg for sSugg in aSuggs if not sSugg.endswith(("è", "È")) ] - #return filter(lambda sSugg: not sSugg.endswith(("è", "È")), aSuggs) # return an object filter +def isValidSugg (sSugg, oSpellChecker): + "return True if the suggestion is valid" + if sSugg.endswith(("è", "È")): + return False + if "’" in sSugg: + if sSugg.startswith(("d’", "D’")) and not oSpellChecker.morph(sSugg[2:], ":[YNAW]"): + return False + if sSugg.startswith(("n’", "m’", "t’", "s’", "N’", "M’", "T’", "S’")) and not oSpellChecker.morph(sSugg[2:], ":V"): + return False + if sSugg.startswith(("j’", "J’")) and not oSpellChecker.morph(sSugg[2:], ":(?:Y|[123][sp])"): + return False + if sSugg.startswith(("c’", "C’")) and not oSpellChecker.morph(sSugg[2:], ":3[sp]"): + return False + return True Index: graphspell/spellchecker.py ================================================================== --- graphspell/spellchecker.py +++ graphspell/spellchecker.py @@ -6,10
+6,11 @@ - the main dictionary, bundled with the package - the community dictionary, added by an organization - the personal dictionary, created by the user for its own convenience """ +import re import importlib import traceback from . import ibdawg from . import tokenizer @@ -254,10 +255,28 @@ if self.bStorage: self._dMorphologies[sWord] = lMorph self._dLemmas[sWord] = { s[1:s.find("/")] for s in lMorph } return lMorph + def morph (self, sWord, sPattern, sNegPattern=""): + "analyse a word, return True if sNegPattern not in morphologies and sPattern in morphologies" + lMorph = self.getMorph(sWord) + if not lMorph: + return False + # check negative condition + if sNegPattern: + if sNegPattern == "*": + # all morph must match sPattern + zPattern = re.compile(sPattern) + return all(zPattern.search(sMorph) for sMorph in lMorph) + zNegPattern = re.compile(sNegPattern) + if any(zNegPattern.search(sMorph) for sMorph in lMorph): + return False + # search sPattern + zPattern = re.compile(sPattern) + return any(zPattern.search(sMorph) for sMorph in lMorph) + def getLemma (self, sWord): "retrieves lemmas" if self.bStorage: if sWord not in self._dLemmas: self.getMorph(sWord) @@ -268,14 +287,16 @@ "generator: returns 1, 2 or 3 lists of suggestions" if self.lexicographer: if sWord in self.lexicographer.dSugg: yield self.lexicographer.dSugg[sWord].split("|") elif sWord.istitle() and sWord.lower() in self.lexicographer.dSugg: - lRes = self.lexicographer.dSugg[sWord.lower()].split("|") - yield list(map(lambda sSugg: sSugg[0:1].upper()+sSugg[1:], lRes)) + lSuggs = self.lexicographer.dSugg[sWord.lower()].split("|") + yield list(map(lambda sSugg: sSugg[0:1].upper()+sSugg[1:], lSuggs)) else: - yield self.oMainDic.suggest(sWord, nSuggLimit, True) + lSuggs = self.oMainDic.suggest(sWord, nSuggLimit, True) + lSuggs = [ sSugg for sSugg in lSuggs if self.lexicographer.isValidSugg(sSugg, self) ] + yield lSuggs else: yield self.oMainDic.suggest(sWord, nSuggLimit, True) if self.bCommunityDic: yield
self.oCommunityDic.suggest(sWord, (nSuggLimit//2)+1) if self.bPersonalDic: