Overview
| Comment: | [graphspell] ibdawg > suggest(): seek first simple combinations |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk | graphspell |
| Files: | files | file ages | folders |
| SHA3-256: | 982c1b5eb083ff0cc78f2fb608eb7c72 |
| User & Date: | olr on 2021-02-18 08:49:19 |
| Other Links: | manifest | tags |
Context
| 2021-02-18 | | |
| 10:26 | [fr] remove old useless tests check-in: 15a51e51ca user: olr tags: trunk, fr | |
| 08:49 | [graphspell] ibdawg > suggest(): seek first simple combinations check-in: 982c1b5eb0 user: olr tags: trunk, graphspell | |
| 08:48 | [fr] faux positifs check-in: 950c661775 user: olr tags: trunk, fr | |
Changes
Modified gc_lang/fr/modules/tests_modules.py from [5c8bb6ae99] to [2556c753ce].
| ︙ | ︙ | |||
56 57 58 59 60 61 62 |
def test_suggest (self):
for sWord in [
"déelirranttesss", "vallidasion", "Emilie", "exibission", "ditirembique", "jai", "email",
"fatiqué", "coeur", "trèèèèèèèèès", "vraaaaiiiimeeeeennnt", "apele", "Co2",
"emmppâiiiller", "testt", "apelaion", "exsepttion", "sintaxik", "ebriete", "ennormmement"
]:
| < < | | > > | 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
def test_suggest (self):
for sWord in [
"déelirranttesss", "vallidasion", "Emilie", "exibission", "ditirembique", "jai", "email",
"fatiqué", "coeur", "trèèèèèèèèès", "vraaaaiiiimeeeeennnt", "apele", "Co2",
"emmppâiiiller", "testt", "apelaion", "exsepttion", "sintaxik", "ebriete", "ennormmement"
]:
#with timeblock(sWord):
for lSugg in self.oSpellChecker.suggest(sWord):
#print(sWord, "->", " ".join(lSugg))
self.assertTrue(len(lSugg) > 0)
def test_lemmas (self):
for sWord, sInfi in [
("suis", "suivre"),
("suis", "être"),
("a", "avoir"),
("a", "a"),
|
| ︙ | ︙ |
Modified graphspell-js/ibdawg.js from [44a920520f] to [20fbadf805].
| ︙ | ︙ | |||
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
if (this.aAllSugg.has(sSugg)) {
return;
}
this.aAllSugg.add(sSugg);
// jaro 0->1 1 les chaines sont égale
let nDistJaro = 1 - str_transform.distanceJaroWinkler(this.sSimplifiedWord, str_transform.simplifyWord(sSugg));
let nDist = Math.floor(nDistJaro * 10);
if (nDistJaro < .11) { // Best suggestions
this.dBestSugg.set(sSugg, Math.round(nDistJaro*1000));
if (this.dBestSugg.size > this.nBestSuggLimit) {
this.nDistLimit = -1; // make suggest() to end search
}
} else if (nDistJaro < .33) { // Good suggestions
this.dGoodSugg.set(sSugg, Math.round(nDistJaro*1000));
if (this.dGoodSugg.size > this.nGoodSuggLimit) {
this.nDistLimit = -1; // make suggest() to end search
}
| > > > < < < | < < < < < < | < | 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
if (this.aAllSugg.has(sSugg)) {
return;
}
this.aAllSugg.add(sSugg);
// jaro 0->1 1 les chaines sont égale
let nDistJaro = 1 - str_transform.distanceJaroWinkler(this.sSimplifiedWord, str_transform.simplifyWord(sSugg));
let nDist = Math.floor(nDistJaro * 10);
if (nDist < this.nMinDist) {
this.nMinDist = nDist;
}
if (nDistJaro < .11) { // Best suggestions
this.dBestSugg.set(sSugg, Math.round(nDistJaro*1000));
if (this.dBestSugg.size > this.nBestSuggLimit) {
this.nDistLimit = -1; // make suggest() to end search
}
} else if (nDistJaro < .33) { // Good suggestions
this.dGoodSugg.set(sSugg, Math.round(nDistJaro*1000));
if (this.dGoodSugg.size > this.nGoodSuggLimit) {
this.nDistLimit = -1; // make suggest() to end search
}
}
this.nDistLimit = Math.min(this.nDistLimit, this.nMinDist+1);
}
getSuggestions () {
// return a list of suggestions
let lRes = [];
if (this.dBestSugg.size > 0) {
// sort only with simplified words
|
| ︙ | ︙ | |||
340 341 342 343 344 345 346 347 348 349 350 351 352 353 |
let nMaxJump = Math.max(Math.floor(sWord.length / 4), 1);
let oSuggResult = new SuggResult(sWord, nSuggLimit);
sWord = str_transform.cleanWord(sWord);
if (bSplitTrailingNumbers) {
this._splitTrailingNumbers(oSuggResult, sWord);
}
this._splitSuggest(oSuggResult, sWord);
this._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump);
let aSugg = oSuggResult.getSuggestions();
if (this.lexicographer) {
aSugg = this.lexicographer.filterSugg(aSugg);
}
if (sSfx || sPfx) {
// we add what we removed
| > | 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 |
let nMaxJump = Math.max(Math.floor(sWord.length / 4), 1);
let oSuggResult = new SuggResult(sWord, nSuggLimit);
sWord = str_transform.cleanWord(sWord);
if (bSplitTrailingNumbers) {
this._splitTrailingNumbers(oSuggResult, sWord);
}
this._splitSuggest(oSuggResult, sWord);
this._suggest(oSuggResult, sWord);
this._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump);
let aSugg = oSuggResult.getSuggestions();
if (this.lexicographer) {
aSugg = this.lexicographer.filterSugg(aSugg);
}
if (sSfx || sPfx) {
// we add what we removed
|
| ︙ | ︙ |
Modified graphspell/ibdawg.py from [13d2327263] to [e27ae4ab79].
| ︙ | ︙ | |||
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
def addSugg (self, sSugg, nDeep=0):
"add a suggestion"
if sSugg in self.aAllSugg:
return
self.aAllSugg.add(sSugg)
nDistJaro = 1 - st.distanceJaroWinkler(self.sSimplifiedWord, st.simplifyWord(sSugg))
nDist = floor(nDistJaro * 10)
#logging.info((nDeep * " ") + "__" + sSugg + "__ " + str(round(nDistJaro*1000)))
if nDistJaro < .11: # Best suggestions
self.dBestSugg[sSugg] = round(nDistJaro*1000)
if len(self.dBestSugg) > self.nBestSuggLimit:
self.nDistLimit = -1 # make suggest() to end search
elif nDistJaro < .33: # Good suggestions
self.dGoodSugg[sSugg] = round(nDistJaro*1000)
if len(self.dGoodSugg) > self.nGoodSuggLimit:
self.nDistLimit = -1 # make suggest() to end search
| > > < < < < < < < | | 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
def addSugg (self, sSugg, nDeep=0):
"add a suggestion"
if sSugg in self.aAllSugg:
return
self.aAllSugg.add(sSugg)
nDistJaro = 1 - st.distanceJaroWinkler(self.sSimplifiedWord, st.simplifyWord(sSugg))
nDist = floor(nDistJaro * 10)
if nDist < self.nMinDist:
self.nMinDist = nDist
#logging.info((nDeep * " ") + "__" + sSugg + "__ " + str(round(nDistJaro*1000)))
if nDistJaro < .11: # Best suggestions
self.dBestSugg[sSugg] = round(nDistJaro*1000)
if len(self.dBestSugg) > self.nBestSuggLimit:
self.nDistLimit = -1 # make suggest() to end search
elif nDistJaro < .33: # Good suggestions
self.dGoodSugg[sSugg] = round(nDistJaro*1000)
if len(self.dGoodSugg) > self.nGoodSuggLimit:
self.nDistLimit = -1 # make suggest() to end search
self.nDistLimit = min(self.nDistLimit, self.nMinDist+1)
def getSuggestions (self):
"return a list of suggestions"
# we sort the better results with the original word
lRes = []
if len(self.dBestSugg) > 0:
# sort only with simplified words
|
| ︙ | ︙ | |||
242 243 244 245 246 247 248 249 250 251 252 253 254 255 |
nMaxHardRepl = max((len(sWord) - 5) // 4, 1)
nMaxJump = max(len(sWord) // 4, 1)
oSuggResult = SuggResult(sWord, nSuggLimit)
sWord = st.cleanWord(sWord)
if bSplitTrailingNumbers:
self._splitTrailingNumbers(oSuggResult, sWord)
self._splitSuggest(oSuggResult, sWord)
self._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump)
aSugg = oSuggResult.getSuggestions()
if self.lexicographer:
aSugg = self.lexicographer.filterSugg(aSugg)
if sSfx or sPfx:
# we add what we removed
return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
| > | 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 |
nMaxHardRepl = max((len(sWord) - 5) // 4, 1)
nMaxJump = max(len(sWord) // 4, 1)
oSuggResult = SuggResult(sWord, nSuggLimit)
sWord = st.cleanWord(sWord)
if bSplitTrailingNumbers:
self._splitTrailingNumbers(oSuggResult, sWord)
self._splitSuggest(oSuggResult, sWord)
self._suggest(oSuggResult, sWord)
self._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump)
aSugg = oSuggResult.getSuggestions()
if self.lexicographer:
aSugg = self.lexicographer.filterSugg(aSugg)
if sSfx or sPfx:
# we add what we removed
return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
|
| ︙ | ︙ |