Grammalecte  Check-in [982c1b5eb0]

Overview
Comment:[graphspell] ibdawg > suggest(): seek first simple combinations
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | graphspell
Files: files | file ages | folders
SHA3-256: 982c1b5eb083ff0cc78f2fb608eb7c729543b8f1d696ef0ef6576fbc2c093c57
User & Date: olr on 2021-02-18 08:49:19
Other Links: manifest | tags
Context
2021-02-18
10:26
[fr] remove old useless tests check-in: 15a51e51ca user: olr tags: trunk, fr
08:49
[graphspell] ibdawg > suggest(): seek first simple combinations check-in: 982c1b5eb0 user: olr tags: trunk, graphspell
08:48
[fr] faux positifs check-in: 950c661775 user: olr tags: trunk, fr
Changes

Modified gc_lang/fr/modules/tests_modules.py from [5c8bb6ae99] to [2556c753ce].

56
57
58
59
60
61
62
63
64
65
66
67




68
69
70
71
72
73
74
56
57
58
59
60
61
62


63


64
65
66
67
68
69
70
71
72
73
74







-
-

-
-
+
+
+
+








    def test_suggest (self):
        """Check that suggest() yields only non-empty results for sample words.

        Every word of the list below is passed to
        ``self.oSpellChecker.suggest()`` and each item yielded by that call
        must have a length greater than zero.
        """
        for sWord in [
            "déelirranttesss", "vallidasion", "Emilie", "exibission", "ditirembique", "jai", "email",
            "fatiqué", "coeur", "trèèèèèèèèès", "vraaaaiiiimeeeeennnt", "apele", "Co2",
            "emmppâiiiller", "testt", "apelaion", "exsepttion", "sintaxik", "ebriete", "ennormmement"
        ]:
            # NOTE(review): this loop and the one at the end of the body run
            # the same assertion. The text was captured from a diff view, so
            # one of the two is presumably the removed revision -- confirm
            # against the repository before relying on it.
            for lSugg in self.oSpellChecker.suggest(sWord):
                self.assertTrue(len(lSugg) > 0)
            # Disabled timing/debug output (commented-out code, left as found).
            #with timeblock(sWord):
            #    aSugg = self.oSpellChecker.suggest(sWord)
            #    print(sWord, "->", " ".join(aSugg))
            for lSugg in self.oSpellChecker.suggest(sWord):
                # Disabled debug print of the yielded item.
                #print(sWord, "->", " ".join(lSugg))
                self.assertTrue(len(lSugg) > 0)


    def test_lemmas (self):
        for sWord, sInfi in [
            ("suis",        "suivre"),
            ("suis",        "être"),
            ("a",           "avoir"),
            ("a",           "a"),

Modified graphspell-js/ibdawg.js from [44a920520f] to [20fbadf805].

43
44
45
46
47
48
49



50
51
52
53
54
55
56
57
58
59
60
61
62
63

64
65
66
67
68
69
70

71
72
73
74
75
76
77
78
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62




63







64

65
66
67
68
69
70
71







+
+
+










-
-
-
-
+
-
-
-
-
-
-
-
+
-







        if (this.aAllSugg.has(sSugg)) {
            return;
        }
        this.aAllSugg.add(sSugg);
        // Jaro-Winkler score ranges from 0 to 1; 1 means the strings are equal
        let nDistJaro = 1 - str_transform.distanceJaroWinkler(this.sSimplifiedWord, str_transform.simplifyWord(sSugg));
        let nDist = Math.floor(nDistJaro * 10);
        if (nDist < this.nMinDist) {
            this.nMinDist = nDist;
        }
        if (nDistJaro < .11) {        // Best suggestions
            this.dBestSugg.set(sSugg, Math.round(nDistJaro*1000));
            if (this.dBestSugg.size > this.nBestSuggLimit) {
                this.nDistLimit = -1; // make suggest() to end search
            }
        } else if (nDistJaro < .33) { // Good suggestions
            this.dGoodSugg.set(sSugg, Math.round(nDistJaro*1000));
            if (this.dGoodSugg.size > this.nGoodSuggLimit) {
                this.nDistLimit = -1; // make suggest() to end search
            }
        } else {
            if (nDist < this.nMinDist) {
                this.nMinDist = nDist;
            }
        }
            this.nDistLimit = Math.min(this.nDistLimit, this.nMinDist);
        }
        if (nDist <= this.nDistLimit) {
            if (nDist < this.nMinDist) {
                this.nMinDist = nDist;
            }
            this.nDistLimit = Math.min(this.nDistLimit, this.nMinDist+1);
        this.nDistLimit = Math.min(this.nDistLimit, this.nMinDist+1);
        }
    }

    getSuggestions () {
        // return a list of suggestions
        let lRes = [];
        if (this.dBestSugg.size > 0) {
            // sort only with simplified words
340
341
342
343
344
345
346

347
348
349
350
351
352
353
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347







+







        let nMaxJump = Math.max(Math.floor(sWord.length / 4), 1);
        let oSuggResult = new SuggResult(sWord, nSuggLimit);
        sWord = str_transform.cleanWord(sWord);
        if (bSplitTrailingNumbers) {
            this._splitTrailingNumbers(oSuggResult, sWord);
        }
        this._splitSuggest(oSuggResult, sWord);
        this._suggest(oSuggResult, sWord);
        this._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump);
        let aSugg = oSuggResult.getSuggestions();
        if (this.lexicographer) {
            aSugg = this.lexicographer.filterSugg(aSugg);
        }
        if (sSfx || sPfx) {
            // we add what we removed

Modified graphspell/ibdawg.py from [13d2327263] to [e27ae4ab79].

59
60
61
62
63
64
65


66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82

83
84
85
86
87
88
89
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76








77
78
79
80
81
82
83
84







+
+









-
-
-
-
-
-
-
-
+







    def addSugg (self, sSugg, nDeep=0):
        """Record the suggestion <sSugg> and update the search distance limits.

        A suggestion already present in ``self.aAllSugg`` is ignored.
        Otherwise it is added to that set, scored against
        ``self.sSimplifiedWord`` and, depending on the score, stored in
        ``self.dBestSugg`` or ``self.dGoodSugg``. ``self.nMinDist`` and
        ``self.nDistLimit`` are lowered as a side effect.

        <nDeep> is only referenced by the commented-out logging line.

        NOTE(review): this text was captured from a diff view and appears to
        contain both removed and added lines of the change; confirm the
        actual revision in the repository.
        """
        if sSugg in self.aAllSugg:
            return
        self.aAllSugg.add(sSugg)
        # Score: 1 minus the Jaro-Winkler value computed on the simplified forms.
        nDistJaro = 1 - st.distanceJaroWinkler(self.sSimplifiedWord, st.simplifyWord(sSugg))
        # Integer bucket of the score (score * 10, rounded down).
        nDist = floor(nDistJaro * 10)
        # Keep track of the smallest bucket seen so far.
        if nDist < self.nMinDist:
            self.nMinDist = nDist
        #logging.info((nDeep * "  ") + "__" + sSugg + "__ " + str(round(nDistJaro*1000)))
        if nDistJaro < .11:     # Best suggestions
            self.dBestSugg[sSugg] = round(nDistJaro*1000)
            if len(self.dBestSugg) > self.nBestSuggLimit:
                self.nDistLimit = -1  # tells suggest() to end the search
        elif nDistJaro < .33:   # Good suggestions
            self.dGoodSugg[sSugg] = round(nDistJaro*1000)
            if len(self.dGoodSugg) > self.nGoodSuggLimit:
                self.nDistLimit = -1  # tells suggest() to end the search
        else:
            # NOTE(review): nMinDist was already lowered to nDist above, so
            # this condition cannot be true at this point.
            if nDist < self.nMinDist:
                self.nMinDist = nDist
            self.nDistLimit = min(self.nDistLimit, self.nMinDist)
        if nDist <= self.nDistLimit:
            # NOTE(review): same remark, nMinDist <= nDist already holds here.
            if nDist < self.nMinDist:
                self.nMinDist = nDist
            self.nDistLimit = min(self.nDistLimit, self.nMinDist+1)
        # Cap the limit at one bucket above the best distance found so far.
        self.nDistLimit = min(self.nDistLimit, self.nMinDist+1)

    def getSuggestions (self):
        "return a list of suggestions"
        # we sort the better results with the original word
        lRes = []
        if len(self.dBestSugg) > 0:
            # sort only with simplified words
242
243
244
245
246
247
248

249
250
251
252
253
254
255
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251







+







        nMaxHardRepl = max((len(sWord) - 5) // 4, 1)
        nMaxJump = max(len(sWord) // 4, 1)
        oSuggResult = SuggResult(sWord, nSuggLimit)
        sWord = st.cleanWord(sWord)
        if bSplitTrailingNumbers:
            self._splitTrailingNumbers(oSuggResult, sWord)
        self._splitSuggest(oSuggResult, sWord)
        self._suggest(oSuggResult, sWord)
        self._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump)
        aSugg = oSuggResult.getSuggestions()
        if self.lexicographer:
            aSugg = self.lexicographer.filterSugg(aSugg)
        if sSfx or sPfx:
            # we add what we removed
            return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))