Grammalecte  Check-in [982c1b5eb0]

Overview
Comment:[graphspell] ibdawg > suggest(): seek first simple combinations
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | graphspell
Files: files | file ages | folders
SHA3-256: 982c1b5eb083ff0cc78f2fb608eb7c729543b8f1d696ef0ef6576fbc2c093c57
User & Date: olr on 2021-02-18 08:49:19
Other Links: manifest | tags
Context
2021-02-18
10:26
[fr] remove old useless tests check-in: 15a51e51ca user: olr tags: trunk, fr
08:49
[graphspell] ibdawg > suggest(): seek first simple combinations check-in: 982c1b5eb0 user: olr tags: trunk, graphspell
08:48
[fr] faux positifs check-in: 950c661775 user: olr tags: trunk, fr
Changes

Modified gc_lang/fr/modules/tests_modules.py from [5c8bb6ae99] to [2556c753ce].

56
57
58
59
60
61
62
63
64
65
66
67


68
69
70
71
72
73
74

    def test_suggest (self):
        for sWord in [
            "déelirranttesss", "vallidasion", "Emilie", "exibission", "ditirembique", "jai", "email",
            "fatiqué", "coeur", "trèèèèèèèèès", "vraaaaiiiimeeeeennnt", "apele", "Co2",
            "emmppâiiiller", "testt", "apelaion", "exsepttion", "sintaxik", "ebriete", "ennormmement"
        ]:
            for lSugg in self.oSpellChecker.suggest(sWord):
                self.assertTrue(len(lSugg) > 0)
            #with timeblock(sWord):
            #    aSugg = self.oSpellChecker.suggest(sWord)
            #    print(sWord, "->", " ".join(aSugg))



    def test_lemmas (self):
        for sWord, sInfi in [
            ("suis",        "suivre"),
            ("suis",        "être"),
            ("a",           "avoir"),
            ("a",           "a"),







<
<

|
|
>
>







56
57
58
59
60
61
62


63
64
65
66
67
68
69
70
71
72
73
74

    def test_suggest (self):
        for sWord in [
            "déelirranttesss", "vallidasion", "Emilie", "exibission", "ditirembique", "jai", "email",
            "fatiqué", "coeur", "trèèèèèèèèès", "vraaaaiiiimeeeeennnt", "apele", "Co2",
            "emmppâiiiller", "testt", "apelaion", "exsepttion", "sintaxik", "ebriete", "ennormmement"
        ]:


            #with timeblock(sWord):
            for lSugg in self.oSpellChecker.suggest(sWord):
                #print(sWord, "->", " ".join(lSugg))
                self.assertTrue(len(lSugg) > 0)


    def test_lemmas (self):
        for sWord, sInfi in [
            ("suis",        "suivre"),
            ("suis",        "être"),
            ("a",           "avoir"),
            ("a",           "a"),

Modified graphspell-js/ibdawg.js from [44a920520f] to [20fbadf805].

43
44
45
46
47
48
49



50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
        if (this.aAllSugg.has(sSugg)) {
            return;
        }
        this.aAllSugg.add(sSugg);
        // jaro 0->1 1 les chaines sont égale
        let nDistJaro = 1 - str_transform.distanceJaroWinkler(this.sSimplifiedWord, str_transform.simplifyWord(sSugg));
        let nDist = Math.floor(nDistJaro * 10);



        if (nDistJaro < .11) {        // Best suggestions
            this.dBestSugg.set(sSugg, Math.round(nDistJaro*1000));
            if (this.dBestSugg.size > this.nBestSuggLimit) {
                this.nDistLimit = -1; // make suggest() to end search
            }
        } else if (nDistJaro < .33) { // Good suggestions
            this.dGoodSugg.set(sSugg, Math.round(nDistJaro*1000));
            if (this.dGoodSugg.size > this.nGoodSuggLimit) {
                this.nDistLimit = -1; // make suggest() to end search
            }
        } else {
            if (nDist < this.nMinDist) {
                this.nMinDist = nDist;
            }
            this.nDistLimit = Math.min(this.nDistLimit, this.nMinDist);
        }
        if (nDist <= this.nDistLimit) {
            if (nDist < this.nMinDist) {
                this.nMinDist = nDist;
            }
            this.nDistLimit = Math.min(this.nDistLimit, this.nMinDist+1);
        }
    }

    getSuggestions () {
        // return a list of suggestions
        let lRes = [];
        if (this.dBestSugg.size > 0) {
            // sort only with simplified words







>
>
>










<
<
<
|
<
<
<
<
<
<
|
<







43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62



63






64

65
66
67
68
69
70
71
        if (this.aAllSugg.has(sSugg)) {
            return;
        }
        this.aAllSugg.add(sSugg);
        // jaro 0->1 1 les chaines sont égale
        let nDistJaro = 1 - str_transform.distanceJaroWinkler(this.sSimplifiedWord, str_transform.simplifyWord(sSugg));
        let nDist = Math.floor(nDistJaro * 10);
        if (nDist < this.nMinDist) {
            this.nMinDist = nDist;
        }
        if (nDistJaro < .11) {        // Best suggestions
            this.dBestSugg.set(sSugg, Math.round(nDistJaro*1000));
            if (this.dBestSugg.size > this.nBestSuggLimit) {
                this.nDistLimit = -1; // make suggest() to end search
            }
        } else if (nDistJaro < .33) { // Good suggestions
            this.dGoodSugg.set(sSugg, Math.round(nDistJaro*1000));
            if (this.dGoodSugg.size > this.nGoodSuggLimit) {
                this.nDistLimit = -1; // make suggest() to end search
            }



        }






        this.nDistLimit = Math.min(this.nDistLimit, this.nMinDist+1);

    }

    getSuggestions () {
        // return a list of suggestions
        let lRes = [];
        if (this.dBestSugg.size > 0) {
            // sort only with simplified words
340
341
342
343
344
345
346

347
348
349
350
351
352
353
        let nMaxJump = Math.max(Math.floor(sWord.length / 4), 1);
        let oSuggResult = new SuggResult(sWord, nSuggLimit);
        sWord = str_transform.cleanWord(sWord);
        if (bSplitTrailingNumbers) {
            this._splitTrailingNumbers(oSuggResult, sWord);
        }
        this._splitSuggest(oSuggResult, sWord);

        this._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump);
        let aSugg = oSuggResult.getSuggestions();
        if (this.lexicographer) {
            aSugg = this.lexicographer.filterSugg(aSugg);
        }
        if (sSfx || sPfx) {
            // we add what we removed







>







333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
        let nMaxJump = Math.max(Math.floor(sWord.length / 4), 1);
        let oSuggResult = new SuggResult(sWord, nSuggLimit);
        sWord = str_transform.cleanWord(sWord);
        if (bSplitTrailingNumbers) {
            this._splitTrailingNumbers(oSuggResult, sWord);
        }
        this._splitSuggest(oSuggResult, sWord);
        this._suggest(oSuggResult, sWord);
        this._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump);
        let aSugg = oSuggResult.getSuggestions();
        if (this.lexicographer) {
            aSugg = this.lexicographer.filterSugg(aSugg);
        }
        if (sSfx || sPfx) {
            // we add what we removed

Modified graphspell/ibdawg.py from [13d2327263] to [e27ae4ab79].

59
60
61
62
63
64
65


66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
    def addSugg (self, sSugg, nDeep=0):
        "add a suggestion"
        if sSugg in self.aAllSugg:
            return
        self.aAllSugg.add(sSugg)
        nDistJaro = 1 - st.distanceJaroWinkler(self.sSimplifiedWord, st.simplifyWord(sSugg))
        nDist = floor(nDistJaro * 10)


        #logging.info((nDeep * "  ") + "__" + sSugg + "__ " + str(round(nDistJaro*1000)))
        if nDistJaro < .11:     # Best suggestions
            self.dBestSugg[sSugg] = round(nDistJaro*1000)
            if len(self.dBestSugg) > self.nBestSuggLimit:
                self.nDistLimit = -1  # make suggest() to end search
        elif nDistJaro < .33:   # Good suggestions
            self.dGoodSugg[sSugg] = round(nDistJaro*1000)
            if len(self.dGoodSugg) > self.nGoodSuggLimit:
                self.nDistLimit = -1  # make suggest() to end search
        else:
            if nDist < self.nMinDist:
                self.nMinDist = nDist
            self.nDistLimit = min(self.nDistLimit, self.nMinDist)
        if nDist <= self.nDistLimit:
            if nDist < self.nMinDist:
                self.nMinDist = nDist
            self.nDistLimit = min(self.nDistLimit, self.nMinDist+1)

    def getSuggestions (self):
        "return a list of suggestions"
        # we sort the better results with the original word
        lRes = []
        if len(self.dBestSugg) > 0:
            # sort only with simplified words







>
>









<
<
<
<
<
<
<
|







59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76







77
78
79
80
81
82
83
84
    def addSugg (self, sSugg, nDeep=0):
        "add a suggestion"
        if sSugg in self.aAllSugg:
            return
        self.aAllSugg.add(sSugg)
        nDistJaro = 1 - st.distanceJaroWinkler(self.sSimplifiedWord, st.simplifyWord(sSugg))
        nDist = floor(nDistJaro * 10)
        if nDist < self.nMinDist:
            self.nMinDist = nDist
        #logging.info((nDeep * "  ") + "__" + sSugg + "__ " + str(round(nDistJaro*1000)))
        if nDistJaro < .11:     # Best suggestions
            self.dBestSugg[sSugg] = round(nDistJaro*1000)
            if len(self.dBestSugg) > self.nBestSuggLimit:
                self.nDistLimit = -1  # make suggest() to end search
        elif nDistJaro < .33:   # Good suggestions
            self.dGoodSugg[sSugg] = round(nDistJaro*1000)
            if len(self.dGoodSugg) > self.nGoodSuggLimit:
                self.nDistLimit = -1  # make suggest() to end search







        self.nDistLimit = min(self.nDistLimit, self.nMinDist+1)

    def getSuggestions (self):
        "return a list of suggestions"
        # we sort the better results with the original word
        lRes = []
        if len(self.dBestSugg) > 0:
            # sort only with simplified words
242
243
244
245
246
247
248

249
250
251
252
253
254
255
        nMaxHardRepl = max((len(sWord) - 5) // 4, 1)
        nMaxJump = max(len(sWord) // 4, 1)
        oSuggResult = SuggResult(sWord, nSuggLimit)
        sWord = st.cleanWord(sWord)
        if bSplitTrailingNumbers:
            self._splitTrailingNumbers(oSuggResult, sWord)
        self._splitSuggest(oSuggResult, sWord)

        self._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump)
        aSugg = oSuggResult.getSuggestions()
        if self.lexicographer:
            aSugg = self.lexicographer.filterSugg(aSugg)
        if sSfx or sPfx:
            # we add what we removed
            return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))







>







237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
        nMaxHardRepl = max((len(sWord) - 5) // 4, 1)
        nMaxJump = max(len(sWord) // 4, 1)
        oSuggResult = SuggResult(sWord, nSuggLimit)
        sWord = st.cleanWord(sWord)
        if bSplitTrailingNumbers:
            self._splitTrailingNumbers(oSuggResult, sWord)
        self._splitSuggest(oSuggResult, sWord)
        self._suggest(oSuggResult, sWord)
        self._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump)
        aSugg = oSuggResult.getSuggestions()
        if self.lexicographer:
            aSugg = self.lexicographer.filterSugg(aSugg)
        if sSfx or sPfx:
            # we add what we removed
            return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))