Grammalecte  Check-in [848d878b75]

Overview
Comment:[graphspell] distance DamerauLevenshtein send a float instead of an integer
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | graphspell
Files: files | file ages | folders
SHA3-256: 848d878b75918b7844e54263b44f570e248223d299df7e1fd01beeb063bd25bf
User & Date: olr on 2020-09-11 07:00:30
Other Links: manifest | tags
Context
2020-09-11
07:05
[fx] gce_worker bug: remove useless code check-in: 169418beed user: olr tags: trunk, fx
07:00
[graphspell] distance DamerauLevenshtein send a float instead of an integer check-in: 848d878b75 user: olr tags: trunk, graphspell
2020-09-10
23:47
[fr] ajustements check-in: e1e8778897 user: olr tags: trunk, fr
Changes

Modified graphspell-js/ibdawg.js from [80e916efdb] to [69d7490b82].

35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
    addSugg (sSugg, nDeep=0) {
        // add a suggestion
        if (this.aAllSugg.has(sSugg)) {
            return;
        }
        this.aAllSugg.add(sSugg);
        if (!this.aSugg.has(sSugg)) {
            let nDist = str_transform.distanceDamerauLevenshtein(this.sSimplifiedWord, str_transform.simplifyWord(sSugg));
            if (nDist <= this.nDistLimit) {
                if (sSugg.includes(" ")) { // add 1 to distance for split suggestions
                    nDist += 1;
                }
                if (!this.dSugg.has(nDist)) {
                    this.dSugg.set(nDist, []);
                }







|







35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
    addSugg (sSugg, nDeep=0) {
        // add a suggestion
        if (this.aAllSugg.has(sSugg)) {
            return;
        }
        this.aAllSugg.add(sSugg);
        if (!this.aSugg.has(sSugg)) {
            let nDist = Math.floor(str_transform.distanceDamerauLevenshtein(this.sSimplifiedWord, str_transform.simplifyWord(sSugg)));
            if (nDist <= this.nDistLimit) {
                if (sSugg.includes(" ")) { // add 1 to distance for split suggestions
                    nDist += 1;
                }
                if (!this.dSugg.has(nDist)) {
                    this.dSugg.set(nDist, []);
                }

Modified graphspell-js/str_transform.js from [85944d11a1] to [8eb8f4c63c].

140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
                        matrix[i-1][j-1] + nCost    // Substitution
                    );
                    if (i > 1 && j > 1 && s1[i] == s2[j-1] && s1[i-1] == s2[j]) {
                        matrix[i][j] = Math.min(matrix[i][j], matrix[i-2][j-2] + nCost);  // Transposition
                    }
                }
            }
            return Math.floor(matrix[nLen1][nLen2]);
        }
        catch (e) {
            console.error(e);
        }
    },

    showDistance (s1, s2) {







|







140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
                        matrix[i-1][j-1] + nCost    // Substitution
                    );
                    if (i > 1 && j > 1 && s1[i] == s2[j-1] && s1[i-1] == s2[j]) {
                        matrix[i][j] = Math.min(matrix[i][j], matrix[i-2][j-2] + nCost);  // Transposition
                    }
                }
            }
            return matrix[nLen1][nLen2];
        }
        catch (e) {
            console.error(e);
        }
    },

    showDistance (s1, s2) {

Modified graphspell/ibdawg.py from [e07922ff50] to [d16ed0d683].

53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
        "add a suggestion"
        #logging.info((nDeep * "  ") + "__" + sSugg + "__")
        if sSugg in self.aAllSugg:
            return
        self.aAllSugg.add(sSugg)
        if sSugg not in self.aSugg:
            #nDist = min(st.distanceDamerauLevenshtein(self.sWord, sSugg), st.distanceDamerauLevenshtein(self.sSimplifiedWord, st.simplifyWord(sSugg)))
            nDist = st.distanceDamerauLevenshtein(self.sSimplifiedWord, st.simplifyWord(sSugg))
            #logging.info((nDeep * "  ") + "__" + sSugg + "__ :" + self.sSimplifiedWord +"|"+ st.simplifyWord(sSugg) +" -> "+ str(nDist))
            if nDist <= self.nDistLimit:
                if " " in sSugg:
                    nDist += 1
                if nDist not in self.dSugg:
                    self.dSugg[nDist] = []
                self.dSugg[nDist].append(sSugg)







|







53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
        "add a suggestion"
        #logging.info((nDeep * "  ") + "__" + sSugg + "__")
        if sSugg in self.aAllSugg:
            return
        self.aAllSugg.add(sSugg)
        if sSugg not in self.aSugg:
            #nDist = min(st.distanceDamerauLevenshtein(self.sWord, sSugg), st.distanceDamerauLevenshtein(self.sSimplifiedWord, st.simplifyWord(sSugg)))
            nDist = int(st.distanceDamerauLevenshtein(self.sSimplifiedWord, st.simplifyWord(sSugg)))
            #logging.info((nDeep * "  ") + "__" + sSugg + "__ :" + self.sSimplifiedWord +"|"+ st.simplifyWord(sSugg) +" -> "+ str(nDist))
            if nDist <= self.nDistLimit:
                if " " in sSugg:
                    nDist += 1
                if nDist not in self.dSugg:
                    self.dSugg[nDist] = []
                self.dSugg[nDist].append(sSugg)
77
78
79
80
81
82
83



84
85
86
87
88
89
90
        bFirstListSorted = False
        for nDist, lSugg in self.dSugg.items():
            if nDist > self.nDistLimit:
                break
            if not bFirstListSorted and len(lSugg) > 1:
                lSugg.sort(key=lambda sSugg: st.distanceDamerauLevenshtein(self.sWord, sSugg))
                bFirstListSorted = True



            lRes.extend(lSugg)
            if len(lRes) > nSuggLimit:
                break
        if self.sWord.isupper():
            lRes = list(OrderedDict.fromkeys(map(lambda sSugg: sSugg.upper(), lRes))) # use dict, when Python 3.6+
        elif self.sWord[0:1].isupper():
            # dont’ use <.istitle>







>
>
>







77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
        bFirstListSorted = False
        for nDist, lSugg in self.dSugg.items():
            if nDist > self.nDistLimit:
                break
            if not bFirstListSorted and len(lSugg) > 1:
                lSugg.sort(key=lambda sSugg: st.distanceDamerauLevenshtein(self.sWord, sSugg))
                bFirstListSorted = True
            #print(nDist, "|".join(lSugg))
            #for sSugg in lSugg:
            #    print(sSugg, st.distanceDamerauLevenshtein(self.sWord, sSugg))
            lRes.extend(lSugg)
            if len(lRes) > nSuggLimit:
                break
        if self.sWord.isupper():
            lRes = list(OrderedDict.fromkeys(map(lambda sSugg: sSugg.upper(), lRes))) # use dict, when Python 3.6+
        elif self.sWord[0:1].isupper():
            # dont’ use <.istitle>

Modified graphspell/str_transform.py from [c7bf16d7ce] to [fea83aad08].

98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
            d[i, j] = min(
                d[i-1, j]   + 1,        # Deletion
                d[i,   j-1] + 1,        # Insertion
                d[i-1, j-1] + nCost,    # Substitution
            )
            if i and j and s1[i] == s2[j-1] and s1[i-1] == s2[j]:
                d[i, j] = min(d[i, j], d[i-2, j-2] + nCost)     # Transposition
    return int(d[nLen1-1, nLen2-1])


def distanceSift4 (s1, s2, nMaxOffset=5):
    "implementation of general Sift4."
    # https://siderite.blogspot.com/2014/11/super-fast-and-accurate-string-distance.html
    if not s1:
        return len(s2)







|







98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
            d[i, j] = min(
                d[i-1, j]   + 1,        # Deletion
                d[i,   j-1] + 1,        # Insertion
                d[i-1, j-1] + nCost,    # Substitution
            )
            if i and j and s1[i] == s2[j-1] and s1[i-1] == s2[j]:
                d[i, j] = min(d[i, j], d[i-2, j-2] + nCost)     # Transposition
    return d[nLen1-1, nLen2-1]


def distanceSift4 (s1, s2, nMaxOffset=5):
    "implementation of general Sift4."
    # https://siderite.blogspot.com/2014/11/super-fast-and-accurate-string-distance.html
    if not s1:
        return len(s2)