Overview
Comment: | [graphspell] distance DamerauLevenshtein returns a float instead of an integer |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | graphspell |
Files: | files | file ages | folders |
SHA3-256: |
848d878b75918b7844e54263b44f570e |
User & Date: | olr on 2020-09-11 07:00:30 |
Other Links: | manifest | tags |
Context
2020-09-11
| ||
07:05 | [fx] gce_worker bug: remove useless code check-in: 169418beed user: olr tags: trunk, fx | |
07:00 | [graphspell] distance DamerauLevenshtein returns a float instead of an integer check-in: 848d878b75 user: olr tags: trunk, graphspell | |
2020-09-10
| ||
23:47 | [fr] ajustements check-in: e1e8778897 user: olr tags: trunk, fr | |
Changes
Modified graphspell-js/ibdawg.js from [80e916efdb] to [69d7490b82].
︙ | ︙ | |||
35 36 37 38 39 40 41 | addSugg (sSugg, nDeep=0) { // add a suggestion if (this.aAllSugg.has(sSugg)) { return; } this.aAllSugg.add(sSugg); if (!this.aSugg.has(sSugg)) { | | | 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 | addSugg (sSugg, nDeep=0) { // add a suggestion if (this.aAllSugg.has(sSugg)) { return; } this.aAllSugg.add(sSugg); if (!this.aSugg.has(sSugg)) { let nDist = Math.floor(str_transform.distanceDamerauLevenshtein(this.sSimplifiedWord, str_transform.simplifyWord(sSugg))); if (nDist <= this.nDistLimit) { if (sSugg.includes(" ")) { // add 1 to distance for split suggestions nDist += 1; } if (!this.dSugg.has(nDist)) { this.dSugg.set(nDist, []); } |
︙ | ︙ |
Modified graphspell-js/str_transform.js from [85944d11a1] to [8eb8f4c63c].
︙ | ︙ | |||
140 141 142 143 144 145 146 | matrix[i-1][j-1] + nCost // Substitution ); if (i > 1 && j > 1 && s1[i] == s2[j-1] && s1[i-1] == s2[j]) { matrix[i][j] = Math.min(matrix[i][j], matrix[i-2][j-2] + nCost); // Transposition } } } | | | 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 | matrix[i-1][j-1] + nCost // Substitution ); if (i > 1 && j > 1 && s1[i] == s2[j-1] && s1[i-1] == s2[j]) { matrix[i][j] = Math.min(matrix[i][j], matrix[i-2][j-2] + nCost); // Transposition } } } return matrix[nLen1][nLen2]; } catch (e) { console.error(e); } }, showDistance (s1, s2) { |
︙ | ︙ |
Modified graphspell/ibdawg.py from [e07922ff50] to [d16ed0d683].
︙ | ︙ | |||
53 54 55 56 57 58 59 | "add a suggestion" #logging.info((nDeep * " ") + "__" + sSugg + "__") if sSugg in self.aAllSugg: return self.aAllSugg.add(sSugg) if sSugg not in self.aSugg: #nDist = min(st.distanceDamerauLevenshtein(self.sWord, sSugg), st.distanceDamerauLevenshtein(self.sSimplifiedWord, st.simplifyWord(sSugg))) | | | 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 | "add a suggestion" #logging.info((nDeep * " ") + "__" + sSugg + "__") if sSugg in self.aAllSugg: return self.aAllSugg.add(sSugg) if sSugg not in self.aSugg: #nDist = min(st.distanceDamerauLevenshtein(self.sWord, sSugg), st.distanceDamerauLevenshtein(self.sSimplifiedWord, st.simplifyWord(sSugg))) nDist = int(st.distanceDamerauLevenshtein(self.sSimplifiedWord, st.simplifyWord(sSugg))) #logging.info((nDeep * " ") + "__" + sSugg + "__ :" + self.sSimplifiedWord +"|"+ st.simplifyWord(sSugg) +" -> "+ str(nDist)) if nDist <= self.nDistLimit: if " " in sSugg: nDist += 1 if nDist not in self.dSugg: self.dSugg[nDist] = [] self.dSugg[nDist].append(sSugg) |
︙ | ︙ | |||
77 78 79 80 81 82 83 84 85 86 87 88 89 90 | bFirstListSorted = False for nDist, lSugg in self.dSugg.items(): if nDist > self.nDistLimit: break if not bFirstListSorted and len(lSugg) > 1: lSugg.sort(key=lambda sSugg: st.distanceDamerauLevenshtein(self.sWord, sSugg)) bFirstListSorted = True lRes.extend(lSugg) if len(lRes) > nSuggLimit: break if self.sWord.isupper(): lRes = list(OrderedDict.fromkeys(map(lambda sSugg: sSugg.upper(), lRes))) # use dict, when Python 3.6+ elif self.sWord[0:1].isupper(): # dont’ use <.istitle> | > > > | 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 | bFirstListSorted = False for nDist, lSugg in self.dSugg.items(): if nDist > self.nDistLimit: break if not bFirstListSorted and len(lSugg) > 1: lSugg.sort(key=lambda sSugg: st.distanceDamerauLevenshtein(self.sWord, sSugg)) bFirstListSorted = True #print(nDist, "|".join(lSugg)) #for sSugg in lSugg: # print(sSugg, st.distanceDamerauLevenshtein(self.sWord, sSugg)) lRes.extend(lSugg) if len(lRes) > nSuggLimit: break if self.sWord.isupper(): lRes = list(OrderedDict.fromkeys(map(lambda sSugg: sSugg.upper(), lRes))) # use dict, when Python 3.6+ elif self.sWord[0:1].isupper(): # dont’ use <.istitle> |
︙ | ︙ |
Modified graphspell/str_transform.py from [c7bf16d7ce] to [fea83aad08].
︙ | ︙ | |||
98 99 100 101 102 103 104 | d[i, j] = min( d[i-1, j] + 1, # Deletion d[i, j-1] + 1, # Insertion d[i-1, j-1] + nCost, # Substitution ) if i and j and s1[i] == s2[j-1] and s1[i-1] == s2[j]: d[i, j] = min(d[i, j], d[i-2, j-2] + nCost) # Transposition | | | 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 | d[i, j] = min( d[i-1, j] + 1, # Deletion d[i, j-1] + 1, # Insertion d[i-1, j-1] + nCost, # Substitution ) if i and j and s1[i] == s2[j-1] and s1[i-1] == s2[j]: d[i, j] = min(d[i, j], d[i-2, j-2] + nCost) # Transposition return d[nLen1-1, nLen2-1] def distanceSift4 (s1, s2, nMaxOffset=5): "implementation of general Sift4." # https://siderite.blogspot.com/2014/11/super-fast-and-accurate-string-distance.html if not s1: return len(s2) |
︙ | ︙ |