Overview
| Comment: | [graphspell] distance DamerauLevenshtein returns a float instead of an integer |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk | graphspell |
| Files: | files | file ages | folders |
| SHA3-256: |
848d878b75918b7844e54263b44f570e |
| User & Date: | olr on 2020-09-11 07:00:30 |
| Other Links: | manifest | tags |
Context
|
2020-09-11
| ||
| 07:05 | [fx] gce_worker bug: remove useless code check-in: 169418beed user: olr tags: trunk, fx | |
| 07:00 | [graphspell] distance DamerauLevenshtein returns a float instead of an integer check-in: 848d878b75 user: olr tags: trunk, graphspell | |
|
2020-09-10
| ||
| 23:47 | [fr] ajustements check-in: e1e8778897 user: olr tags: trunk, fr | |
Changes
Modified graphspell-js/ibdawg.js from [80e916efdb] to [69d7490b82].
| ︙ | ︙ | |||
35 36 37 38 39 40 41 |
addSugg (sSugg, nDeep=0) {
// add a suggestion
if (this.aAllSugg.has(sSugg)) {
return;
}
this.aAllSugg.add(sSugg);
if (!this.aSugg.has(sSugg)) {
| | | 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
addSugg (sSugg, nDeep=0) {
// add a suggestion
if (this.aAllSugg.has(sSugg)) {
return;
}
this.aAllSugg.add(sSugg);
if (!this.aSugg.has(sSugg)) {
let nDist = Math.floor(str_transform.distanceDamerauLevenshtein(this.sSimplifiedWord, str_transform.simplifyWord(sSugg)));
if (nDist <= this.nDistLimit) {
if (sSugg.includes(" ")) { // add 1 to distance for split suggestions
nDist += 1;
}
if (!this.dSugg.has(nDist)) {
this.dSugg.set(nDist, []);
}
|
| ︙ | ︙ |
Modified graphspell-js/str_transform.js from [85944d11a1] to [8eb8f4c63c].
| ︙ | ︙ | |||
140 141 142 143 144 145 146 |
matrix[i-1][j-1] + nCost // Substitution
);
if (i > 1 && j > 1 && s1[i] == s2[j-1] && s1[i-1] == s2[j]) {
matrix[i][j] = Math.min(matrix[i][j], matrix[i-2][j-2] + nCost); // Transposition
}
}
}
| | | 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
matrix[i-1][j-1] + nCost // Substitution
);
if (i > 1 && j > 1 && s1[i] == s2[j-1] && s1[i-1] == s2[j]) {
matrix[i][j] = Math.min(matrix[i][j], matrix[i-2][j-2] + nCost); // Transposition
}
}
}
return matrix[nLen1][nLen2];
}
catch (e) {
console.error(e);
}
},
showDistance (s1, s2) {
|
| ︙ | ︙ |
Modified graphspell/ibdawg.py from [e07922ff50] to [d16ed0d683].
| ︙ | ︙ | |||
53 54 55 56 57 58 59 |
"add a suggestion"
#logging.info((nDeep * " ") + "__" + sSugg + "__")
if sSugg in self.aAllSugg:
return
self.aAllSugg.add(sSugg)
if sSugg not in self.aSugg:
#nDist = min(st.distanceDamerauLevenshtein(self.sWord, sSugg), st.distanceDamerauLevenshtein(self.sSimplifiedWord, st.simplifyWord(sSugg)))
| | | 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
"add a suggestion"
#logging.info((nDeep * " ") + "__" + sSugg + "__")
if sSugg in self.aAllSugg:
return
self.aAllSugg.add(sSugg)
if sSugg not in self.aSugg:
#nDist = min(st.distanceDamerauLevenshtein(self.sWord, sSugg), st.distanceDamerauLevenshtein(self.sSimplifiedWord, st.simplifyWord(sSugg)))
nDist = int(st.distanceDamerauLevenshtein(self.sSimplifiedWord, st.simplifyWord(sSugg)))
#logging.info((nDeep * " ") + "__" + sSugg + "__ :" + self.sSimplifiedWord +"|"+ st.simplifyWord(sSugg) +" -> "+ str(nDist))
if nDist <= self.nDistLimit:
if " " in sSugg:
nDist += 1
if nDist not in self.dSugg:
self.dSugg[nDist] = []
self.dSugg[nDist].append(sSugg)
|
| ︙ | ︙ | |||
77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
bFirstListSorted = False
for nDist, lSugg in self.dSugg.items():
if nDist > self.nDistLimit:
break
if not bFirstListSorted and len(lSugg) > 1:
lSugg.sort(key=lambda sSugg: st.distanceDamerauLevenshtein(self.sWord, sSugg))
bFirstListSorted = True
lRes.extend(lSugg)
if len(lRes) > nSuggLimit:
break
if self.sWord.isupper():
lRes = list(OrderedDict.fromkeys(map(lambda sSugg: sSugg.upper(), lRes))) # use dict, when Python 3.6+
elif self.sWord[0:1].isupper():
# dont’ use <.istitle>
| > > > | 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
bFirstListSorted = False
for nDist, lSugg in self.dSugg.items():
if nDist > self.nDistLimit:
break
if not bFirstListSorted and len(lSugg) > 1:
lSugg.sort(key=lambda sSugg: st.distanceDamerauLevenshtein(self.sWord, sSugg))
bFirstListSorted = True
#print(nDist, "|".join(lSugg))
#for sSugg in lSugg:
# print(sSugg, st.distanceDamerauLevenshtein(self.sWord, sSugg))
lRes.extend(lSugg)
if len(lRes) > nSuggLimit:
break
if self.sWord.isupper():
lRes = list(OrderedDict.fromkeys(map(lambda sSugg: sSugg.upper(), lRes))) # use dict, when Python 3.6+
elif self.sWord[0:1].isupper():
# dont’ use <.istitle>
|
| ︙ | ︙ |
Modified graphspell/str_transform.py from [c7bf16d7ce] to [fea83aad08].
| ︙ | ︙ | |||
98 99 100 101 102 103 104 |
d[i, j] = min(
d[i-1, j] + 1, # Deletion
d[i, j-1] + 1, # Insertion
d[i-1, j-1] + nCost, # Substitution
)
if i and j and s1[i] == s2[j-1] and s1[i-1] == s2[j]:
d[i, j] = min(d[i, j], d[i-2, j-2] + nCost) # Transposition
| | | 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
d[i, j] = min(
d[i-1, j] + 1, # Deletion
d[i, j-1] + 1, # Insertion
d[i-1, j-1] + nCost, # Substitution
)
if i and j and s1[i] == s2[j-1] and s1[i-1] == s2[j]:
d[i, j] = min(d[i, j], d[i-2, j-2] + nCost) # Transposition
return d[nLen1-1, nLen2-1]
def distanceSift4 (s1, s2, nMaxOffset=5):
"implementation of general Sift4."
# https://siderite.blogspot.com/2014/11/super-fast-and-accurate-string-distance.html
if not s1:
return len(s2)
|
| ︙ | ︙ |