Overview
| Comment: | [core] ibdawg: clean words before damerau-levenshtein comparison |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk | core |
| Files: | files | file ages | folders |
| SHA3-256: |
1329ae8f1c84636f6974834cc2bb7383 |
| User & Date: | olr on 2017-10-25 11:37:33 |
| Other Links: | manifest | tags |
Context
|
2017-10-25
| ||
| 14:30 | [core][fr] ibdawg: char_player > phonème o check-in: 0ad1970e9c user: olr tags: trunk, fr, core | |
| 11:37 | [core] ibdawg: clean words before damerau-levenshtein comparison check-in: 1329ae8f1c user: olr tags: trunk, core | |
| 09:41 | [core] ibdawg: suggestion mechanism > split word function check-in: 388e8809cf user: olr tags: trunk, core | |
Changes
Modified gc_core/js/char_player.js from [c0ed55106f] to [9c8e1eeca8].
1 2 3 4 5 6 7 8 9 10 11 12 13 14 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 | + + + + + + + + + + + + + + + + + + + + |
// list of similar chars
// useful for suggestion mechanism
${map}
var char_player = {
// Replacement table read by cleanWord(): each key is a lowercase accented
// letter or a ligature, each value is the unaccented letter(s) substituted
// for it (the ligatures 'œ' and 'æ' map to two-letter strings).
_dTransChars: new Map([
['à', 'a'], ['é', 'e'], ['î', 'i'], ['ô', 'o'], ['û', 'u'], ['ÿ', 'y'],
['â', 'a'], ['è', 'e'], ['ï', 'i'], ['ö', 'o'], ['ù', 'u'], ['ŷ', 'y'],
['ä', 'a'], ['ê', 'e'], ['í', 'i'], ['ó', 'o'], ['ü', 'u'], ['ý', 'y'],
['á', 'a'], ['ë', 'e'], ['ì', 'i'], ['ò', 'o'], ['ú', 'u'], ['ỳ', 'y'],
['ā', 'a'], ['ē', 'e'], ['ī', 'i'], ['ō', 'o'], ['ū', 'u'], ['ȳ', 'y'],
['ñ', 'n'],
['œ', 'oe'], ['æ', 'ae'],
]),
cleanWord: function (sWord) {
// word simplication before calculating distance between words
sWord = sWord.toLowerCase();
let sRes = "";
for (let c of sWord) {
sRes += this._dTransChars.gl_get(c, c);
}
return sWord;
},
distanceDamerauLevenshtein: function (s1, s2) {
// distance of Damerau-Levenshtein between <s1> and <s2>
// https://fr.wikipedia.org/wiki/Distance_de_Damerau-Levenshtein
try {
let nLen1 = s1.length;
let nLen2 = s2.length;
|
| ︙ | |||
52 53 54 55 56 57 58 | 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 | - + |
'â', 'è', 'ï', 'ö', 'ù', 'ŷ',
'ä', 'ê', 'í', 'ó', 'ü', 'ý',
'á', 'ë', 'ì', 'ò', 'ú', 'ỳ',
'ā', 'ē', 'ī', 'ō', 'ū', 'ȳ',
'h', 'œ', 'æ'
]),
|
| ︙ |
Modified gc_core/js/ibdawg.js from [c871817c8f] to [82209aec2c].
| ︙ | |||
199 200 201 202 203 204 205 | 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 | - + + - + |
if (sWord.gl_isTitle()) {
aSugg.gl_update(this._suggest(sWord.toLowerCase(), nMaxDel, nMaxHardRepl));
}
else if (sWord.gl_isLowerCase()) {
aSugg.gl_update(this._suggest(sWord.gl_toCapitalize(), nMaxDel, nMaxHardRepl));
}
if (aSugg.size == 0) {
|
| ︙ |
Modified gc_core/py/char_player.py from [2ac4c0eb20] to [83d2f45b7c].
1 2 3 4 5 6 7 8 9 10 11 12 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 | + + + + + + + + + + + + + + + |
# list of similar chars
# useful for suggestion mechanism
import re
# Translation table applied by cleanWord() through str.translate():
# maps lowercase accented letters to their unaccented letter and the
# ligatures 'œ' / 'æ' to the two-letter strings 'oe' / 'ae'.
_xTransChars = str.maketrans({
'à': 'a', 'é': 'e', 'î': 'i', 'ô': 'o', 'û': 'u', 'ÿ': 'y',
'â': 'a', 'è': 'e', 'ï': 'i', 'ö': 'o', 'ù': 'u', 'ŷ': 'y',
'ä': 'a', 'ê': 'e', 'í': 'i', 'ó': 'o', 'ü': 'u', 'ý': 'y',
'á': 'a', 'ë': 'e', 'ì': 'i', 'ò': 'o', 'ú': 'u', 'ỳ': 'y',
'ā': 'a', 'ē': 'e', 'ī': 'i', 'ō': 'o', 'ū': 'u', 'ȳ': 'y',
'ñ': 'n',
'œ': 'oe', 'æ': 'ae',
})
def cleanWord (sWord):
    "simplify <sWord> (lowercased, then translated with _xTransChars) before measuring the distance between words"
    sLowered = sWord.lower()
    return sLowered.translate(_xTransChars)
def distanceDamerauLevenshtein (s1, s2):
"distance of Damerau-Levenshtein between <s1> and <s2>"
# https://fr.wikipedia.org/wiki/Distance_de_Damerau-Levenshtein
d = {}
nLen1 = len(s1)
nLen2 = len(s2)
|
| ︙ | |||
41 42 43 44 45 46 47 | 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 | - + | _xTransVovels = str.maketrans(_dVovels) aVovels = frozenset(_dVovels.keys()) |
| ︙ |
Modified gc_core/py/ibdawg.py from [e132c3a736] to [f563ae7bdb].
| ︙ | |||
196 197 198 199 200 201 202 | 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 | - + + - + |
if sWord.istitle():
aSugg.update(self._suggest(sWord.lower(), nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl))
aSugg = set(map(lambda sSugg: sSugg.title(), aSugg))
elif sWord.islower():
aSugg.update(self._suggest(sWord.title(), nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl))
if not aSugg:
#print("crush useless chars")
|
| ︙ |