Overview
| Comment: | [core] char_player: better word simplification > remove double letters |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk | core |
| Files: | files | file ages | folders |
| SHA3-256: |
d6353b35f8fb8853bc446ad2554f64f5 |
| User & Date: | olr on 2017-11-16 01:17:48 |
| Other Links: | manifest | tags |
Context
|
2017-11-16
| ||
| 08:36 | [fr] phonet_simil: impact/impacte check-in: 0104769672 user: olr tags: trunk, fr | |
| 01:17 | [core] char_player: better word simplification > remove double letters check-in: d6353b35f8 user: olr tags: trunk, core | |
| 00:32 | [fr] pt: en gage de bonne foi check-in: e857da79dd user: olr tags: trunk, fr | |
Changes
Modified gc_core/js/char_player.js from [ea5bd62884] to [1665a043e0].
| ︙ | ︙ | |||
15 16 17 18 19 20 21 |
['ñ', 'n'],
['œ', 'oe'], ['æ', 'ae'],
]),
cleanWord: function (sWord) {
// word simplification before calculating distance between words
sWord = sWord.toLowerCase();
| > | | > > > | > > | | | 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
['ñ', 'n'],
['œ', 'oe'], ['æ', 'ae'],
]),
cleanWord: function (sWord) {
// word simplication before calculating distance between words
sWord = sWord.toLowerCase();
let sNewWord = "";
let i = 1;
for (let c of sWord) {
let cNew = this._dTransChars.gl_get(c, c);
let cNext = sWord.slice(i, i+1)
if (cNew != this._dTransChars.gl_get(cNext, cNext)) {
sNewWord += cNew;
}
i++;
}
return sNewWord.replace("eau", "o").replace("au", "o").replace("ai", "e").replace("ei", "e");
},
// Vowel characters, lowercase then uppercase, including accented forms and the ligatures œ/æ
aVowel: new Set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ"),
// Consonant characters, lowercase then uppercase, including ç and ñ
aConsonant: new Set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ"),
aDouble: new Set("bcdfjklmnprstzBCDFJKLMNPRSTZ"), // letters that may be used twice successively
// Similar chars
d1to1: new Map([
["1", "liîLIÎ"],
["2", "zZ"],
|
| ︙ | ︙ |
Modified gc_core/py/char_player.py from [a88aa18178] to [a35943535f].
| ︙ | ︙ | |||
12 13 14 15 16 17 18 |
'ā': 'a', 'ē': 'e', 'ī': 'i', 'ō': 'o', 'ū': 'u', 'ȳ': 'y',
'ñ': 'n',
'œ': 'oe', 'æ': 'ae',
})
def cleanWord (sWord):
"word simplication before calculating distance between words"
| | > > > > > | | 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
'ā': 'a', 'ē': 'e', 'ī': 'i', 'ō': 'o', 'ū': 'u', 'ȳ': 'y',
'ñ': 'n',
'œ': 'oe', 'æ': 'ae',
})
def cleanWord (sWord):
    "simplify a word (lowercase, translated chars, no repeated letters, a few vowel groups merged) before measuring the distance between words"
    sSimple = sWord.lower().translate(_xTransChars)
    # collapse every run of identical characters into a single one
    lChars = []
    for c in sSimple:
        if not lChars or lChars[-1] != c:
            lChars.append(c)
    sResult = "".join(lChars)
    # order matters: "eau" must be handled before "au"
    for sOld, sNew in (("eau", "o"), ("au", "o"), ("ai", "e"), ("ei", "e")):
        sResult = sResult.replace(sOld, sNew)
    return sResult
# Vowel characters, lowercase then uppercase, including accented forms and the ligatures œ/æ
aVowel = set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ")
# Consonant characters, lowercase then uppercase, including ç and ñ
aConsonant = set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ")
aDouble = set("bcdfjklmnprstzBCDFJKLMNPRSTZ") # letters that may be used twice successively
# Similar chars
d1to1 = {
"1": "liîLIÎ",
"2": "zZ",
|
| ︙ | ︙ |