Overview
| Comment: | [core] suggestion engine: word simplification |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk | core |
| Files: | files | file ages | folders |
| SHA3-256: |
fa01465ba8b5c6f925ff3794cf39b445 |
| User & Date: | olr on 2017-11-21 17:54:06 |
| Other Links: | manifest | tags |
Context
|
2017-11-22
| ||
| 14:57 | [core] ibdawg: bug fixed and code cleaning check-in: fbf59c7547 user: olr tags: trunk, core | |
|
2017-11-21
| ||
| 17:54 | [core] suggestion engine: word simplification check-in: fa01465ba8 user: olr tags: trunk, core | |
| 17:06 | [build] use Firefox Nightly 64 bits check-in: f424d4f1d9 user: olr tags: trunk, build | |
Changes
Modified gc_core/js/char_player.js from [210ed0c935] to [2d3bad14c9].
| ︙ | ︙ | |||
8 9 10 11 12 13 14 |
_dTransChars: new Map([
['à', 'a'], ['é', 'e'], ['î', 'i'], ['ô', 'o'], ['û', 'u'], ['ÿ', 'i'], ['y', 'i'],
['â', 'a'], ['è', 'e'], ['ï', 'i'], ['ö', 'o'], ['ù', 'u'], ['ŷ', 'i'],
['ä', 'a'], ['ê', 'e'], ['í', 'i'], ['ó', 'o'], ['ü', 'u'], ['ý', 'i'],
['á', 'a'], ['ë', 'e'], ['ì', 'i'], ['ò', 'o'], ['ú', 'u'], ['ỳ', 'i'],
['ā', 'a'], ['ē', 'e'], ['ī', 'i'], ['ō', 'o'], ['ū', 'u'], ['ȳ', 'i'],
| | | | 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
_dTransChars: new Map([
['à', 'a'], ['é', 'e'], ['î', 'i'], ['ô', 'o'], ['û', 'u'], ['ÿ', 'i'], ['y', 'i'],
['â', 'a'], ['è', 'e'], ['ï', 'i'], ['ö', 'o'], ['ù', 'u'], ['ŷ', 'i'],
['ä', 'a'], ['ê', 'e'], ['í', 'i'], ['ó', 'o'], ['ü', 'u'], ['ý', 'i'],
['á', 'a'], ['ë', 'e'], ['ì', 'i'], ['ò', 'o'], ['ú', 'u'], ['ỳ', 'i'],
['ā', 'a'], ['ē', 'e'], ['ī', 'i'], ['ō', 'o'], ['ū', 'u'], ['ȳ', 'i'],
['ñ', 'n'], ['k', 'q'], ['w', 'v'],
['œ', 'oe'], ['æ', 'ae'],
]),
cleanWord: function (sWord) {
// word simplication before calculating distance between words
sWord = sWord.toLowerCase();
let sNewWord = "";
let i = 1;
for (let c of sWord) {
let cNew = this._dTransChars.gl_get(c, c);
let cNext = sWord.slice(i, i+1)
if (cNew != this._dTransChars.gl_get(cNext, cNext)) {
sNewWord += cNew;
}
i++;
}
return sNewWord.replace("eau", "o").replace("au", "o").replace("ai", "e").replace("ei", "e").replace("ph", "f");
},
aVowel: new Set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ"),
aConsonant: new Set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ"),
aDouble: new Set("bcdfjklmnprstzBCDFJKLMNPRSTZ"), // letters that may be used twice successively
|
| ︙ | ︙ |
Modified gc_core/py/char_player.py from [7deb983466] to [a33649b94f].
1 2 3 4 5 6 7 8 9 10 11 12 |
# list of similar chars
# useful for suggestion mechanism
import re
_xTransChars = str.maketrans({
'à': 'a', 'é': 'e', 'î': 'i', 'ô': 'o', 'û': 'u', 'ÿ': 'i', "y": "i",
'â': 'a', 'è': 'e', 'ï': 'i', 'ö': 'o', 'ù': 'u', 'ŷ': 'i',
'ä': 'a', 'ê': 'e', 'í': 'i', 'ó': 'o', 'ü': 'u', 'ý': 'i',
'á': 'a', 'ë': 'e', 'ì': 'i', 'ò': 'o', 'ú': 'u', 'ỳ': 'i',
'ā': 'a', 'ē': 'e', 'ī': 'i', 'ō': 'o', 'ū': 'u', 'ȳ': 'i',
| | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
# list of similar chars
# useful for suggestion mechanism
import re
_xTransChars = str.maketrans({
'à': 'a', 'é': 'e', 'î': 'i', 'ô': 'o', 'û': 'u', 'ÿ': 'i', "y": "i",
'â': 'a', 'è': 'e', 'ï': 'i', 'ö': 'o', 'ù': 'u', 'ŷ': 'i',
'ä': 'a', 'ê': 'e', 'í': 'i', 'ó': 'o', 'ü': 'u', 'ý': 'i',
'á': 'a', 'ë': 'e', 'ì': 'i', 'ò': 'o', 'ú': 'u', 'ỳ': 'i',
'ā': 'a', 'ē': 'e', 'ī': 'i', 'ō': 'o', 'ū': 'u', 'ȳ': 'i',
'ñ': 'n', 'k': 'q', 'w': 'v',
'œ': 'oe', 'æ': 'ae',
})
def cleanWord (sWord):
"word simplication before calculating distance between words"
sWord = sWord.lower().translate(_xTransChars)
sNewWord = ""
for i, c in enumerate(sWord, 1):
if c != sWord[i:i+1]:
sNewWord += c
return sNewWord.replace("eau", "o").replace("au", "o").replace("ai", "e").replace("ei", "e").replace("ph", "f")
aVowel = set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ")
aConsonant = set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ")
aDouble = set("bcdfjklmnprstzBCDFJKLMNPRSTZ") # letters that may be used twice successively
|
| ︙ | ︙ |