Overview
| Comment: | [graphspell] tokenizer and suggestion engine: other apostrophes |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk | graphspell |
| Files: | files | file ages | folders |
| SHA3-256: | b68161b39849bb8fe242ed6d3c766a38 |
| User & Date: | olr on 2020-05-07 10:35:13 |
| Other Links: | manifest | tags |
Context
| 2020-05-07 | | |
| 11:33 | [fr] ajustements, +nr: iel·s +état +ppas check-in: f117d9d93a user: olr tags: trunk, fr | |
| 10:35 | [graphspell] tokenizer and suggestion engine: other apostrophes check-in: b68161b398 user: olr tags: trunk, graphspell | |
| 10:34 | [fr] ajustements: apostrophes check-in: c379fb8706 user: olr tags: trunk, fr | |
Changes
Modified graphspell-js/char_player.js from [76dbb40bcc] to [3caadd8250].
| ︙ | ︙ | |||
105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
aConsonant: new Set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ"),
aDouble: new Set("bcdfjklmnprstzBCDFJKLMNPRSTZ"), // letters that may be used twice successively
// Similar chars
d1to1: new Map([
["1", "1₁liîLIÎ"],
["2", "2₂zZ"],
["3", "3₃eéèêEÉÈÊ"],
["4", "4₄aàâAÀÂ"],
["5", "5₅sgSG"],
["6", "6₆bdgBDG"],
["7", "7₇ltLT"],
| > > > > > > > > > > > > > | 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
aConsonant: new Set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ"),
aDouble: new Set("bcdfjklmnprstzBCDFJKLMNPRSTZ"), // letters that may be used twice successively
// Similar chars
d1to1: new Map([
["'", "'’"], // U+0027: apostrophe droite
["’", "’"], // U+2019: apostrophe typographique (sera utilisée par défaut)
["ʼ", "ʼ’"], // U+02BC: Lettre modificative apostrophe
["‘", "‘’"], // U+2018: guillemet-apostrophe culbuté
["‛", "‛’"], // U+201B: guillemet-virgule supérieur culbuté
["´", "´’"], // U+00B4: accent aigu
["`", "`’"], // U+0060: accent grave
["′", "′’"], // U+2032: prime
["‵", "‵’"], // U+2035: prime réfléchi
["՚", "՚’"], // U+055A: apostrophe arménienne
["ꞌ", "ꞌ’"], // U+A78C: latin minuscule saltillo
["Ꞌ", "Ꞌ’"], // U+A78B: latin majuscule saltillo
["1", "1₁liîLIÎ"],
["2", "2₂zZ"],
["3", "3₃eéèêEÉÈÊ"],
["4", "4₄aàâAÀÂ"],
["5", "5₅sgSG"],
["6", "6₆bdgBDG"],
["7", "7₇ltLT"],
|
| ︙ | ︙ |
Modified graphspell-js/tokenizer.js from [efabea9cdf] to [0e7b889227].
| ︙ | ︙ | |||
35 36 37 38 39 40 41 |
[/^[,.;:!?…«»“”‘’"(){}\[\]·–—¿¡]/, 'PUNC'],
[/^[A-Z][.][A-Z][.](?:[A-Z][.])*/, 'WORD_ACRONYM'],
[/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_-]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_-]{2,}[@.])[a-zA-Z0-9][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_.\/?&!%=+*"'@$#-]+/, 'LINK'],
[/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_-]+/, 'TAG'],
[/^<[a-zA-Z]+.*?>|^<\/[a-zA-Z]+ *>/, 'HTML'],
[/^\[\/?[a-zA-Z]+\]/, 'PSEUDOHTML'],
[/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
| | | 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
[/^[,.;:!?…«»“”‘’"(){}\[\]·–—¿¡]/, 'PUNC'],
[/^[A-Z][.][A-Z][.](?:[A-Z][.])*/, 'WORD_ACRONYM'],
[/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_-]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_-]{2,}[@.])[a-zA-Z0-9][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_.\/?&!%=+*"'@$#-]+/, 'LINK'],
[/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_-]+/, 'TAG'],
[/^<[a-zA-Z]+.*?>|^<\/[a-zA-Z]+ *>/, 'HTML'],
[/^\[\/?[a-zA-Z]+\]/, 'PSEUDOHTML'],
[/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
[/^(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu|presqu|quelqu)['’ʼ‘‛´`′‵՚ꞌꞋ]/i, 'WORD_ELIDED'],
[/^\d\d?[h:]\d\d(?:[m:]\d\ds?|)\b/, 'HOUR'],
[/^\d+(?:ers?\b|res?\b|è[rm]es?\b|i[èe][mr]es?\b|de?s?\b|nde?s?\b|ès?\b|es?\b|ᵉʳˢ?|ʳᵉˢ?|ᵈᵉ?ˢ?|ⁿᵈᵉ?ˢ?|ᵉˢ?)/, 'WORD_ORDINAL'],
[/^\d+(?:[.,]\d+|)/, 'NUM'],
[/^[&%‰€$+±=*/<>⩾⩽#|×¥£§¢¬÷@-]/, 'SIGN'],
[/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-stᴀ-ᶿ\u0300-\u036fᵉʳˢⁿᵈ_]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-stᴀ-ᶿ\u0300-\u036fᵉʳˢⁿᵈ_]+)*/, 'WORD']
]
};
|
| ︙ | ︙ |
Modified graphspell/char_player.py from [fff304a6c5] to [fa338bf2f3].
| ︙ | ︙ | |||
91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
aConsonant = set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ")
aDouble = set("bcdfjklmnprstzBCDFJKLMNPRSTZ") # letters that may be used twice successively
# Similar chars
d1to1 = {
"1": "1₁liîLIÎ",
"2": "2₂zZ",
"3": "3₃eéèêEÉÈÊ",
"4": "4₄aàâAÀÂ",
"5": "5₅sgSG",
"6": "6₆bdgBDG",
"7": "7₇ltLT",
| > > > > > > > > > > > > > | 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
aConsonant = set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ")
aDouble = set("bcdfjklmnprstzBCDFJKLMNPRSTZ") # letters that may be used twice successively
# Similar chars
d1to1 = {
"'": "'’", # U+0027: apostrophe droite
"’": "’", # U+2019: apostrophe typographique (sera utilisée par défaut)
"ʼ": "ʼ’", # U+02BC: Lettre modificative apostrophe
"‘": "‘’", # U+2018: guillemet-apostrophe culbuté
"‛": "‛’", # U+201B: guillemet-virgule supérieur culbuté
"´": "´’", # U+00B4: accent aigu
"`": "`’", # U+0060: accent grave
"′": "′’", # U+2032: prime
"‵": "‵’", # U+2035: prime réfléchi
"՚": "՚’", # U+055A: apostrophe arménienne
"ꞌ": "ꞌ’", # U+A78C: latin minuscule saltillo
"Ꞌ": "Ꞌ’", # U+A78B: latin majuscule saltillo
"1": "1₁liîLIÎ",
"2": "2₂zZ",
"3": "3₃eéèêEÉÈÊ",
"4": "4₄aàâAÀÂ",
"5": "5₅sgSG",
"6": "6₆bdgBDG",
"7": "7₇ltLT",
|
| ︙ | ︙ |
Modified graphspell/tokenizer.py from [81da836011] to [b7228e1a86].
| ︙ | ︙ | |||
29 30 31 32 33 34 35 |
r'(?P<FOLDERWIN>[a-zA-Z]:\\(?:Program Files(?: [(]x86[)]|)|[\w.()]+)(?:\\[\w.()-]+)*)',
r'(?P<PUNC>[][,.;:!?…«»“”‘’"(){}·–—¿¡])',
r'(?P<WORD_ACRONYM>[A-Z][.][A-Z][.](?:[A-Z][.])*)',
r'(?P<LINK>(?:https?://|www[.]|\w+[@.]\w\w+[@.])\w[\w./?&!%=+*"\'@$#-]+)',
r'(?P<HASHTAG>[#@][\w-]+)',
r'(?P<HTML><\w+.*?>|</\w+ *>)',
r'(?P<PSEUDOHTML>\[/?\w+\])',
| | | 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
r'(?P<FOLDERWIN>[a-zA-Z]:\\(?:Program Files(?: [(]x86[)]|)|[\w.()]+)(?:\\[\w.()-]+)*)',
r'(?P<PUNC>[][,.;:!?…«»“”‘’"(){}·–—¿¡])',
r'(?P<WORD_ACRONYM>[A-Z][.][A-Z][.](?:[A-Z][.])*)',
r'(?P<LINK>(?:https?://|www[.]|\w+[@.]\w\w+[@.])\w[\w./?&!%=+*"\'@$#-]+)',
r'(?P<HASHTAG>[#@][\w-]+)',
r'(?P<HTML><\w+.*?>|</\w+ *>)',
r'(?P<PSEUDOHTML>\[/?\w+\])',
r"(?P<WORD_ELIDED>(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu|presqu|quelqu)['’ʼ‘‛´`′‵՚ꞌꞋ])",
r'(?P<WORD_ORDINAL>\d+(?:ers?|res?|è[rm]es?|i[èe][mr]es?|de?s?|nde?s?|ès?|es?|ᵉʳˢ?|ʳᵉˢ?|ᵈᵉ?ˢ?|ⁿᵈᵉ?ˢ?|ᵉˢ?)\b)',
r'(?P<HOUR>\d\d?[h:]\d\d(?:[m:]\d\ds?|)\b)',
r'(?P<NUM>\d+(?:[.,]\d+|))',
r'(?P<SIGN>[&%‰€$+±=*/<>⩾⩽#|×¥£¢§¬÷@-])',
r"(?P<WORD>[\w\u0300-\u036f]+(?:[’'`-][\w\u0300-\u036f]+)*)"
)
}
|
| ︙ | ︙ |