Overview
| Comment: | [graphspell] tokenizer update |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk | graphspell |
| Files: | files | file ages | folders |
| SHA3-256: |
9678e9208c95234de0e80d32e0e8af93 |
| User & Date: | olr on 2020-12-02 07:57:11 |
| Other Links: | manifest | tags |
Context
|
2020-12-02
| ||
| 10:12 | [core][fr] improve suggestions mechanism for imperative forms check-in: ded60afd12 user: olr tags: trunk, fr, core | |
| 07:57 | [graphspell] tokenizer update check-in: 9678e9208c user: olr tags: trunk, graphspell | |
| 07:56 | [misc] sublime text syntax update check-in: 02626b166a user: olr tags: trunk, misc | |
Changes
Modified graphspell-js/tokenizer.js from [8e6d24c94a] to [5dde8e1191].
| ︙ | ︙ | |||
20 21 22 23 24 25 26 |
[/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_-]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_-]{2,}[@.])[a-zA-Z0-9][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_.\/?&!%=+*"'@$#-]+/, 'LINK'],
[/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_-]+/, 'TAG'],
[/^<[a-zA-Z]+.*?>|^<\/[a-zA-Z]+ *>/, 'HTML'],
[/^\[\/?[a-zA-Z]+\]/, 'PSEUDOHTML'],
[/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
[/^\d\d?[h:]\d\d(?:[m:]\d\ds?|)\b/, 'HOUR'],
[/^\d+(?:[.,]\d+|)/, 'NUM'],
| | > > | | 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
[/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_-]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_-]{2,}[@.])[a-zA-Z0-9][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_.\/?&!%=+*"'@$#-]+/, 'LINK'],
[/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_-]+/, 'TAG'],
[/^<[a-zA-Z]+.*?>|^<\/[a-zA-Z]+ *>/, 'HTML'],
[/^\[\/?[a-zA-Z]+\]/, 'PSEUDOHTML'],
[/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
[/^\d\d?[h:]\d\d(?:[m:]\d\ds?|)\b/, 'HOUR'],
[/^\d+(?:[.,]\d+|)/, 'NUM'],
[/^[&%‰€$+±=*/<>⩾⩽#|×¥£§¢฿¬÷@-]/, 'SIGN'],
[/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-stᴀ-ᶿ\u0300-\u036fᵉʳˢⁿᵈ]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-stᴀ-ᶿ\u0300-\u036fᵉʳˢⁿᵈ]+)*/, 'WORD'],
[/^_+/, 'UNDERSCORE'],
[/^[\u2600-\u26ff\u2700-\u27bf\u1f650-\u1f67f\u1f700-\u1f77f\u1f780-\u1f7ff\u1f800-\u1f8ff]/, 'SYMBOL'],
[/^[\u1f300-\u1f5ff\u1f600-\u1f64f\u1f680-\u1f6ff\u1f900-\u1f9ff]+/u, "EMOJI"],
[/^\S/, 'OTHER']
],
"fr":
[
[/^[ \t]+/, 'SPACE'],
[/^\/(?:~|bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_.()-]+)*/, 'FOLDERUNIX'],
[/^[a-zA-Z]:\\(?:Program Files(?: \(x86\)|)|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st.()]+)(?:\\[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_.()-]+)*/, 'FOLDERWIN'],
[/^[,.;:!?…«»“”‘’"(){}\[\]·–—¿¡]/, 'PUNC'],
[/^[A-Z][.][A-Z][.](?:[A-Z][.])*/, 'WORD_ACRONYM'],
[/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_-]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_-]{2,}[@.])[a-zA-Z0-9][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_.\/?&!%=+*"'@$#-]+/, 'LINK'],
[/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_-]+/, 'TAG'],
[/^<[a-zA-Z]+.*?>|^<\/[a-zA-Z]+ *>/, 'HTML'],
[/^\[\/?[a-zA-Z]+\]/, 'PSEUDOHTML'],
[/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
[/^(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu|presqu|quelqu)['’ʼ‘‛´`′‵՚ꞌꞋ]/i, 'WORDELD'],
[/^\d\d?[h:]\d\d(?:[m:]\d\ds?|)\b/, 'HOUR'],
[/^\d+(?:ers?\b|res?\b|è[rm]es?\b|i[èe][mr]es?\b|de?s?\b|nde?s?\b|ès?\b|es?\b|ᵉʳˢ?|ʳᵉˢ?|ᵈᵉ?ˢ?|ⁿᵈᵉ?ˢ?|ᵉˢ?)/, 'WORDORD'],
[/^\d+(?:[.,]\d+|)/, 'NUM'],
[/^[&%‰€$+±=*/<>⩾⩽#|×¥£§¢฿¬÷@-]/, 'SIGN'],
[/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-stᴀ-ᶿ\u0300-\u036fᵉʳˢⁿᵈ]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-stᴀ-ᶿ\u0300-\u036fᵉʳˢⁿᵈ]+)*/, 'WORD'],
[/^_+/, 'UNDERSCORE'],
[/^[\u2600-\u26ff\u2700-\u27bf\u1f650-\u1f67f\u1f700-\u1f77f\u1f780-\u1f7ff\u1f800-\u1f8ff]/, 'SYMBOL'],
[/^[\u1f300-\u1f5ff\u1f600-\u1f64f\u1f680-\u1f6ff\u1f900-\u1f9ff]+/u, "EMOJI"],
[/^\S/u, 'OTHER'],
]
};
|
| ︙ | ︙ |
Modified graphspell/tokenizer.py from [1e4c6dee79] to [f44ddea977].
| ︙ | ︙ | |||
16 17 18 19 20 21 22 |
r'(?P<WORD_ACRONYM>[A-Z][.][A-Z][.](?:[A-Z][.])*)',
r'(?P<LINK>(?:https?://|www[.]|\w+[@.]\w\w+[@.])\w[\w./?&!%=+*"\'@$#-]+)',
r'(?P<HASHTAG>[#@][\w-]+)',
r'(?P<HTML><\w+.*?>|</\w+ *>)',
r'(?P<PSEUDOHTML>\[/?\w+\])',
r'(?P<HOUR>\d\d?[h:]\d\d(?:[m:]\d\ds?|)\b)',
r'(?P<NUM>\d+(?:[.,]\d+))',
| | > > | | 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
r'(?P<WORD_ACRONYM>[A-Z][.][A-Z][.](?:[A-Z][.])*)',
r'(?P<LINK>(?:https?://|www[.]|\w+[@.]\w\w+[@.])\w[\w./?&!%=+*"\'@$#-]+)',
r'(?P<HASHTAG>[#@][\w-]+)',
r'(?P<HTML><\w+.*?>|</\w+ *>)',
r'(?P<PSEUDOHTML>\[/?\w+\])',
r'(?P<HOUR>\d\d?[h:]\d\d(?:[m:]\d\ds?|)\b)',
r'(?P<NUM>\d+(?:[.,]\d+))',
r'(?P<SIGN>[&%‰€$+±=*/<>⩾⩽#|×¥£§¢฿¬÷@-])',
r"(?P<WORD>(?:(?!_)[\w\u0300-\u036f])+(?:[’'`-](?:(?!_)[\w\u0300-\u036f])+)*)", # with combining diacritics
r"(?P<UNDERSCORE>_+)",
r"(?P<SYMBOL>[\u2600-\u26ff\u2700-\u27bf\U0001f650-\U0001f67f\U0001f700-\U0001f77f\U0001f780-\U0001f7ff\U0001f800-\U0001f8ff])",
r"(?P<EMOJI>[\U0001f300-\U0001f5ff\U0001f600-\U0001f64f\U0001f680-\U0001f6ff\U0001f900-\U0001f9ff]+)",
r"(?P<OTHER>\S)"
),
"fr":
(
r'(?P<FOLDERUNIX>/(?:bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:/[\w.()-]+)*)',
r'(?P<FOLDERWIN>[a-zA-Z]:\\(?:Program Files(?: [(]x86[)]|)|[\w.()]+)(?:\\[\w.()-]+)*)',
r'(?P<PUNC>[][,.;:!?…«»“”‘’"(){}·–—¿¡])',
r'(?P<WORD_ACRONYM>[A-Z][.][A-Z][.](?:[A-Z][.])*)',
r'(?P<LINK>(?:https?://|www[.]|\w+[@.]\w\w+[@.])\w[\w./?&!%=+*"\'@$#-]+)',
r'(?P<HASHTAG>[#@][\w-]+)',
r'(?P<HTML><\w+.*?>|</\w+ *>)',
r'(?P<PSEUDOHTML>\[/?\w+\])',
r"(?P<WORDELD>(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu|presqu|quelqu)['’ʼ‘‛´`′‵՚ꞌꞋ])",
r'(?P<WORDORD>\d+(?:ers?|res?|è[rm]es?|i[èe][mr]es?|de?s?|nde?s?|ès?|es?|ᵉʳˢ?|ʳᵉˢ?|ᵈᵉ?ˢ?|ⁿᵈᵉ?ˢ?|ᵉˢ?)\b)',
r'(?P<HOUR>\d\d?[h:]\d\d(?:[m:]\d\ds?|)\b)',
r'(?P<NUM>\d+(?:[.,]\d+|))',
r'(?P<SIGN>[&%‰€$+±=*/<>⩾⩽#|×¥£¢฿§¬÷@-])',
r"(?P<WORD>(?:(?!_)[\w\u0300-\u036f])+(?:[’'`-](?:(?!_)[\w\u0300-\u036f])+)*)", # with combining diacritics
r"(?P<UNDERSCORE>_+)",
r"(?P<SYMBOL>[\u2600-\u26ff\u2700-\u27bf\U0001f650-\U0001f67f\U0001f700-\U0001f77f\U0001f780-\U0001f7ff\U0001f800-\U0001f8ff])",
r"(?P<EMOJI>[\U0001f300-\U0001f5ff\U0001f600-\U0001f64f\U0001f680-\U0001f6ff\U0001f900-\U0001f9ff]+)",
r"(?P<OTHER>\S)"
)
}
|
| ︙ | ︙ |