16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
|
r'(?P<WORD_ACRONYM>[A-Z][.][A-Z][.](?:[A-Z][.])*)',
r'(?P<LINK>(?:https?://|www[.]|\w+[@.]\w\w+[@.])\w[\w./?&!%=+*"\'@$#-]+)',
r'(?P<HASHTAG>[#@][\w-]+)',
r'(?P<HTML><\w+.*?>|</\w+ *>)',
r'(?P<PSEUDOHTML>\[/?\w+\])',
r'(?P<HOUR>\d\d?[h:]\d\d(?:[m:]\d\ds?|)\b)',
r'(?P<NUM>\d+(?:[.,]\d+))',
r'(?P<SIGN>[&%‰€$+±=*/<>⩾⩽#|×¥£§¢¬÷@-])',
r"(?P<WORD>(?:(?!_)[\w\u0300-\u036f])+(?:[’'`-](?:(?!_)[\w\u0300-\u036f])+)*)", # with combining diacritics
r"(?P<UNDERSCORE>_+)",
r"(?P<OTHER>\S)"
),
"fr":
(
r'(?P<FOLDERUNIX>/(?:bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:/[\w.()-]+)*)',
r'(?P<FOLDERWIN>[a-zA-Z]:\\(?:Program Files(?: [(]x86[)]|)|[\w.()]+)(?:\\[\w.()-]+)*)',
r'(?P<PUNC>[][,.;:!?…«»“”‘’"(){}·–—¿¡])',
r'(?P<WORD_ACRONYM>[A-Z][.][A-Z][.](?:[A-Z][.])*)',
r'(?P<LINK>(?:https?://|www[.]|\w+[@.]\w\w+[@.])\w[\w./?&!%=+*"\'@$#-]+)',
r'(?P<HASHTAG>[#@][\w-]+)',
r'(?P<HTML><\w+.*?>|</\w+ *>)',
r'(?P<PSEUDOHTML>\[/?\w+\])',
r"(?P<WORDELD>(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu|presqu|quelqu)['’ʼ‘‛´`′‵՚ꞌꞋ])",
r'(?P<WORDORD>\d+(?:ers?|res?|è[rm]es?|i[èe][mr]es?|de?s?|nde?s?|ès?|es?|ᵉʳˢ?|ʳᵉˢ?|ᵈᵉ?ˢ?|ⁿᵈᵉ?ˢ?|ᵉˢ?)\b)',
r'(?P<HOUR>\d\d?[h:]\d\d(?:[m:]\d\ds?|)\b)',
r'(?P<NUM>\d+(?:[.,]\d+|))',
r'(?P<SIGN>[&%‰€$+±=*/<>⩾⩽#|×¥£¢§¬÷@-])',
r"(?P<WORD>(?:(?!_)[\w\u0300-\u036f])+(?:[’'`-](?:(?!_)[\w\u0300-\u036f])+)*)", # with combining diacritics
r"(?P<UNDERSCORE>_+)",
r"(?P<SYMBOL>[\u2600-\u26ff\u2700-\u27bf\U0001f650-\U0001f67f\U0001f700-\U0001f77f\U0001f780-\U0001f7ff\U0001f800-\U0001f8ff])",
r"(?P<EMOJI>[\U0001f300-\U0001f5ff\U0001f600-\U0001f64f\U0001f680-\U0001f6ff\U0001f900-\U0001f9ff]+)",
r"(?P<OTHER>\S)"
)
}
|
|
>
>
|
|
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
|
r'(?P<WORD_ACRONYM>[A-Z][.][A-Z][.](?:[A-Z][.])*)',
r'(?P<LINK>(?:https?://|www[.]|\w+[@.]\w\w+[@.])\w[\w./?&!%=+*"\'@$#-]+)',
r'(?P<HASHTAG>[#@][\w-]+)',
r'(?P<HTML><\w+.*?>|</\w+ *>)',
r'(?P<PSEUDOHTML>\[/?\w+\])',
r'(?P<HOUR>\d\d?[h:]\d\d(?:[m:]\d\ds?|)\b)',
r'(?P<NUM>\d+(?:[.,]\d+))',
r'(?P<SIGN>[&%‰€$+±=*/<>⩾⩽#|×¥£§¢฿¬÷@-])',
r"(?P<WORD>(?:(?!_)[\w\u0300-\u036f])+(?:[’'`-](?:(?!_)[\w\u0300-\u036f])+)*)", # with combining diacritics
r"(?P<UNDERSCORE>_+)",
r"(?P<SYMBOL>[\u2600-\u26ff\u2700-\u27bf\U0001f650-\U0001f67f\U0001f700-\U0001f77f\U0001f780-\U0001f7ff\U0001f800-\U0001f8ff])",
r"(?P<EMOJI>[\U0001f300-\U0001f5ff\U0001f600-\U0001f64f\U0001f680-\U0001f6ff\U0001f900-\U0001f9ff]+)",
r"(?P<OTHER>\S)"
),
"fr":
(
r'(?P<FOLDERUNIX>/(?:bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:/[\w.()-]+)*)',
r'(?P<FOLDERWIN>[a-zA-Z]:\\(?:Program Files(?: [(]x86[)]|)|[\w.()]+)(?:\\[\w.()-]+)*)',
r'(?P<PUNC>[][,.;:!?…«»“”‘’"(){}·–—¿¡])',
r'(?P<WORD_ACRONYM>[A-Z][.][A-Z][.](?:[A-Z][.])*)',
r'(?P<LINK>(?:https?://|www[.]|\w+[@.]\w\w+[@.])\w[\w./?&!%=+*"\'@$#-]+)',
r'(?P<HASHTAG>[#@][\w-]+)',
r'(?P<HTML><\w+.*?>|</\w+ *>)',
r'(?P<PSEUDOHTML>\[/?\w+\])',
r"(?P<WORDELD>(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu|presqu|quelqu)['’ʼ‘‛´`′‵՚ꞌꞋ])",
r'(?P<WORDORD>\d+(?:ers?|res?|è[rm]es?|i[èe][mr]es?|de?s?|nde?s?|ès?|es?|ᵉʳˢ?|ʳᵉˢ?|ᵈᵉ?ˢ?|ⁿᵈᵉ?ˢ?|ᵉˢ?)\b)',
r'(?P<HOUR>\d\d?[h:]\d\d(?:[m:]\d\ds?|)\b)',
r'(?P<NUM>\d+(?:[.,]\d+|))',
r'(?P<SIGN>[&%‰€$+±=*/<>⩾⩽#|×¥£¢฿§¬÷@-])',
r"(?P<WORD>(?:(?!_)[\w\u0300-\u036f])+(?:[’'`-](?:(?!_)[\w\u0300-\u036f])+)*)", # with combining diacritics
r"(?P<UNDERSCORE>_+)",
r"(?P<SYMBOL>[\u2600-\u26ff\u2700-\u27bf\U0001f650-\U0001f67f\U0001f700-\U0001f77f\U0001f780-\U0001f7ff\U0001f800-\U0001f8ff])",
r"(?P<EMOJI>[\U0001f300-\U0001f5ff\U0001f600-\U0001f64f\U0001f680-\U0001f6ff\U0001f900-\U0001f9ff]+)",
r"(?P<OTHER>\S)"
)
}
|