60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
|
("(?i)([a-zàâäéèêëîïôöûüù]),(?=[0-9])", "\\1, ")],
# Put a space on both sides of a dash (hyphen-minus, en dash or em dash) that
# currently has a space on one side only. `\\1` re-inserts the captured dash.
# The letter classes are lowercase as written (no inline (?i) flag here).
# Rule 1: " -x" -> " - x" when the dash is preceded by a space and immediately
# followed by a letter or an opening quote (", «, “, ', ‘).
"add_space_around_hyphens": [(" ([-–—])(?=[a-zàâäéèêëîïôöûüù\"«“'‘])", " \\1 "),
# Rule 2: "x- " -> "x - " when the dash is followed by a space and immediately
# preceded by a letter or a closing quote (", », ”, ', ’).
("(?<=[a-zàâäéèêëîïôöûüù\"»”'’])([-–—]) ", " \\1 ")],
# Remove the space after the comma in "DnT, w" / "DnT, A" (the single letter
# must end at a word boundary), giving "DnT,w" / "DnT,A".
# NOTE(review): presumably this undoes a space inserted by a comma-spacing rule
# defined elsewhere in this table - confirm against the rule application order.
"add_space_repair": [("DnT, ([wA])\\b", "DnT,\\1")],
## erase
"erase_non_breaking_hyphens": [("", "")],
## typographic signs
# Replace apostrophe look-alikes (' ´ ‘ ′ `) with the typographic apostrophe ’.
# Both rules are case-insensitive via the inline (?i) flag.
# Rule 1: a single elided letter (l, d, n, j, m, t, s, c, ç) at a word boundary,
# followed by a look-alike and then a word character (lookahead, not consumed).
"ts_apostrophe": [ ("(?i)\\b([ldnjmtscç])['´‘′`](?=\\w)", "\\1’"),
# Rule 2: one of the listed multi-letter prefixes followed by a look-alike.
# Unlike rule 1 there is no leading \b and no lookahead on what follows.
("(?i)(qu|jusqu|lorsqu|puisqu|quoiqu|quelqu|presqu|entr|aujourd|prud)['´‘′`]", "\\1’") ],
# Convert runs of ASCII dots into the single-character ellipsis "…".
# Rule 1: three consecutive dots become one "…".
"ts_ellipsis": [ ("\\.\\.\\.", "…"),
# Rule 2: two dots directly after an existing "…" become another "…"
# (the lookbehind keeps the first ellipsis in place).
("(?<=…)[.][.]", "…"),
# Rule 3: "…" followed by exactly one stray dot (not by two) drops that dot.
("…[.](?![.])", "…") ],
# Normalise a mid-sentence dash to an en dash "–".
# Rule 1: a hyphen-minus or em dash with a space on each side.
"ts_n_dash_middle": [ (" [-—] ", " – "),
# Rule 2: a hyphen-minus or em dash preceded by a space and followed by a comma.
(" [-—],", " –,") ],
# Normalise a mid-sentence dash to an em dash "—".
# Rule 1: a hyphen-minus or en dash with a space on each side.
"ts_m_dash_middle": [ (" [-–] ", " — "),
# Rule 2: a hyphen-minus or en dash preceded by a space and followed by a comma.
(" [-–],", " —,") ],
|
|
|
|
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
|
("(?i)([a-zàâäéèêëîïôöûüù]),(?=[0-9])", "\\1, ")],
# Ensure a dash (hyphen-minus, en dash, em dash) that touches a word on one
# side and a space on the other ends up with a space on both sides.
# The captured dash is written back through the `\\1` backreference.
# No inline (?i) flag is present, so the letter classes are lowercase as written.
# First rule: space + dash + (letter or opening quote " « “ ' ‘) -> " <dash> ".
"add_space_around_hyphens": [(" ([-–—])(?=[a-zàâäéèêëîïôöûüù\"«“'‘])", " \\1 "),
# Second rule: (letter or closing quote " » ” ' ’) + dash + space -> " <dash> ".
("(?<=[a-zàâäéèêëîïôöûüù\"»”'’])([-–—]) ", " \\1 ")],
# Collapse "DnT, w" and "DnT, A" back to "DnT,w" and "DnT,A"; the trailing \b
# requires the captured letter to be the end of its word.
# NOTE(review): looks like a fix-up for a space added by another rule in this
# table - verify which rule and that it runs before this one.
"add_space_repair": [("DnT, ([wA])\\b", "DnT,\\1")],
## erase
"erase_non_breaking_hyphens": [("", "")],
## typographic signs
# Replace apostrophe look-alikes (' ´ ‘ ′ ` ʼ) with the typographic apostrophe ’.
# The inline (?i) flag makes both rules case-insensitive.
# Rule 1: a single elided letter (l, d, n, j, m, t, s, c, ç) starting at a word
# boundary, then a look-alike, then a word character (checked, not consumed).
"ts_apostrophe": [ ("(?i)\\b([ldnjmtscç])['´‘′`ʼ](?=\\w)", "\\1’"),
# Rule 2: one of the listed longer prefixes followed by a look-alike; this rule
# has neither a leading \b nor a lookahead on the following character.
("(?i)(qu|jusqu|lorsqu|puisqu|quoiqu|quelqu|presqu|entr|aujourd|prud)['´‘′`ʼ]", "\\1’") ],
# Turn sequences of ASCII full stops into the ellipsis character "…".
# Rule 1: "..." -> "…".
"ts_ellipsis": [ ("\\.\\.\\.", "…"),
# Rule 2: ".." immediately after a "…" -> a second "…"; the lookbehind leaves
# the preceding ellipsis untouched.
("(?<=…)[.][.]", "…"),
# Rule 3: "…." -> "…" when the dot is not itself followed by another dot.
("…[.](?![.])", "…") ],
# Rewrite a dash used between words as an en dash "–".
# Rule 1: " - " or " — " -> " – ".
"ts_n_dash_middle": [ (" [-—] ", " – "),
# Rule 2: " -," or " —," -> " –,".
(" [-—],", " –,") ],
# Rewrite a dash used between words as an em dash "—".
# Rule 1: " - " or " – " -> " — ".
"ts_m_dash_middle": [ (" [-–] ", " — "),
# Rule 2: " -," or " –," -> " —,".
(" [-–],", " —,") ],
|