Grammalecte  Diff

Differences From Artifact [8fb9ec33bf]:

To Artifact [d3e695233d]:


1




2
3
4
5
6
7
8
#!python3





import re


dReplTable = {
    # surnumerary_spaces
    "start_of_paragraph":          [("^[  ]+", "")],

>
>
>
>







1
2
3
4
5
6
7
8
9
10
11
12
#!python3

"""
Text formatter
"""

import re


dReplTable = {
    # surnumerary_spaces
    "start_of_paragraph":          [("^[  ]+", "")],
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
    "erase_non_breaking_hyphens":  [("­", "")],
    ## typographic signs
    "ts_apostrophe":          [ ("(?i)\\b([ldnjmtscç])['´‘′`](?=\\w)", "\\1’"),
                                ("(?i)(qu|jusqu|lorsqu|puisqu|quoiqu|quelqu|presqu|entr|aujourd|prud)['´‘′`]", "\\1’") ],
    "ts_ellipsis":            [ ("\\.\\.\\.", "…"),
                                ("(?<=…)[.][.]", "…"),
                                ("…[.](?![.])", "…") ],
    "ts_n_dash_middle":       [ (" [-—] ", " – "), 
                                (" [-—],", " –,") ],
    "ts_m_dash_middle":       [ (" [-–] ", " — "),
                                (" [-–],", " —,") ],
    "ts_n_dash_start":        [ ("^[-—][  ]", "– "),
                                ("^– ", "– "),
                                ("^[-–—](?=[\\w.…])", "– ") ],
    "ts_m_dash_start":        [ ("^[-–][  ]", "— "),







|







67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
    "erase_non_breaking_hyphens":  [("­", "")],
    ## typographic signs
    "ts_apostrophe":          [ ("(?i)\\b([ldnjmtscç])['´‘′`](?=\\w)", "\\1’"),
                                ("(?i)(qu|jusqu|lorsqu|puisqu|quoiqu|quelqu|presqu|entr|aujourd|prud)['´‘′`]", "\\1’") ],
    "ts_ellipsis":            [ ("\\.\\.\\.", "…"),
                                ("(?<=…)[.][.]", "…"),
                                ("…[.](?![.])", "…") ],
    "ts_n_dash_middle":       [ (" [-—] ", " – "),
                                (" [-—],", " –,") ],
    "ts_m_dash_middle":       [ (" [-–] ", " — "),
                                (" [-–],", " —,") ],
    "ts_n_dash_start":        [ ("^[-—][  ]", "– "),
                                ("^– ", "– "),
                                ("^[-–—](?=[\\w.…])", "– ") ],
    "ts_m_dash_start":        [ ("^[-–][  ]", "— "),