Grammalecte  Diff

Differences From Artifact [f190943db7]:

To Artifact [86523ac3fd]:


35
36
37
38
39
40
41

42
43
44
45
46
47
48
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49







+







                                    ("[  ]+:", " :")],
    "nnbsp_within_quotation_marks":[("«(?=\\w)", "« "),
                                    ("«[  ]+", "« "),
                                    ("(?<=[\\w.!?])»", " »"),
                                    ("[  ]+»", " »")],
    "nnbsp_within_numbers":        [("(\\d)[  ](\\d)", "\\1 \\2")],
    # common
    "nbsp_titles":                 [("M(mes?|ᵐᵉˢ?|grs?|ᵍʳˢ?|lles?|ˡˡᵉˢ?|rs?|ʳˢ?|M\\.) ", "M\\1 ")],
    "nbsp_before_symbol":          [("(\\d) ?([%‰€$£¥˚Ω℃])", "\\1 \\2")],
    "nbsp_before_units":           [("(?<=[0-9⁰¹²³⁴⁵⁶⁷⁸⁹]) ?([kcmµn]?(?:[slgJKΩ]|m[²³]?|Wh?|Hz|dB)|[%‰]|°C)\\b", " \\1")],
    "nbsp_repair":                 [("(?<=[[(])[   ]([!?:;])", "\\1"),
                                    ("(https?|ftp)[   ]:(?=//)", "\\1:"),
                                    ("&([a-z]+)[   ];", "&\\1;"),
                                    ("&#([0-9]+|x[0-9a-fA-F]+)[   ];", "&#\\1;")],
    ## missing spaces
192
193
194
195
196
197
198

199
200
201
202
203
204
205
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207







+







    ("within_quotation_marks", True),
    ("nbsp_before_punctuation", True),
    ("nbsp_within_quotation_marks", True),
    ("nbsp_within_numbers", True),
    ("nnbsp_before_punctuation", False),
    ("nnbsp_within_quotation_marks", False),
    ("nnbsp_within_numbers", False),
    ("nbsp_titles", False),
    ("nbsp_before_symbol", True),
    ("nbsp_before_units", True),
    ("nbsp_repair", True),
    ("add_space_after_punctuation", True),
    ("add_space_around_hyphens", True),
    ("add_space_repair", True),
    ("erase_non_breaking_hyphens", False),