Grammalecte  Diff

Differences From Artifact [f190943db7]:

To Artifact [8fb9ec33bf]:


35
36
37
38
39
40
41




42
43
44
45
46
47
48
                                    ("[  ]+:", " :")],
    "nnbsp_within_quotation_marks":[("«(?=\\w)", "« "),
                                    ("«[  ]+", "« "),
                                    ("(?<=[\\w.!?])»", " »"),
                                    ("[  ]+»", " »")],
    "nnbsp_within_numbers":        [("(\\d)[  ](\\d)", "\\1 \\2")],
    # common




    "nbsp_before_symbol":          [("(\\d) ?([%‰€$£¥˚Ω℃])", "\\1 \\2")],
    "nbsp_before_units":           [("(?<=[0-9⁰¹²³⁴⁵⁶⁷⁸⁹]) ?([kcmµn]?(?:[slgJKΩ]|m[²³]?|Wh?|Hz|dB)|[%‰]|°C)\\b", " \\1")],
    "nbsp_repair":                 [("(?<=[[(])[   ]([!?:;])", "\\1"),
                                    ("(https?|ftp)[   ]:(?=//)", "\\1:"),
                                    ("&([a-z]+)[   ];", "&\\1;"),
                                    ("&#([0-9]+|x[0-9a-fA-F]+)[   ];", "&#\\1;")],
    ## missing spaces







>
>
>
>







35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
                                    ("[  ]+:", " :")],
    "nnbsp_within_quotation_marks":[("«(?=\\w)", "« "),
                                    ("«[  ]+", "« "),
                                    ("(?<=[\\w.!?])»", " »"),
                                    ("[  ]+»", " »")],
    "nnbsp_within_numbers":        [("(\\d)[  ](\\d)", "\\1 \\2")],
    # common
    "nbsp_titles":                 [("\\bM(mes?|ᵐᵉˢ?|grs?|ᵍʳˢ?|lles?|ˡˡᵉˢ?|rs?|ʳˢ?|M\\.) ", "M\\1 "),
                                    ("\\bP(re?s?|ʳᵉ?ˢ?) ", "P\\1 "),
                                    ("\\bD(re?s?|ʳᵉ?ˢ?) ", "D\\1 "),
                                    ("\\bV(ves?|ᵛᵉˢ?) ", "V\\1 ")],
    "nbsp_before_symbol":          [("(\\d) ?([%‰€$£¥˚Ω℃])", "\\1 \\2")],
    "nbsp_before_units":           [("(?<=[0-9⁰¹²³⁴⁵⁶⁷⁸⁹]) ?([kcmµn]?(?:[slgJKΩ]|m[²³]?|Wh?|Hz|dB)|[%‰]|°C)\\b", " \\1")],
    "nbsp_repair":                 [("(?<=[[(])[   ]([!?:;])", "\\1"),
                                    ("(https?|ftp)[   ]:(?=//)", "\\1:"),
                                    ("&([a-z]+)[   ];", "&\\1;"),
                                    ("&#([0-9]+|x[0-9a-fA-F]+)[   ];", "&#\\1;")],
    ## missing spaces
192
193
194
195
196
197
198

199
200
201
202
203
204
205
    ("within_quotation_marks", True),
    ("nbsp_before_punctuation", True),
    ("nbsp_within_quotation_marks", True),
    ("nbsp_within_numbers", True),
    ("nnbsp_before_punctuation", False),
    ("nnbsp_within_quotation_marks", False),
    ("nnbsp_within_numbers", False),

    ("nbsp_before_symbol", True),
    ("nbsp_before_units", True),
    ("nbsp_repair", True),
    ("add_space_after_punctuation", True),
    ("add_space_around_hyphens", True),
    ("add_space_repair", True),
    ("erase_non_breaking_hyphens", False),







>







196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
    ("within_quotation_marks", True),
    ("nbsp_before_punctuation", True),
    ("nbsp_within_quotation_marks", True),
    ("nbsp_within_numbers", True),
    ("nnbsp_before_punctuation", False),
    ("nnbsp_within_quotation_marks", False),
    ("nnbsp_within_numbers", False),
    ("nbsp_titles", False),
    ("nbsp_before_symbol", True),
    ("nbsp_before_units", True),
    ("nbsp_repair", True),
    ("add_space_after_punctuation", True),
    ("add_space_around_hyphens", True),
    ("add_space_repair", True),
    ("erase_non_breaking_hyphens", False),