53
54
55
56
57
58
59
60
61
62
63
64
65
66
|
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
|
+
+
+
+
|
[/[ ]+:/g, " :"] ],
// Rule set "nnbsp_within_quotation_marks": four [pattern, replacement] pairs for the guillemets « and ».
//   1. « immediately followed by a letter or digit -> « + separator + that character.
//   2. « followed by one or more of the bracketed space characters -> « + a single separator.
//   3. a letter, digit or one of . ! ? immediately followed by » -> that character + separator + ».
//   4. one or more of the bracketed space characters before » -> a single separator + ».
// NOTE(review): the rule name suggests the separator in the replacement strings is a narrow
// no-break space; the literal characters are not distinguishable by eye -- confirm before editing.
"nnbsp_within_quotation_marks":[[/«([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ])/g, "« $1"],
[/«[ ]+/g, "« "],
[/([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ.!?])»/g, "$1 »"],
[/[ ]+»/g, " »"] ],
// Rule set "nnbsp_within_numbers": one pair that rewrites "digit, one bracketed space character, digit"
// as the two digits joined by the separator found in the replacement string.
// NOTE(review): both digits are consumed by each match, so in a run of single-digit groups
// such as "1 2 3" only every other gap is rewritten in one pass -- confirm whether that matters.
"nnbsp_within_numbers": [ [/(\d)[ ](\d)/g, "$1 $2"] ],
// common
// Rule set "nbsp_titles": four pairs, one per leading capital (M, P, D, V).
// Each pattern matches, at a word boundary, the capital followed by one of the listed
// suffixes (plain or superscript spelling) and then a space; the replacement re-emits the
// capital and the captured suffix followed by the separator in the replacement string.
//   M: me(s), gr(s), lle(s), r(s) and their superscript forms, plus "M."
//   P: r, re, rs, res (and superscript forms)
//   D: r, re, rs, res (and superscript forms)
//   V: ve, ves (and superscript forms)
// NOTE(review): presumably the replacement separator is a no-break space, per the rule name -- confirm.
"nbsp_titles": [ [/\bM(mes?|ᵐᵉˢ?|grs?|ᵍʳˢ?|lles?|ˡˡᵉˢ?|rs?|ʳˢ?|M[.]) /g, "M$1 "],
[/\bP(re?s?|ʳᵉ?ˢ?) /g, "P$1 "],
[/\bD(re?s?|ʳᵉ?ˢ?) /g, "D$1 "],
[/\bV(ves?|ᵛᵉˢ?) /g, "V$1 "] ],
// Rule set "nbsp_before_symbol": a digit, an optional space, then one of % ‰ € $ £ ¥ ˚ Ω ℃
// is rewritten as digit + separator + symbol. Because the space is optional, the separator
// is also inserted when the symbol directly follows the digit.
// NOTE(review): presumably the replacement separator is a no-break space, per the rule name -- confirm.
"nbsp_before_symbol": [ [/(\d) ?([%‰€$£¥˚Ω℃])/g, "$1 $2"] ],
// Rule set "nbsp_before_units": an ASCII or superscript digit, an optional space, then a unit
// is rewritten as digit + separator + unit. A unit is an optional prefix (k c m µ n) followed by
// one of s l g J K Ω, m / m² / m³, W / Wh, Hz, dB -- or % / ‰ -- or °C.
// NOTE(review): the trailing \b is defined over ASCII word characters [A-Za-z0-9_]. Alternatives
// that end in a non-word character (Ω, %, ‰) therefore only match when a word character follows,
// so e.g. "5 kΩ" followed by a space or end of text is not matched by this rule -- confirm intent.
// NOTE(review): presumably the replacement separator is a no-break space, per the rule name -- confirm.
"nbsp_before_units": [ [/([0-9⁰¹²³⁴⁵⁶⁷⁸⁹]) ?([kcmµn]?(?:[slgJKΩ]|m[²³]?|Wh?|Hz|dB)|[%‰]|°C)\b/g, "$1 $2"] ],
// Rule set "nbsp_repair": four pairs that each delete one bracketed space character
// from a context where it should not appear:
//   1. between an opening [ or ( and a following ! ? : ;
//   2. between a URL scheme (http, https, ftp) and "://"
//   3. between a named HTML entity (&name) and its closing ;
//   4. between a numeric HTML entity (&#123 or &#x1F) and its closing ;
// NOTE(review): presumably this undoes separators added by the *_before_punctuation rules;
// that depends on the order in which the caller applies the rule sets -- verify.
"nbsp_repair": [ [/([\[(])[ ]([!?:;])/g, "$1$2"],
[/(https?|ftp)[ ]:\/\//g, "$1://"],
[/&([a-z]+)[ ];/g, "&$1;"],
[/&#([0-9]+|x[0-9a-fA-F]+)[ ];/g, "&#$1;"] ],
//// missing spaces
|
211
212
213
214
215
216
217
218
219
220
221
222
223
224
|
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
|
+
|
["within_quotation_marks", true],
["nbsp_before_punctuation", true],
["nbsp_within_quotation_marks", true],
["nbsp_within_numbers", true],
["nnbsp_before_punctuation", false],
["nnbsp_within_quotation_marks", false],
["nnbsp_within_numbers", false],
["nbsp_titles", false],
["nbsp_before_symbol", true],
["nbsp_before_units", true],
["nbsp_repair", true],
["add_space_after_punctuation", true],
["add_space_around_hyphens", true],
["add_space_repair", true],
["erase_non_breaking_hyphens", false],
|