60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
|
// nbsp_repair: list of [pattern, replacement] pairs. Each pattern matches one
// spacing character sitting directly in front of punctuation, and the
// replacement re-emits the captured text without that character.
// NOTE(review): the key name suggests these undo non-breaking spaces added by
// another rule set; confirm which character(s) the `[ ]` class really holds in
// the source file, since whitespace may have been normalized in this view.
// Rule 1: "[" or "(" + spacing + one of "! ? : ;"  ->  bracket and punctuation glued together.
"nbsp_repair": [ [/([\[(])[ ]([!?:;])/g, "$1$2"],
// Rule 2: URL scheme (http, https, ftp) + spacing + "://"  ->  scheme immediately followed by "://".
[/(https?|ftp)[ ]:\/\//g, "$1://"],
// Rule 3: named character reference ("&" + lowercase letters) + spacing + ";"  ->  ";" re-attached.
[/&([a-z]+)[ ];/g, "&$1;"],
// Rule 4: numeric character reference ("&#" + decimal digits, or "x" + hex digits) + spacing + ";"  ->  ";" re-attached.
[/&#([0-9]+|x[0-9a-fA-F]+)[ ];/g, "&#$1;"] ],
//// missing spaces
// The three rule sets below are lists of [pattern, replacement] pairs that
// insert a space where one is missing. Lookaheads `(?=...)` only test the next
// character(s); they are not consumed, so only the punctuation is rewritten.
// In a JavaScript replacement string `$&` stands for the whole match.
// NOTE(review): how and in which order the pairs are applied is not visible
// here; presumably each is passed to String.prototype.replace in turn.
//
// add_space_after_punctuation
// Rule 1: ";", "!" or "…" directly followed by a letter or a digit  ->  space appended.
"add_space_after_punctuation":[ [/[;!…](?=[a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ])/g, "$& "],
// Rule 2: "?" directly followed by a character in A-Z, À-Ö, Ø-ß or Ā-ʯ  ->  "? ".
// A "?" followed by a-z, à-ö, ø-ÿ or a digit is left untouched.
[/[?](?=[A-ZÀ-ÖØ-ßĀ-ʯ])/g, "? "],
// Rule 3: "." followed by a letter (any case) and then a letter or digit  ->  ". ".
// Two following characters are required, so a period before a single trailing letter does not match.
[/\.(?=[a-zA-Zà-öÀ-Öø-ÿØ-ßĀ-ʯ][a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ])/g, ". "],
// Rule 4: "." directly followed by "À", whatever comes after the "À"
// (the previous rule only fires when a second letter or digit follows).
[/\.(?=À)/g, ". "],
// Rule 5: "," or ":" directly followed by a letter  ->  space appended. Digits are not in the lookahead.
[/[,:](?=[a-zA-Zà-öÀ-Öø-ÿØ-ßĀ-ʯ])/g, "$& "],
// Rule 6: letter + "," + digit  ->  space after the comma. A letter is required before
// the comma, so a comma between two digits (e.g. "1,5") does not match.
[/([a-zA-Zà-öÀ-Öø-ÿØ-ßĀ-ʯ]),(?=[0-9])/g, "$1, "] ],
// add_space_around_hyphens
// Rule 1: space + "-", "–" or "—" directly followed by a letter, digit or one of " « “ ' ‘
//   ->  space appended after the dash (" -word" becomes " - word").
"add_space_around_hyphens": [ [/ [-–—](?=[a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ"«“'‘])/g, "$& "],
// Rule 2: letter, digit or one of " » ” ' ’ + dash + space
//   ->  space inserted before the dash ("word- " becomes "word - ").
[/([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ"»”'’])([-–—]) /g, "$1 $2 "] ],
// add_space_repair
// Removes the space in "DnT, w" / "DnT, A" (captured letter followed by a word boundary),
// restoring "DnT,w" / "DnT,A".
// NOTE(review): presumably undoes the comma rule above for these literal tokens;
// what the tokens stand for is not visible here.
"add_space_repair": [ [/DnT, ([wA])\b/g, "DnT,$1"] ],
//// erase
|
|
|
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
|
// nbsp_repair: list of [pattern, replacement] pairs. Each pattern matches one
// spacing character sitting directly in front of punctuation, and the
// replacement re-emits the captured text without that character.
// NOTE(review): the key name suggests these undo non-breaking spaces added by
// another rule set; confirm which character(s) the `[ ]` class really holds in
// the source file, since whitespace may have been normalized in this view.
// Rule 1: "[" or "(" + spacing + one of "! ? : ;"  ->  bracket and punctuation glued together.
"nbsp_repair": [ [/([\[(])[ ]([!?:;])/g, "$1$2"],
// Rule 2: URL scheme (http, https, ftp) + spacing + "://"  ->  scheme immediately followed by "://".
[/(https?|ftp)[ ]:\/\//g, "$1://"],
// Rule 3: named character reference ("&" + lowercase letters) + spacing + ";"  ->  ";" re-attached.
[/&([a-z]+)[ ];/g, "&$1;"],
// Rule 4: numeric character reference ("&#" + decimal digits, or "x" + hex digits) + spacing + ";"  ->  ";" re-attached.
[/&#([0-9]+|x[0-9a-fA-F]+)[ ];/g, "&#$1;"] ],
//// missing spaces
// The three rule sets below are lists of [pattern, replacement] pairs that
// insert a space where one is missing. Lookaheads `(?=...)` only test the next
// character(s); they are not consumed, so only the punctuation is rewritten.
// In a JavaScript replacement string `$&` stands for the whole match.
// NOTE(review): how and in which order the pairs are applied is not visible
// here; presumably each is passed to String.prototype.replace in turn.
//
// add_space_after_punctuation
// Rule 1: ";", "!" or "…" directly followed by a letter or a digit  ->  space appended.
"add_space_after_punctuation":[ [/[;!…](?=[a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ])/g, "$& "],
// Rule 2: "?" directly followed by a character in A-Z, À-Ö, Ø-ß or Ā-ʯ  ->  "? ".
// A "?" followed by a-z, à-ö, ø-ÿ or a digit is left untouched.
[/[?](?=[A-ZÀ-ÖØ-ßĀ-ʯ])/g, "? "],
// Rule 3: "." followed by a character in A-Z, À-Ö, Ø-ß or Ā-ʯ and then a letter or digit  ->  ". ".
// A period followed by a-z, à-ö or ø-ÿ is left untouched.
// NOTE(review): presumably this spares dotted lowercase tokens such as file or host names; confirm intent.
[/\.(?=[A-ZÀ-ÖØ-ßĀ-ʯ][a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ])/g, ". "],
// Rule 4: "." directly followed by "À", whatever comes after the "À"
// (the previous rule only fires when a second letter or digit follows).
[/\.(?=À)/g, ". "],
// Rule 5: "," or ":" directly followed by a letter  ->  space appended. Digits are not in the lookahead.
[/[,:](?=[a-zA-Zà-öÀ-Öø-ÿØ-ßĀ-ʯ])/g, "$& "],
// Rule 6: letter + "," + digit  ->  space after the comma. A letter is required before
// the comma, so a comma between two digits (e.g. "1,5") does not match.
[/([a-zA-Zà-öÀ-Öø-ÿØ-ßĀ-ʯ]),(?=[0-9])/g, "$1, "] ],
// add_space_around_hyphens
// Rule 1: space + "-", "–" or "—" directly followed by a letter, digit or one of " « “ ' ‘
//   ->  space appended after the dash (" -word" becomes " - word").
"add_space_around_hyphens": [ [/ [-–—](?=[a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ"«“'‘])/g, "$& "],
// Rule 2: letter, digit or one of " » ” ' ’ + dash + space
//   ->  space inserted before the dash ("word- " becomes "word - ").
[/([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ"»”'’])([-–—]) /g, "$1 $2 "] ],
// add_space_repair
// Removes the space in "DnT, w" / "DnT, A" (captured letter followed by a word boundary),
// restoring "DnT,w" / "DnT,A".
// NOTE(review): presumably undoes the comma rule above for these literal tokens;
// what the tokens stand for is not visible here.
"add_space_repair": [ [/DnT, ([wA])\b/g, "DnT,$1"] ],
//// erase
|