117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
|
# "nnbsp4": a single rule that rewrites the whitespace sitting between two digits.
# NOTE(review): the bracketed character class and the replacement string may contain
# no-break / narrow no-break space characters that are visually identical to a plain
# space — inspect the bytes before editing this line.
"nnbsp4": [
("([:digit:])[ ]([:digit:])", "$1 $2", True, True)
],
# "nbsp5": normalises the (optional) space between a number and a following unit.
# The look-behind accepts an ASCII or superscript digit; the captured group is a unit
# symbol with an optional prefix letter (s, l, g, J, K, Ω, ℓ, m/m²/m³, W/Wh, Hz, dB),
# a percent / per-mille sign, or °C.
# NOTE(review): the space in the replacement is presumably a no-break space — confirm
# with a hex view; it cannot be told apart from a plain space on screen.
"nbsp5": [
("(?<=[0-9⁰¹²³⁴⁵⁶⁷⁸⁹]) ?([kcmµnd]?(?:[slgJKΩΩℓ]|m[²³]?|Wh?|Hz|dB)|[%‰]|°C)\\b", " $1", True, True)
],
# "nbsp6": rewrites the space that follows an abbreviated title.
# Each pattern is anchored on a word boundary and ends with a space; the replacement
# re-emits the abbreviation followed by a space character.
# NOTE(review): the trailing space in each replacement is presumably a no-break space —
# confirm with a hex view before editing.
"nbsp6": [
# Mme(s), Mgr(s), Mlle(s), Mr(s), MM. and their superscript spellings
("\\bM(mes?|ᵐᵉˢ?|grs?|ᵍʳˢ?|lles?|ˡˡᵉˢ?|rs?|ʳˢ?|M\\.) ", "M$1 ", True, True),
# Dr / Dre / Drs / Dres (plain or superscript)
("\\bD(re?s?|ʳᵉ?ˢ?) ", "D$1 ", True, True),
# Pr / Pre / Prs / Pres (plain or superscript)
("\\bP(re?s?|ʳᵉ?ˢ?) ", "P$1 ", True, True),
# Vve / Vves (plain or superscript)
("\\bV(ves?|ᵛᵉˢ?) ", "V$1 ", True, True),
],
# espaces manquants
"space1": [
(";(?=[:alnum:])", "; ", True, True),
("\\?(?=[A-ZÉÈÊÂÀÎ])", "? ", True, True),
("!(?=[:alnum:])", "! ", True, True),
|
|
|
|
|
|
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
|
# "nnbsp4": a single rule that rewrites the whitespace sitting between two digits.
# NOTE(review): the bracketed character class and the replacement string may contain
# no-break / narrow no-break space characters that are visually identical to a plain
# space — inspect the bytes before editing this line.
"nnbsp4": [
("([:digit:])[ ]([:digit:])", "$1 $2", True, True)
],
# "nbsp5": normalises the (optional) space between a number and a following unit.
# The look-behind accepts an ASCII or superscript digit; the captured group is a unit
# symbol with an optional prefix letter (s, l, g, J, K, Ω, ℓ, m/m²/m³, W/Wh, Hz, dB),
# a percent / per-mille sign, or °C.
# NOTE(review): the space in the replacement is presumably a no-break space — confirm
# with a hex view; it cannot be told apart from a plain space on screen.
"nbsp5": [
("(?<=[0-9⁰¹²³⁴⁵⁶⁷⁸⁹]) ?([kcmµnd]?(?:[slgJKΩΩℓ]|m[²³]?|Wh?|Hz|dB)|[%‰]|°C)\\b", " $1", True, True)
],
# "nbsp6": rewrites the space that follows an abbreviated title.
# Each pattern ends with a space; the replacement re-emits the abbreviation followed
# by a space character.
# NOTE(review): the trailing space in each replacement is presumably a no-break space —
# confirm with a hex view before editing.
# NOTE(review): these patterns have no leading word-boundary anchor, so they also match
# when the capital letter is preceded by other word characters — confirm this is intended.
"nbsp6": [
# Mme(s), Mgr(s), Mlle(s), Mr(s), MM. and their superscript spellings
("M(mes?|ᵐᵉˢ?|grs?|ᵍʳˢ?|lles?|ˡˡᵉˢ?|rs?|ʳˢ?|M\\.) ", "M$1 ", True, True),
# Dr / Dre / Drs / Dres (plain or superscript)
("D(re?s?|ʳᵉ?ˢ?) ", "D$1 ", True, True),
# Pr / Pre / Prs / Pres (plain or superscript)
("P(re?s?|ʳᵉ?ˢ?) ", "P$1 ", True, True),
# Vve / Vves (plain or superscript)
("V(ves?|ᵛᵉˢ?) ", "V$1 ", True, True),
],
# espaces manquants
"space1": [
(";(?=[:alnum:])", "; ", True, True),
("\\?(?=[A-ZÉÈÊÂÀÎ])", "? ", True, True),
("!(?=[:alnum:])", "! ", True, True),
|
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
|
("\\bJ\\.kg(?=-1)\\b", "J·kg", True, True),
("\\bJ\\.m(?=-3)\\b", "J·m", True, True),
("\\bm[2²]\\.s\\b", "m²·s", True, True),
("\\bm[3³]\\.s(?=-1)\\b", "m³·s", True, True),
#("\\bJ.kg-1.K-1\\b", "J·kg-1·K-1", True, True),
#("\\bW.m-1.K-1\\b", "W·m-1·K-1", True, True),
#("\\bW.m-2.K-1\\b", "W·m-2·K-1", True, True),
("\\b(Y|Z|E|P|T|G|M|k|h|da|d|c|m|µ|n|p|f|a|z|y)Ω\\b", "$1Ω", True, True)
],
"typo7": [
# ligatures: pas de majuscules
("coeur", "cœur", False, True),
("coel([aeio])", "cœl$1", True, True),
("choeur", "chœur", False, True),
("foet", "fœt", False, True),
|
|
|
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
|
("\\bJ\\.kg(?=-1)\\b", "J·kg", True, True),
("\\bJ\\.m(?=-3)\\b", "J·m", True, True),
("\\bm[2²]\\.s\\b", "m²·s", True, True),
("\\bm[3³]\\.s(?=-1)\\b", "m³·s", True, True),
#("\\bJ.kg-1.K-1\\b", "J·kg-1·K-1", True, True),
#("\\bW.m-1.K-1\\b", "W·m-1·K-1", True, True),
#("\\bW.m-2.K-1\\b", "W·m-2·K-1", True, True),
("(Y|Z|E|P|T|G|M|k|h|da|d|c|m|µ|n|p|f|a|z|y)Ω", "$1Ω", True, True)
],
"typo7": [
# ligatures: pas de majuscules
("coeur", "cœur", False, True),
("coel([aeio])", "cœl$1", True, True),
("choeur", "chœur", False, True),
("foet", "fœt", False, True),
|
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
|
# mots communs avec diacritiques manquants
("\\bCa\\b", "Ça", True, True),
(" ca\\b", " ça", True, True),
("\\bdej[aà]\\b", "déjà", True, True),
("\\bDej[aà]\\b", "Déjà", True, True),
("\\bplutot\\b", "plutôt", True, True),
("\\bPlutot\\b", "Plutôt", True, True),
("\\b([cC]e(?:ux|lles?|lui))-la\\b", "$1-là", True, True),
("\\bmalgre\\b", "malgré", True, True),
("\\bMalgre\\b", "Malgré", True, True),
("\\betre\\b", "être", True, True),
("\\bEtre\\b", "Être", True, True),
("\\btres\\b", "très", True, True),
("\\bTres\\b", "Très", True, True),
("\\bEtai([ts]|ent)\\b", "Étai$1", True, True),
|
|
>
|
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
|
# mots communs avec diacritiques manquants
("\\bCa\\b", "Ça", True, True),
(" ca\\b", " ça", True, True),
("\\bdej[aà]\\b", "déjà", True, True),
("\\bDej[aà]\\b", "Déjà", True, True),
("\\bplutot\\b", "plutôt", True, True),
("\\bPlutot\\b", "Plutôt", True, True),
("\\b(ce(?:ux|lles?|lui))-la\\b", "$1-là", True, True),
("\\b(Ce(?:ux|lles?|lui))-la\\b", "$1-là", True, True),
("\\bmalgre\\b", "malgré", True, True),
("\\bMalgre\\b", "Malgré", True, True),
("\\betre\\b", "être", True, True),
("\\bEtre\\b", "Être", True, True),
("\\btres\\b", "très", True, True),
("\\bTres\\b", "Très", True, True),
("\\bEtai([ts]|ent)\\b", "Étai$1", True, True),
|
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
|
("(?<!,) etc[.]", ", etc.", True, True)
],
# "misc3": rules that insert or repair hyphens in common French constructions.
# Every entry is a 4-tuple (pattern, replacement, flag, flag).
# NOTE(review): the third field looks like "pattern is a regular expression" and the
# fourth like "match case-sensitively" — confirm against the code that reads this table.
"misc3": [
# euphonic "t": " t'il" / "-t'il" / " t-il" -> "-t-il" (also elle, on)
("[ -]t[’'](?=il\\b|elle|on\\b)", "-t-", True, True),
(" t-(?=il|elle|on)", "-t-", True, True),
# before the plurals "ils" / "elles" the stray "t" is dropped, leaving a single hyphen
("[ -]t[’'-](?=ils|elles)", "-", True, True),
# after a verb already ending in t or d, "-t-" collapses to "-"
("(?<=[td])-t-(?=il|elle|on)", "-", True, True),
# demonstratives: "celui ci" -> "celui-ci", "ceux là" -> "ceux-là"
("(celles?|celui|ceux) (ci|là)\\b", "$1-$2", True, False),
# compound numerals
("\\bdix (sept|huit|neuf)", "dix-$1", True, False),
("quatre vingt", "quatre-vingt", False, True),
("(soixante|quatre-vingt) dix", "$1-dix", True, False),
("(vingt|trente|quarante|cinquante|soixante(?:-dix|)|quatre-vingt(?:-dix|)) (deux|trois|quatre|cinq|six|sept|huit|neuf)", "$1-$2", True, False),
# "ci joint" -> "ci-joint", etc.; skipped when "ci" is already preceded by a hyphen
("(?<!-)\\b(ci) (joint|desso?us|contre|devant|avant|après|incluse|g[îi]t|gisent)", "$1-$2", True, False),
# NOTE(review): the next two patterns contain `\\b` but their third field is False,
# unlike every other `\\b` pattern in this list — if that field is the regex switch,
# these two rules cannot match; confirm.
("\\bvis à vis", "vis-à-vis", False, True),
("\\bVis à vis", "Vis-à-vis", False, True),
("week end", "week-end", False, True),
("Week end", "Week-end", False, True),
("(plus|moins) value", "$1-value", True, False)
],
# "misc5a": replaces the space after an elided form (qu, lorsqu, puisqu, quoiqu, presqu,
# jusqu, aujourd, entr, quelqu) with a typographic apostrophe (’).
# NOTE(review): the pattern has no leading word-boundary anchor, so it also matches a
# longer word that merely ends in one of these letter sequences — confirm this is intended.
"misc5a": [
("(qu|lorsqu|puisqu|quoiqu|presqu|jusqu|aujourd|entr|quelqu) ", "$1’", True, True),
],
"misc5b": [
("\\bj (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])", "j’", True, True),
("\\bn (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])", "n’", True, True),
|
|
>
|
>
>
|
|
>
|
|
|
|
|
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
|
("(?<!,) etc[.]", ", etc.", True, True)
],
"misc3": [
("[ -]t[’'](?=il\\b|elle|on\\b)", "-t-", True, True),
(" t-(?=il|elle|on)", "-t-", True, True),
("[ -]t[’'-](?=ils|elles)", "-", True, True),
("(?<=[td])-t-(?=il|elle|on)", "-", True, True),
(" ce(lles?|lui|ux) (ci|là)\\b", " ce$1-$2", True, True),
("Ce(lles?|lui|ux) (ci|là)\\b", "Ce$1-$2", True, True),
(" dix (sept|huit|neuf)", " dix-$1", True, True),
("Dix (sept|huit|neuf)", "Dix-$1", True, True),
("quatre vingt", "quatre-vingt", False, True),
("Quatre vingt", "Quatre-vingt", False, True),
("(soixante|quatre-vingt) (deux|trois|quatre|cinq|six|sept|huit|neuf|dix|onze|douze|treize|quatorze|quinze|seize|dix-sept|dix-huit|dix-neuf)", "$1-$2", True, False),
("(vingt|trente|quarante|cinquante) (deux|trois|quatre|cinq|six|sept|huit|neuf)", "$1-$2", True, False),
(" ci (joint|desso?us|contre|devant|avant|après|incluse|g[îi]t|gisent)", " ci-$1", True, True),
("Ci (joint|desso?us|contre|devant|avant|après|incluse|g[îi]t|gisent)", "Ci-$1", True, True),
(" vis à vis\\b", "vis-à-vis", False, True),
("Vis à vis\\b", "Vis-à-vis", False, True),
("week end", "week-end", False, True),
("Week end", "Week-end", False, True),
("(plus|moins) value", "$1-value", True, False),
],
# "misc5a": replaces the space after an elided form (qu, lorsqu, puisqu, quoiqu, presqu,
# jusqu, aujourd, entr, quelqu) with a typographic apostrophe (’).
# NOTE(review): the pattern has no leading word-boundary anchor, so it also matches a
# longer word that merely ends in one of these letter sequences — confirm this is intended.
"misc5a": [
("(qu|lorsqu|puisqu|quoiqu|presqu|jusqu|aujourd|entr|quelqu) ", "$1’", True, True),
],
"misc5b": [
("\\bj (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])", "j’", True, True),
("\\bn (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])", "n’", True, True),
|