Grammalecte  Diff

Differences From Artifact [aadf88372a]:

To Artifact [49ffcf3199]:


117
118
119
120
121
122
123
124
125
126
127




128
129
130
131
132
133
134
117
118
119
120
121
122
123




124
125
126
127
128
129
130
131
132
133
134







-
-
-
-
+
+
+
+







    "nnbsp4": [
                    ("([:digit:])[  ]([:digit:])",      "$1 $2",        True,   True)
    ],
    "nbsp5": [
                    ("(?<=[0-9⁰¹²³⁴⁵⁶⁷⁸⁹]) ?([kcmµnd]?(?:[slgJKΩΩℓ]|m[²³]?|Wh?|Hz|dB)|[%‰]|°C)\\b", " $1", True, True)
    ],
    "nbsp6": [
                    ("\\bM(mes?|ᵐᵉˢ?|grs?|ᵍʳˢ?|lles?|ˡˡᵉˢ?|rs?|ʳˢ?|M\\.) ", "M$1 ",     True,   True),
                    ("\\bD(re?s?|ʳᵉ?ˢ?) ",                                  "D$1 ",     True,   True),
                    ("\\bP(re?s?|ʳᵉ?ˢ?) ",                                  "P$1 ",     True,   True),
                    ("\\bV(ves?|ᵛᵉˢ?) ",                                    "V$1 ",     True,   True),
                    ("M(mes?|ᵐᵉˢ?|grs?|ᵍʳˢ?|lles?|ˡˡᵉˢ?|rs?|ʳˢ?|M\\.) ", "M$1 ",     True,   True),
                    ("D(re?s?|ʳᵉ?ˢ?) ",                                  "D$1 ",     True,   True),
                    ("P(re?s?|ʳᵉ?ˢ?) ",                                  "P$1 ",     True,   True),
                    ("V(ves?|ᵛᵉˢ?) ",                                    "V$1 ",     True,   True),
    ],

    # espaces manquants
    "space1": [
                    (";(?=[:alnum:])",                  "; ",           True,   True),
                    ("\\?(?=[A-ZÉÈÊÂÀÎ])",              "? ",           True,   True),
                    ("!(?=[:alnum:])",                  "! ",           True,   True),
251
252
253
254
255
256
257
258

259
260
261
262
263
264
265
251
252
253
254
255
256
257

258
259
260
261
262
263
264
265







-
+







                    ("\\bJ\\.kg(?=-1)\\b",                  "J·kg",         True,   True),
                    ("\\bJ\\.m(?=-3)\\b",                   "J·m",          True,   True),
                    ("\\bm[2²]\\.s\\b",                     "m²·s",         True,   True),
                    ("\\bm[3³]\\.s(?=-1)\\b",               "m³·s",         True,   True),
                    #("\\bJ.kg-1.K-1\\b",                   "J·kg-1·K-1",   True,   True),
                    #("\\bW.m-1.K-1\\b",                    "W·m-1·K-1",    True,   True),
                    #("\\bW.m-2.K-1\\b",                    "W·m-2·K-1",    True,   True),
                    ("\\b(Y|Z|E|P|T|G|M|k|h|da|d|c|m|µ|n|p|f|a|z|y)Ω\\b", "$1Ω", True, True)
                    ("(Y|Z|E|P|T|G|M|k|h|da|d|c|m|µ|n|p|f|a|z|y)Ω", "$1Ω", True, True)
    ],
    "typo7": [
                    # ligatures: pas de majuscules
                    ("coeur",                               "cœur",         False,  True),
                    ("coel([aeio])",                        "cœl$1",        True,   True),
                    ("choeur",                              "chœur",        False,  True),
                    ("foet",                                "fœt",          False,  True),
293
294
295
296
297
298
299
300


301
302
303
304
305
306
307
293
294
295
296
297
298
299

300
301
302
303
304
305
306
307
308







-
+
+







                    # mots communs avec diacritiques manquants
                    ("\\bCa\\b",                            "Ça",           True,   True),
                    (" ca\\b",                              " ça",          True,   True),
                    ("\\bdej[aà]\\b",                       "déjà",         True,   True),
                    ("\\bDej[aà]\\b",                       "Déjà",         True,   True),
                    ("\\bplutot\\b",                        "plutôt",       True,   True),
                    ("\\bPlutot\\b",                        "Plutôt",       True,   True),
                    ("\\b([cC]e(?:ux|lles?|lui))-la\\b",    "$1-là",        True,   True),
                    ("\\b(ce(?:ux|lles?|lui))-la\\b",       "$1-là",        True,   True),
                    ("\\b(Ce(?:ux|lles?|lui))-la\\b",       "$1-là",        True,   True),
                    ("\\bmalgre\\b",                        "malgré",       True,   True),
                    ("\\bMalgre\\b",                        "Malgré",       True,   True),
                    ("\\betre\\b",                          "être",         True,   True),
                    ("\\bEtre\\b",                          "Être",         True,   True),
                    ("\\btres\\b",                          "très",         True,   True),
                    ("\\bTres\\b",                          "Très",         True,   True),
                    ("\\bEtai([ts]|ent)\\b",                "Étai$1",       True,   True),
384
385
386
387
388
389
390
391
392




393

394
395


396
397
398
399
400
401
402
385
386
387
388
389
390
391


392
393
394
395
396
397


398
399
400
401
402
403
404
405
406







-
-
+
+
+
+

+
-
-
+
+







                    ("(?<!,) etc[.]",                           ", etc.",       True,   True)
    ],
    "misc3": [
                    ("[ -]t[’'](?=il\\b|elle|on\\b)",           "-t-",          True,   True),
                    (" t-(?=il|elle|on)",                       "-t-",          True,   True),
                    ("[ -]t[’'-](?=ils|elles)",                 "-",            True,   True),
                    ("(?<=[td])-t-(?=il|elle|on)",              "-",            True,   True),
                    ("(celles?|celui|ceux) (ci|là)\\b",         "$1-$2",        True,   False),
                    ("\\bdix (sept|huit|neuf)",                 "dix-$1",       True,   False),
                    (" c(elles?|elui|eux) (ci|là)\\b",          " c$1-$2",      True,   True),
                    ("C(elles?|elui|eux) (ci|là)\\b",           "C$1-$2",       True,   True),
                    (" dix (sept|huit|neuf)",                   " dix-$1",      True,   True),
                    ("Dix (sept|huit|neuf)",                    "Dix-$1",       True,   True),
                    ("quatre vingt",                            "quatre-vingt", False,  True),
                    ("Quatre vingt",                            "Quatre-vingt", False,  True),
                    ("(soixante|quatre-vingt) dix",             "$1-dix",       True,   False),
                    ("(vingt|trente|quarante|cinquante|soixante(?:-dix|)|quatre-vingt(?:-dix|)) (deux|trois|quatre|cinq|six|sept|huit|neuf)", "$1-$2", True, False),
                    ("(soixante|quatre-vingt) (deux|trois|quatre|cinq|six|sept|huit|neuf|dix|onze|douze|treize|quatorze|quinze|seize|dix-sept|dix-huit|dix-neuf)", "$1-$2", True, False),
                    ("(vingt|trente|quarante|cinquante) (deux|trois|quatre|cinq|six|sept|huit|neuf)", "$1-$2", True, False),
                    ("(?<!-)\\b(ci) (joint|desso?us|contre|devant|avant|après|incluse|g[îi]t|gisent)", "$1-$2", True, False),
                    ("\\bvis à vis",                            "vis-à-vis",    False,  True),
                    ("\\bVis à vis",                            "Vis-à-vis",    False,  True),
                    ("week end",                                "week-end",     False,  True),
                    ("Week end",                                "Week-end",     False,  True),
                    ("(plus|moins) value",                      "$1-value",     True,   False)
    ],