Grammalecte  Diff

Differences From Artifact [aadf88372a]:

To Artifact [e97495b087]:


117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
    "nnbsp4": [
                    ("([:digit:])[  ]([:digit:])",      "$1 $2",        True,   True)
    ],
    "nbsp5": [
                    ("(?<=[0-9⁰¹²³⁴⁵⁶⁷⁸⁹]) ?([kcmµnd]?(?:[slgJKΩΩℓ]|m[²³]?|Wh?|Hz|dB)|[%‰]|°C)\\b", " $1", True, True)
    ],
    "nbsp6": [
                    ("\\bM(mes?|ᵐᵉˢ?|grs?|ᵍʳˢ?|lles?|ˡˡᵉˢ?|rs?|ʳˢ?|M\\.) ", "M$1 ",     True,   True),
                    ("\\bD(re?s?|ʳᵉ?ˢ?) ",                                  "D$1 ",     True,   True),
                    ("\\bP(re?s?|ʳᵉ?ˢ?) ",                                  "P$1 ",     True,   True),
                    ("\\bV(ves?|ᵛᵉˢ?) ",                                    "V$1 ",     True,   True),
    ],

    # espaces manquants
    "space1": [
                    (";(?=[:alnum:])",                  "; ",           True,   True),
                    ("\\?(?=[A-ZÉÈÊÂÀÎ])",              "? ",           True,   True),
                    ("!(?=[:alnum:])",                  "! ",           True,   True),







|
|
|
|







117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
    "nnbsp4": [
                    ("([:digit:])[  ]([:digit:])",      "$1 $2",        True,   True)
    ],
    "nbsp5": [
                    ("(?<=[0-9⁰¹²³⁴⁵⁶⁷⁸⁹]) ?([kcmµnd]?(?:[slgJKΩΩℓ]|m[²³]?|Wh?|Hz|dB)|[%‰]|°C)\\b", " $1", True, True)
    ],
    "nbsp6": [
                    ("M(mes?|ᵐᵉˢ?|grs?|ᵍʳˢ?|lles?|ˡˡᵉˢ?|rs?|ʳˢ?|M\\.) ", "M$1 ",     True,   True),
                    ("D(re?s?|ʳᵉ?ˢ?) ",                                  "D$1 ",     True,   True),
                    ("P(re?s?|ʳᵉ?ˢ?) ",                                  "P$1 ",     True,   True),
                    ("V(ves?|ᵛᵉˢ?) ",                                    "V$1 ",     True,   True),
    ],

    # espaces manquants
    "space1": [
                    (";(?=[:alnum:])",                  "; ",           True,   True),
                    ("\\?(?=[A-ZÉÈÊÂÀÎ])",              "? ",           True,   True),
                    ("!(?=[:alnum:])",                  "! ",           True,   True),
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
                    ("\\bJ\\.kg(?=-1)\\b",                  "J·kg",         True,   True),
                    ("\\bJ\\.m(?=-3)\\b",                   "J·m",          True,   True),
                    ("\\bm[2²]\\.s\\b",                     "m²·s",         True,   True),
                    ("\\bm[3³]\\.s(?=-1)\\b",               "m³·s",         True,   True),
                    #("\\bJ.kg-1.K-1\\b",                   "J·kg-1·K-1",   True,   True),
                    #("\\bW.m-1.K-1\\b",                    "W·m-1·K-1",    True,   True),
                    #("\\bW.m-2.K-1\\b",                    "W·m-2·K-1",    True,   True),
                    ("\\b(Y|Z|E|P|T|G|M|k|h|da|d|c|m|µ|n|p|f|a|z|y)Ω\\b", "$1Ω", True, True)
    ],
    "typo7": [
                    # ligatures: pas de majuscules
                    ("coeur",                               "cœur",         False,  True),
                    ("coel([aeio])",                        "cœl$1",        True,   True),
                    ("choeur",                              "chœur",        False,  True),
                    ("foet",                                "fœt",          False,  True),







|







251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
                    ("\\bJ\\.kg(?=-1)\\b",                  "J·kg",         True,   True),
                    ("\\bJ\\.m(?=-3)\\b",                   "J·m",          True,   True),
                    ("\\bm[2²]\\.s\\b",                     "m²·s",         True,   True),
                    ("\\bm[3³]\\.s(?=-1)\\b",               "m³·s",         True,   True),
                    #("\\bJ.kg-1.K-1\\b",                   "J·kg-1·K-1",   True,   True),
                    #("\\bW.m-1.K-1\\b",                    "W·m-1·K-1",    True,   True),
                    #("\\bW.m-2.K-1\\b",                    "W·m-2·K-1",    True,   True),
                    ("(Y|Z|E|P|T|G|M|k|h|da|d|c|m|µ|n|p|f|a|z|y)Ω", "$1Ω", True, True)
    ],
    "typo7": [
                    # ligatures: pas de majuscules
                    ("coeur",                               "cœur",         False,  True),
                    ("coel([aeio])",                        "cœl$1",        True,   True),
                    ("choeur",                              "chœur",        False,  True),
                    ("foet",                                "fœt",          False,  True),
293
294
295
296
297
298
299
300

301
302
303
304
305
306
307
                    # mots communs avec diacritiques manquants
                    ("\\bCa\\b",                            "Ça",           True,   True),
                    (" ca\\b",                              " ça",          True,   True),
                    ("\\bdej[aà]\\b",                       "déjà",         True,   True),
                    ("\\bDej[aà]\\b",                       "Déjà",         True,   True),
                    ("\\bplutot\\b",                        "plutôt",       True,   True),
                    ("\\bPlutot\\b",                        "Plutôt",       True,   True),
                    ("\\b([cC]e(?:ux|lles?|lui))-la\\b",    "$1-là",        True,   True),

                    ("\\bmalgre\\b",                        "malgré",       True,   True),
                    ("\\bMalgre\\b",                        "Malgré",       True,   True),
                    ("\\betre\\b",                          "être",         True,   True),
                    ("\\bEtre\\b",                          "Être",         True,   True),
                    ("\\btres\\b",                          "très",         True,   True),
                    ("\\bTres\\b",                          "Très",         True,   True),
                    ("\\bEtai([ts]|ent)\\b",                "Étai$1",       True,   True),







|
>







293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
                    # mots communs avec diacritiques manquants
                    ("\\bCa\\b",                            "Ça",           True,   True),
                    (" ca\\b",                              " ça",          True,   True),
                    ("\\bdej[aà]\\b",                       "déjà",         True,   True),
                    ("\\bDej[aà]\\b",                       "Déjà",         True,   True),
                    ("\\bplutot\\b",                        "plutôt",       True,   True),
                    ("\\bPlutot\\b",                        "Plutôt",       True,   True),
                    ("\\b(ce(?:ux|lles?|lui))-la\\b",       "$1-là",        True,   True),
                    ("\\b(Ce(?:ux|lles?|lui))-la\\b",       "$1-là",        True,   True),
                    ("\\bmalgre\\b",                        "malgré",       True,   True),
                    ("\\bMalgre\\b",                        "Malgré",       True,   True),
                    ("\\betre\\b",                          "être",         True,   True),
                    ("\\bEtre\\b",                          "Être",         True,   True),
                    ("\\btres\\b",                          "très",         True,   True),
                    ("\\bTres\\b",                          "Très",         True,   True),
                    ("\\bEtai([ts]|ent)\\b",                "Étai$1",       True,   True),
384
385
386
387
388
389
390
391

392

393

394
395

396
397
398
399
400
401
402
403
404
405
406
407
408
                    ("(?<!,) etc[.]",                           ", etc.",       True,   True)
    ],
    "misc3": [
                    ("[ -]t[’'](?=il\\b|elle|on\\b)",           "-t-",          True,   True),
                    (" t-(?=il|elle|on)",                       "-t-",          True,   True),
                    ("[ -]t[’'-](?=ils|elles)",                 "-",            True,   True),
                    ("(?<=[td])-t-(?=il|elle|on)",              "-",            True,   True),
                    ("(celles?|celui|ceux) (ci|là)\\b",         "$1-$2",        True,   False),

                    ("\\bdix (sept|huit|neuf)",                 "dix-$1",       True,   False),

                    ("quatre vingt",                            "quatre-vingt", False,  True),

                    ("(soixante|quatre-vingt) dix",             "$1-dix",       True,   False),
                    ("(vingt|trente|quarante|cinquante|soixante(?:-dix|)|quatre-vingt(?:-dix|)) (deux|trois|quatre|cinq|six|sept|huit|neuf)", "$1-$2", True, False),

                    ("(?<!-)\\b(ci) (joint|desso?us|contre|devant|avant|après|incluse|g[îi]t|gisent)", "$1-$2", True, False),
                    ("\\bvis à vis",                            "vis-à-vis",    False,  True),
                    ("\\bVis à vis",                            "Vis-à-vis",    False,  True),
                    ("week end",                                "week-end",     False,  True),
                    ("Week end",                                "Week-end",     False,  True),
                    ("(plus|moins) value",                      "$1-value",     True,   False)
    ],
    "misc5a": [
                    ("(qu|lorsqu|puisqu|quoiqu|presqu|jusqu|aujourd|entr|quelqu) ", "$1’", True, True),
    ],
    "misc5b": [
                    ("\\bj (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])",     "j’",           True,   True),
                    ("\\bn (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])",     "n’",           True,   True),







|
>
|
>

>
|
|
>
|
|
|


|







385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
                    ("(?<!,) etc[.]",                           ", etc.",       True,   True)
    ],
    "misc3": [
                    ("[ -]t[’'](?=il\\b|elle|on\\b)",           "-t-",          True,   True),
                    (" t-(?=il|elle|on)",                       "-t-",          True,   True),
                    ("[ -]t[’'-](?=ils|elles)",                 "-",            True,   True),
                    ("(?<=[td])-t-(?=il|elle|on)",              "-",            True,   True),
                    (" ce(lles?|lui|ux) (ci|là)\\b",            " ce$1-$2",     True,   True),
                    ("Ce(lles?|lui|ux) (ci|là)\\b",             "Ce$1-$2",      True,   True),
                    (" dix (sept|huit|neuf)",                   " dix-$1",      True,   True),
                    ("Dix (sept|huit|neuf)",                    "Dix-$1",       True,   True),
                    ("quatre vingt",                            "quatre-vingt", False,  True),
                    ("Quatre vingt",                            "Quatre-vingt", False,  True),
                    ("(soixante|quatre-vingt) (deux|trois|quatre|cinq|six|sept|huit|neuf|dix|onze|douze|treize|quatorze|quinze|seize|dix-sept|dix-huit|dix-neuf)", "$1-$2", True, False),
                    ("(vingt|trente|quarante|cinquante) (deux|trois|quatre|cinq|six|sept|huit|neuf)", "$1-$2", True, False),
                    (" ci (joint|desso?us|contre|devant|avant|après|incluse|g[îi]t|gisent)", " ci-$1", True, True),
                    ("Ci (joint|desso?us|contre|devant|avant|après|incluse|g[îi]t|gisent)", "Ci-$1", True, True),
                    (" vis à vis\\b",                           "vis-à-vis",    False,  True),
                    ("Vis à vis\\b",                            "Vis-à-vis",    False,  True),
                    ("week end",                                "week-end",     False,  True),
                    ("Week end",                                "Week-end",     False,  True),
                    ("(plus|moins) value",                      "$1-value",     True,   False),
    ],
    "misc5a": [
                    ("(qu|lorsqu|puisqu|quoiqu|presqu|jusqu|aujourd|entr|quelqu) ", "$1’", True, True),
    ],
    "misc5b": [
                    ("\\bj (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])",     "j’",           True,   True),
                    ("\\bn (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])",     "n’",           True,   True),