Grammalecte  Check-in [f069a117e4]

Overview
Comment:[core][fr] fix text formatter
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | fr | core
Files: files | file ages | folders
SHA3-256: f069a117e4f2ebe0f2513770d4e1d1a6c93664d002533eaf31f37cbee18af548
User & Date: olr on 2021-02-15 16:32:22
Other Links: manifest | tags
Context
2021-02-17
10:45
[core][fr][oxt] text formatter: another apostrophe check-in: 26cf74769b user: olr tags: core, fr, lo, trunk
2021-02-15
16:32
[core][fr] fix text formatter check-in: f069a117e4 user: olr tags: core, fr, trunk
13:46
[build][fr] include lemmas of words that are also verbal forms check-in: 69affb5433 user: olr tags: build, fr, trunk
Changes

Modified gc_lang/fr/modules/textformatter.py from [4ba47078d2] to [bc3ccf87ae].

76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
    "ts_n_dash_start":        [ ("^[-—][  ]", "– "),
                                ("^– ", "– "),
                                ("^[-–—](?=[\\w.…])", "– ") ],
    "ts_m_dash_start":        [ ("^[-–][  ]", "— "),
                                ("^— ", "— "),
                                ("^«[  ][—–-][  ]", "« — "),
                                ("^[-–—](?=[\\w.…])", "— ") ],
    "ts_quotation_marks":     [ ('"(\\w+)"', "“$1”"),
                                ("''(\\w+)''", "“$1”"),
                                ("'(\\w+)'", "“$1”"),
                                ("^(?:\"|'')(?=\\w)", "« "),
                                (" (?:\"|'')(?=\\w)", " « "),
                                ("\\((?:\"|'')(?=\\w)", "(« "),
                                ("(?<=\\w)(?:\"|'')$", " »"),
                                ("(?<=\\w)(?:\"|'')(?=[] ,.:;?!…)])", " »"),
                                ('(?<=[.!?…])" ', " » "),
                                ('(?<=[.!?…])"$', " »") ],







|
|
|







76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
    "ts_n_dash_start":        [ ("^[-—][  ]", "– "),
                                ("^– ", "– "),
                                ("^[-–—](?=[\\w.…])", "– ") ],
    "ts_m_dash_start":        [ ("^[-–][  ]", "— "),
                                ("^— ", "— "),
                                ("^«[  ][—–-][  ]", "« — "),
                                ("^[-–—](?=[\\w.…])", "— ") ],
    "ts_quotation_marks":     [ ('"(\\w+)"', "“\\1”"),
                                ("''(\\w+)''", "“\\1”"),
                                ("'(\\w+)'", "“\\1”"),
                                ("^(?:\"|'')(?=\\w)", "« "),
                                (" (?:\"|'')(?=\\w)", " « "),
                                ("\\((?:\"|'')(?=\\w)", "(« "),
                                ("(?<=\\w)(?:\"|'')$", " »"),
                                ("(?<=\\w)(?:\"|'')(?=[] ,.:;?!…)])", " »"),
                                ('(?<=[.!?…])" ', " » "),
                                ('(?<=[.!?…])"$', " »") ],
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
                                ("noeu", "nœu"), ("Noeu", "Nœu"),
                                ("soeur", "sœur"), ("Soeur", "Sœur"),
                                ("voeu", "vœu"), ("Voeu", "Vœu"),
                                ("aequo", "æquo"), ("Aequo", "Æquo"),
                                ("\\bCa\\b", "Ça"), (" ca\\b", " ça"),
                                ("\\bdej[aà]\\b", "déjà"), ("\\bplutot\\b", "plutôt"),
                                ("\\bmeme\\b", "même"), ("\\bmemes\\b", "mêmes"), ("\\bMeme\\b", "Même"),
                                ("\\b([cC]e(?:ux|lles?|lui))-la\\b", "$1-là"),
                                ("\\bmalgre\\b", "malgré"), ("\\bMalgre\\b", "Malgré"),
                                ("\\betre\\b", "être"), ("\\bEtre\\b", "Être"),
                                ("\\btres\\b", "très"), ("\\bTres\\b", "Très"),
                                ("\\bEtai([ts]|ent)\\b", "Étai$1"),
                                ("\\bE(tat|cole|crit|poque|tude|ducation|glise|conomi(?:qu|)e|videmment|lysée|tienne|thiopie|cosse|gypt(?:e|ien)|rythrée|pinal|vreux)", "É$1") ],
    "ts_ligature_ffi_on":       [("ffi", "ffi")],
    "ts_ligature_ffl_on":       [("ffl", "ffl")],
    "ts_ligature_fi_on":        [("fi", "fi")],
    "ts_ligature_fl_on":        [("fl", "fl")],
    "ts_ligature_ff_on":        [("ff", "ff")],
    "ts_ligature_ft_on":        [("ft", "ſt")],
    "ts_ligature_st_on":        [("st", "st")],







|



|
|







104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
                                ("noeu", "nœu"), ("Noeu", "Nœu"),
                                ("soeur", "sœur"), ("Soeur", "Sœur"),
                                ("voeu", "vœu"), ("Voeu", "Vœu"),
                                ("aequo", "æquo"), ("Aequo", "Æquo"),
                                ("\\bCa\\b", "Ça"), (" ca\\b", " ça"),
                                ("\\bdej[aà]\\b", "déjà"), ("\\bplutot\\b", "plutôt"),
                                ("\\bmeme\\b", "même"), ("\\bmemes\\b", "mêmes"), ("\\bMeme\\b", "Même"),
                                ("\\b([cC]e(?:ux|lles?|lui))-la\\b", "\\1-là"),
                                ("\\bmalgre\\b", "malgré"), ("\\bMalgre\\b", "Malgré"),
                                ("\\betre\\b", "être"), ("\\bEtre\\b", "Être"),
                                ("\\btres\\b", "très"), ("\\bTres\\b", "Très"),
                                ("\\bEtai([ts]|ent)\\b", "Étai\\1"),
                                ("\\bE(tat|cole|crit|poque|tude|ducation|glise|conomi(?:qu|)e|videmment|lysée|tienne|thiopie|cosse|gypt(?:e|ien)|rythrée|pinal|vreux)", "É\\1") ],
    "ts_ligature_ffi_on":       [("ffi", "ffi")],
    "ts_ligature_ffl_on":       [("ffl", "ffl")],
    "ts_ligature_fi_on":        [("fi", "fi")],
    "ts_ligature_fl_on":        [("fl", "fl")],
    "ts_ligature_ff_on":        [("ff", "ff")],
    "ts_ligature_ft_on":        [("ft", "ſt")],
    "ts_ligature_st_on":        [("st", "st")],