Overview
| Comment: | [core] update: paragraph splitting |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk | core |
| Files: | files | file ages | folders |
| SHA3-256: |
722eb9bdaa28f9569187daee1ae90852 |
| User & Date: | olr on 2019-04-16 18:03:55 |
| Other Links: | manifest | tags |
Context
|
2019-04-17
| ||
| 06:56 | [fr] couleurs check-in: 9eccb90a0f user: olr tags: trunk, fr | |
|
2019-04-16
| ||
| 18:03 | [core] update: paragraph splitting check-in: 722eb9bdaa user: olr tags: trunk, core | |
| 16:43 | [fr] faux positif check-in: 772b6e8cfc user: olr tags: trunk, fr | |
Changes
Modified gc_core/js/lang_core/gc_engine.js from [5d05a79843] to [3be7b763d3].
| ︙ | ︙ | |||
161 162 163 164 165 166 167 |
//// Parsing
parse: function (sText, sCountry="${country_default}", bDebug=false, dOptions=null, bContext=false) {
let oText = new TextParser(sText);
return oText.parse(sCountry, bDebug, dOptions, bContext);
},
| | | 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 |
//// Parsing
parse: function (sText, sCountry="${country_default}", bDebug=false, dOptions=null, bContext=false) {
let oText = new TextParser(sText);
return oText.parse(sCountry, bDebug, dOptions, bContext);
},
// Patterns used to split a paragraph into sentences.
// NOTE(review): all three carry the "g" flag, so exec()/test() update and
// depend on their lastIndex property; getSentenceBoundaries reads
// _zBeginOfParagraph.lastIndex right after calling exec().

// End of a sentence: one of . ? ! : ; … followed by one or more spaces,
// punctuation marks, quotation marks, ")" or dashes, when the next character
// is an uppercase letter (A-Z, É, È, Î, Ô) -- or the last character of the text.
_zEndOfSentence: new RegExp ('([.?!:;…][ .?!…»«“”"‘’)–—]+(?=[A-ZÉÈÎÔ])|.$)', "g"),
// Leading run (possibly empty) of hyphens, spaces, dashes and punctuation at
// the start of the text.
_zBeginOfParagraph: new RegExp ("^[- –—.,;?!…]*", "ig"),
// Trailing run (possibly empty) of hyphens, spaces, punctuation and dashes at
// the end of the text.
_zEndOfParagraph: new RegExp ("[- .,;?!…–—]*$", "ig"),
getSentenceBoundaries: function* (sText) {
let mBeginOfSentence = this._zBeginOfParagraph.exec(sText);
let iStart = this._zBeginOfParagraph.lastIndex;
let m;
|
| ︙ | ︙ |
Modified gc_core/py/lang_core/gc_engine.py from [52335d206d] to [c8f472926f].
| ︙ | ︙ | |||
187 188 189 190 191 192 193 |
"set options to default values"
global _dOptions
_dOptions = dict(gc_options.getOptions(_sAppContext))
#### Parsing
| | | 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 |
"set options to default values"
global _dOptions
_dOptions = dict(gc_options.getOptions(_sAppContext))
#### Parsing
# Patterns used to split a paragraph into sentences.

# End of a sentence: one of . ? ! : ; … followed by one or more non-word
# characters, when the next character is an uppercase letter (A-Z, É, È, Î, Ô)
# -- or the last character of the text.
_zEndOfSentence = re.compile(r'([.?!:;…]\W+(?=[A-ZÉÈÎÔ])|.$)')
# Leading run (possibly empty) of non-word characters at the start of the text.
_zBeginOfParagraph = re.compile(r"^\W*")
# Trailing run (possibly empty) of non-word characters at the end of the text.
_zEndOfParagraph = re.compile(r"\W*$")
def _getSentenceBoundaries (sText):
iStart = _zBeginOfParagraph.match(sText).end()
for m in _zEndOfSentence.finditer(sText):
yield (iStart, m.end())
|
| ︙ | ︙ |