Overview
Comment: | [core] update: paragraph splitting |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | core |
Files: | files | file ages | folders |
SHA3-256: | d0bd3382ebaefaa81cbeb41789eacce7 |
User & Date: | olr on 2019-04-16 16:05:28 |
Other Links: | manifest | tags |
Context
2019-04-16
| ||
16:29 | [fr] faux positif (ocr) check-in: 660df51ea1 user: olr tags: trunk, fr | |
16:05 | [core] update: paragraph splitting check-in: d0bd3382eb user: olr tags: trunk, core | |
15:58 | [fr] faux positifs (ocr) check-in: a052c4803b user: olr tags: trunk, fr | |
Changes
Modified gc_core/js/lang_core/gc_engine.js from [577effc815] to [5d05a79843].
︙ | ︙ | |||
161 162 163 164 165 166 167 | //// Parsing parse: function (sText, sCountry="${country_default}", bDebug=false, dOptions=null, bContext=false) { let oText = new TextParser(sText); return oText.parse(sCountry, bDebug, dOptions, bContext); }, | | | 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 | //// Parsing parse: function (sText, sCountry="${country_default}", bDebug=false, dOptions=null, bContext=false) { let oText = new TextParser(sText); return oText.parse(sCountry, bDebug, dOptions, bContext); }, _zEndOfSentence: new RegExp ('([.?!:;…][ .?!…»«“”"‘’)]+(?=[A-ZÉÈÎÔ])|.$)', "g"), _zBeginOfParagraph: new RegExp ("^[- –—.,;?!…]*", "ig"), _zEndOfParagraph: new RegExp ("[- .,;?!…–—]*$", "ig"), getSentenceBoundaries: function* (sText) { let mBeginOfSentence = this._zBeginOfParagraph.exec(sText); let iStart = this._zBeginOfParagraph.lastIndex; let m; |
︙ | ︙ |
Modified gc_core/py/lang_core/gc_engine.py from [ca233af37a] to [52335d206d].
︙ | ︙ | |||
187 188 189 190 191 192 193 | "set options to default values" global _dOptions _dOptions = dict(gc_options.getOptions(_sAppContext)) #### Parsing | | | 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 | "set options to default values" global _dOptions _dOptions = dict(gc_options.getOptions(_sAppContext)) #### Parsing _zEndOfSentence = re.compile(r'([.?!:;…][ .?!…»«“”"‘’)]+(?=[A-ZÉÈÎÔ])|.$)') _zBeginOfParagraph = re.compile(r"^\W*") _zEndOfParagraph = re.compile(r"\W*$") def _getSentenceBoundaries (sText): iStart = _zBeginOfParagraph.match(sText).end() for m in _zEndOfSentence.finditer(sText): yield (iStart, m.end()) |
︙ | ︙ |