Overview
| Comment: | [core] adjust sentence detection | 
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive | 
| Timelines: | family | ancestors | descendants | both | trunk | core | 
| Files: | files | file ages | folders | 
| SHA3-256: | 
f49fe68f6c994c0754db8ea31635dd3b | 
| User & Date: | olr on 2020-03-20 16:30:27 | 
| Other Links: | manifest | tags | 
Context
| 
   2020-03-21 
 | ||
| 10:16 | [fr] test: untested rules check-in: ac727cfd6b user: olr tags: trunk, fr | |
| 
   2020-03-20 
 | ||
| 16:30 | [core] adjust sentence detection check-in: f49fe68f6c user: olr tags: trunk, core | |
| 16:29 | [fr] ajustements et faux positifs check-in: a654df5567 user: olr tags: trunk, fr | |
Changes
Modified gc_core/js/text.js from [650a548528] to [17a1dd3378].
1 2 3 4 5 6 7 8 9 10 11  | 
// JavaScript
/* jshint esversion:6, -W097 */
/* jslint esversion:6 */
/* global require, exports, console */
"use strict";
var text = {
 | |  | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19  | 
// JavaScript
/* jshint esversion:6, -W097 */
/* jslint esversion:6 */
/* global require, exports, console */
"use strict";
var text = {
    _zEndOfSentence: new RegExp ('[.?!…]+[»”’)]?[   ]+[»”’]?(?=[«"“‘–—   ]*[A-ZÀÂÉÈÊÎÔÇ])|[:;][   ]+', "g"),
    getSentenceBoundaries: function* (sText) {
        // generator: returns start and end of sentences found in <sText>
        let iStart = 0;
        let m;
        while ((m = this._zEndOfSentence.exec(sText)) !== null) {
            yield [iStart, this._zEndOfSentence.lastIndex];
 | 
| ︙ | ︙ | 
Modified gc_core/py/text.py from [df68ef0738] to [15f2da6650].
1 2 3 4 5 6 7 8 9 10 11  | #!python3 """ Text tools """ import re import textwrap from itertools import chain  | |  | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19  | 
#!python3
"""
Text tools
"""
import re
import textwrap
from itertools import chain
_zEndOfSentence = re.compile(r'[.?!…]+[»”’)]?[   ]+[»”’]?(?=[«"“‘–—   ]*[A-ZÀÂÉÈÊÎÔÇ])|[:;][   ]+')
def getSentenceBoundaries (sText):
    "generator: returns start and end of sentences found in <sText>"
    iStart = 0
    for m in _zEndOfSentence.finditer(sText):
        yield (iStart, m.end())
        iStart = m.end()
 | 
| ︙ | ︙ |