// JavaScript
/* jshint esversion:6, -W097 */
/* jslint esversion:6 */
/* global require, exports, console */
"use strict";
var text = {
_zEndOfSentence: new RegExp ('[.?!:;…]+[ ]+[»”’]?(?=[«"“‘–— ]?[A-ZÀÂÉÈÊÎÔÇ])', "g"),
_zEndOfSentence: new RegExp ('[.?!:;…]+[»”’)]?[ ]+[»”’]?(?=[«"“‘–— ]*[A-ZÀÂÉÈÊÎÔÇ])', "g"),
getSentenceBoundaries: function* (sText) {
// generator: returns start and end of sentences found in <sText>
let iStart = 0;
let m;
while ((m = this._zEndOfSentence.exec(sText)) !== null) {
yield [iStart, this._zEndOfSentence.lastIndex];
#!python3
"""
Text tools
"""
import re
import textwrap
from itertools import chain
# Sentence boundary pattern. It consumes, in order:
#   - one or more terminal punctuation marks      [.?!:;…]+
#   - an optional closing quote or parenthesis    [»”’)]?
#   - at least one space                          [ ]+
#   - an optional closing quote                   [»”’]?
# and only matches when what follows (lookahead, not consumed) is any run of
# opening quotes, dashes or spaces and then an uppercase letter.
_zEndOfSentence = re.compile(r'[.?!:;…]+[»”’)]?[ ]+[»”’]?(?=[«"“‘–— ]*[A-ZÀÂÉÈÊÎÔÇ])')
def getSentenceBoundaries (sText):
    """Generator yielding (start, end) offsets of the sentences found in <sText>.

    Every match of the module-level _zEndOfSentence pattern closes one sentence:
    the yielded end is the end offset of the match, and that same offset becomes
    the start of the next sentence. The first sentence starts at offset 0.
    Text following the last match is not covered by this loop.
    """
    iStart = 0
    for m in _zEndOfSentence.finditer(sText):
        # m.end() is the offset just past everything the pattern consumed
        yield (iStart, m.end())
        # the next sentence begins where this match stopped
        iStart = m.end()