1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
|
#!python3
"""
Text tools
"""
import re
import textwrap
from itertools import chain
# Matches the gap between two sentences: one or more closing punctuation marks
# (. ? ! : ; …), one or more spaces and an optional closing quote, but only if
# the next characters are an optional opening quote followed by one of the
# listed uppercase letters or a dash (checked with a lookahead, not consumed).
_zEndOfSentence = re.compile(r'[.?!:;…]+[ ]+[»”’]?(?=[«"“‘]?[A-ZÉÈÎÔ–—])')


def getSentenceBoundaries (sText):
    """generator: returns start and end of sentences found in <sText>

    Yields (iStart, iEnd) tuples of offsets into <sText>. Each span ends right
    after the separator matched by _zEndOfSentence, so the trailing spaces
    belong to the sentence that precedes them and consecutive spans are
    contiguous. The text left after the last separator is yielded as a final
    span; an empty <sText> yields nothing.
    """
    iStart = 0
    for m in _zEndOfSentence.finditer(sText):
        yield (iStart, m.end())
        iStart = m.end()
    # The separator pattern needs a following sentence start, so it can never
    # match at the very end of the text: the last sentence (or the whole text
    # when there is no separator at all) must be emitted explicitly.
    if iStart < len(sText):
        yield (iStart, len(sText))
|
|
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
|
#!python3
"""
Text tools
"""
import re
import textwrap
from itertools import chain
# Matches the gap between two sentences: one or more closing punctuation marks
# (. ? ! : ; …), one or more spaces and an optional closing quote, but only if
# the next characters are an optional opening quote, dash or space followed by
# one of the listed uppercase letters (checked with a lookahead, not consumed).
_zEndOfSentence = re.compile(r'[.?!:;…]+[ ]+[»”’]?(?=[«"“‘–— ]?[A-ZÉÈÎÔ])')


def getSentenceBoundaries (sText):
    """generator: returns start and end of sentences found in <sText>

    Yields (iStart, iEnd) tuples of offsets into <sText>. Each span ends right
    after the separator matched by _zEndOfSentence, so the trailing spaces
    belong to the sentence that precedes them and consecutive spans are
    contiguous. The text left after the last separator is yielded as a final
    span; an empty <sText> yields nothing.
    """
    iStart = 0
    for m in _zEndOfSentence.finditer(sText):
        yield (iStart, m.end())
        iStart = m.end()
    # The separator pattern needs a following sentence start, so it can never
    # match at the very end of the text: the last sentence (or the whole text
    # when there is no separator at all) must be emitted explicitly.
    if iStart < len(sText):
        yield (iStart, len(sText))
|