1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
|
#!python3
"""
Text tools
"""
import textwrap
from itertools import chain
def getParagraph (sText):
"generator: returns paragraphs of text"
iStart = 0
sText = sText.replace("\r\n", "\n").replace("\r", "\n")
iEnd = sText.find("\n", iStart)
while iEnd != -1:
|
>
>
>
>
>
>
>
>
>
>
>
>
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
|
#!python3
"""
Text tools
"""
import re
import textwrap
from itertools import chain
# End of a sentence: either a punctuation mark among . ? ! : ; … followed by
# one or more non-word characters and then (lookahead, not consumed) an
# uppercase letter, or the very last character of the text.
_zEndOfSentence = re.compile(r'([.?!:;…]\W+(?=[A-ZÉÈÎÔ])|.$)')
# Leading run of non-word characters (possibly empty) at the start of the text.
_zBeginOfParagraph = re.compile(r"^\W*")


def getSentenceBoundaries (sText):
    """generator: returns start and end of sentences found in <sText>

    Yields (iStart, iEnd) tuples such that sText[iStart:iEnd] is one sentence,
    including the punctuation and separators that end it. The first sentence
    starts after any leading non-word characters.

    Empty or inverted spans are never yielded: when the text starts with
    punctuation, an end-of-sentence match can lie entirely inside the leading
    run that has already been skipped (e.g. "... Hello."), and such a match
    does not delimit any sentence.
    """
    iStart = _zBeginOfParagraph.match(sText).end()
    for m in _zEndOfSentence.finditer(sText):
        iEnd = m.end()
        # Ignore matches that end at or before the current start: they would
        # produce a zero-length (or negative-length) sentence.
        if iEnd > iStart:
            yield (iStart, iEnd)
            iStart = iEnd
def getParagraph (sText):
"generator: returns paragraphs of text"
iStart = 0
sText = sText.replace("\r\n", "\n").replace("\r", "\n")
iEnd = sText.find("\n", iStart)
while iEnd != -1:
|