187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
|
"set options to default values"
global _dOptions
_dOptions = dict(gc_options.getOptions(_sAppContext))
#### Parsing
_zEndOfSentence = re.compile(r'([.?!:;…][ .?!… »”")]+|.$)')
_zBeginOfParagraph = re.compile(r"^\W*")
_zEndOfParagraph = re.compile(r"\W*$")
def _getSentenceBoundaries (sText):
iStart = _zBeginOfParagraph.match(sText).end()
for m in _zEndOfSentence.finditer(sText):
yield (iStart, m.end())
|
|
|
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
|
"set options to default values"
global _dOptions
_dOptions = dict(gc_options.getOptions(_sAppContext))
#### Parsing
_zEndOfSentence = re.compile(r'([.?!:;…][ .?!…»”"’)]+(?=[A-ZÉÈÎÔ])|.$)')
_zBeginOfParagraph = re.compile(r"^\W*")
_zEndOfParagraph = re.compile(r"\W*$")
def _getSentenceBoundaries (sText):
iStart = _zBeginOfParagraph.match(sText).end()
for m in _zEndOfSentence.finditer(sText):
yield (iStart, m.end())
|