Grammalecte  Diff

Differences From Artifact [20746fba5d]:

To Artifact [621c240c96]:


187
188
189
190
191
192
193
194

195
196
197
198
199
200
201
187
188
189
190
191
192
193

194
195
196
197
198
199
200
201







-
+







    "set options to default values"
    global _dOptions
    _dOptions = dict(gc_options.getOptions(_sAppContext))


#### Parsing

# Sentence-end pattern, first form: one of . ? ! : ; … followed by ZERO or more
# trailing characters from the class (spaces, . ? ! …, closing quotes » ” " and
# a closing parenthesis), or else any single character matched by `.$` at the
# end of the text.
# NOTE(review): this name is reassigned on the very next statement, so this
# first pattern never takes effect at runtime. It looks like the old side of a
# diff; confirm which definition is intended to remain.
_zEndOfSentence = re.compile(r'([.?!:;…][ .?!… »”")]*|.$)')
# Sentence-end pattern, effective form: same structure, but the trailing class
# must match at least once (`+` instead of `*`), and the class lists extra
# whitespace characters (presumably non-breaking/narrow space variants; verify
# the exact code points in the original file).
_zEndOfSentence = re.compile(r'([.?!:;…][   .?!… »”")]+|.$)')
# Run of non-word characters at the very start of the text (may be empty, so
# `match` always succeeds).
_zBeginOfParagraph = re.compile(r"^\W*")
# Run of non-word characters at the very end of the text (may be empty).
_zEndOfParagraph = re.compile(r"\W*$")

def _getSentenceBoundaries (sText):
    iStart = _zBeginOfParagraph.match(sText).end()
    for m in _zEndOfSentence.finditer(sText):
        yield (iStart, m.end())