Grammalecte  Diff

Differences From Artifact [ca233af37a]:

To Artifact [52335d206d]:


187
188
189
190
191
192
193
194

195
196
197
198
199
200
201
187
188
189
190
191
192
193

194
195
196
197
198
199
200
201







-
+







    "set options to default values"
    global _dOptions
    _dOptions = dict(gc_options.getOptions(_sAppContext))


#### Parsing

_zEndOfSentence = re.compile(r'([.?!:;…][   .?!…»”"’)]+(?=[A-ZÉÈÎÔ])|.$)')
_zEndOfSentence = re.compile(r'([.?!:;…][   .?!…»«“”"‘’)]+(?=[A-ZÉÈÎÔ])|.$)')
_zBeginOfParagraph = re.compile(r"^\W*")
_zEndOfParagraph = re.compile(r"\W*$")

def _getSentenceBoundaries (sText):
    iStart = _zBeginOfParagraph.match(sText).end()
    for m in _zEndOfSentence.finditer(sText):
        yield (iStart, m.end())