Index: gc_core/py/lang_core/gc_engine.py ================================================================== --- gc_core/py/lang_core/gc_engine.py +++ gc_core/py/lang_core/gc_engine.py @@ -100,10 +100,21 @@ echo("Bad regular expression in # " + str(aRule[2])) aRule[0] = "(?i)" #### Parsing + +_zEndOfSentence = re.compile(r'([.?!:;…][ .?!… »”")]*|.$)') +_zBeginOfParagraph = re.compile(r"^\W*") +_zEndOfParagraph = re.compile(r"\W*$") + +def _getSentenceBoundaries (sText): + iStart = _zBeginOfParagraph.match(sText).end() + for m in _zEndOfSentence.finditer(sText): + yield (iStart, m.end()) + iStart = m.end() + def parse (sText, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False): "analyses the paragraph sText and returns list of errors" #sText = unicodedata.normalize("NFC", sText) aErrors = None @@ -139,21 +150,10 @@ aErrors.update(errs) except: raise return aErrors.values() # this is a view (iterable) - -_zEndOfSentence = re.compile(r'([.?!:;…][ .?!… »”")]*|.$)') -_zBeginOfParagraph = re.compile(r"^\W*") -_zEndOfParagraph = re.compile(r"\W*$") - -def _getSentenceBoundaries (sText): - iStart = _zBeginOfParagraph.match(sText).end() - for m in _zEndOfSentence.finditer(sText): - yield (iStart, m.end()) - iStart = m.end() - def _proofread (oSentence, s, sx, nOffset, bParagraph, dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext): dErrs = {} bParagraphChange = False bSentenceChange = False