Grammalecte  Check-in [7a0ff6e88c]

Overview
Comment:[core] gc engine: move getSentenceBoundaries
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | rg
Files: files | file ages | folders
SHA3-256: 7a0ff6e88c5ddbf1edf874056e941fb905a50fdc39628009a14a26e3d14e2080
User & Date: olr on 2018-06-20 08:27:35
Other Links: branch diff | manifest | tags
Context
2018-06-20
09:18
[fr] faux positif: ne + negadv check-in: 1e6fb17431 user: olr tags: fr, rg
08:27
[core] gc engine: move getSentenceBoundaries check-in: 7a0ff6e88c user: olr tags: core, rg
08:25
[core] gc engine: move plugins code check-in: 886177ba3e user: olr tags: core, rg
Changes

Modified gc_core/py/lang_core/gc_engine.py from [8ff23d9ea2] to [5d267395dd].

98
99
100
101
102
103
104











105
106
107
108
109
110
111
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122







+
+
+
+
+
+
+
+
+
+
+







                    aRule[0] = re.compile(aRule[0])
                except:
                    echo("Bad regular expression in # " + str(aRule[2]))
                    aRule[0] = "(?i)<Grammalecte>"


#### Parsing

# End of sentence: one of . ? ! : ; … followed by any run of spaces, closing
# punctuation or closing quotes -- or, failing that, the last character of the text.
_zEndOfSentence = re.compile(r'([.?!:;…][ .?!… »”")]*|.$)')
# Leading run of non-word characters at the very beginning of the text.
_zBeginOfParagraph = re.compile(r"^\W*")
# Trailing run of non-word characters at the very end of the text.
_zEndOfParagraph = re.compile(r"\W*$")

def _getSentenceBoundaries (sText):
    "generator: yields a tuple (start, end) of offsets in <sText> for each sentence found"
    # the first sentence begins after the leading non-word characters
    iBegin = _zBeginOfParagraph.match(sText).end()
    # each following sentence begins where the previous one ends
    for iEnd in (oMatch.end() for oMatch in _zEndOfSentence.finditer(sText)):
        yield (iBegin, iEnd)
        iBegin = iEnd


def parse (sText, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
    "analyses the paragraph sText and returns list of errors"
    #sText = unicodedata.normalize("NFC", sText)
    aErrors = None
    sRealText = sText
    dPriority = {}  # Key = position; value = priority
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
148
149
150
151
152
153
154











155
156
157
158
159
160
161







-
-
-
-
-
-
-
-
-
-
-







                oSentence = TokenSentence(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart)
                _, errs = _proofread(oSentence, sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
                aErrors.update(errs)
            except:
                raise
    return aErrors.values() # this is a view (iterable)


# End of sentence: one of . ? ! : ; … followed by any run of spaces, closing
# punctuation or closing quotes -- or, failing that, the last character of the text.
_zEndOfSentence = re.compile(r'([.?!:;…][ .?!… »”")]*|.$)')
# Leading run of non-word characters at the very beginning of the text.
_zBeginOfParagraph = re.compile(r"^\W*")
# Trailing run of non-word characters at the very end of the text.
_zEndOfParagraph = re.compile(r"\W*$")

def _getSentenceBoundaries (sText):
    "generator: yields a tuple (start, end) of offsets in <sText> for each sentence found"
    # the first sentence begins after the leading non-word characters
    iBegin = _zBeginOfParagraph.match(sText).end()
    # each following sentence begins where the previous one ends
    for iEnd in (oMatch.end() for oMatch in _zEndOfSentence.finditer(sText)):
        yield (iBegin, iEnd)
        iBegin = iEnd


def _proofread (oSentence, s, sx, nOffset, bParagraph, dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext):
    dErrs = {}
    bParagraphChange = False
    bSentenceChange = False
    dTokenPos = oSentence.dTokenPos if oSentence else {}
    for sOption, lRuleGroup in _getRules(bParagraph):