98
99
100
101
102
103
104
105
106
107
108
109
110
111
|
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
|
+
+
+
+
+
+
+
+
+
+
+
|
aRule[0] = re.compile(aRule[0])
except:
echo("Bad regular expression in # " + str(aRule[2]))
aRule[0] = "(?i)<Grammalecte>"
#### Parsing
_zEndOfSentence = re.compile(r'([.?!:;…][ .?!… »”")]*|.$)')
_zBeginOfParagraph = re.compile(r"^\W*")
_zEndOfParagraph = re.compile(r"\W*$")
def _getSentenceBoundaries (sText):
iStart = _zBeginOfParagraph.match(sText).end()
for m in _zEndOfSentence.finditer(sText):
yield (iStart, m.end())
iStart = m.end()
def parse (sText, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
"analyses the paragraph sText and returns list of errors"
#sText = unicodedata.normalize("NFC", sText)
aErrors = None
sRealText = sText
dPriority = {} # Key = position; value = priority
|
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
|
148
149
150
151
152
153
154
155
156
157
158
159
160
161
|
-
-
-
-
-
-
-
-
-
-
-
|
oSentence = TokenSentence(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart)
_, errs = _proofread(oSentence, sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
aErrors.update(errs)
except:
raise
return aErrors.values() # this is a view (iterable)
_zEndOfSentence = re.compile(r'([.?!:;…][ .?!… »”")]*|.$)')
_zBeginOfParagraph = re.compile(r"^\W*")
_zEndOfParagraph = re.compile(r"\W*$")
def _getSentenceBoundaries (sText):
iStart = _zBeginOfParagraph.match(sText).end()
for m in _zEndOfSentence.finditer(sText):
yield (iStart, m.end())
iStart = m.end()
def _proofread (oSentence, s, sx, nOffset, bParagraph, dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext):
dErrs = {}
bParagraphChange = False
bSentenceChange = False
dTokenPos = oSentence.dTokenPos if oSentence else {}
for sOption, lRuleGroup in _getRules(bParagraph):
|