1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
|
"""
Grammalecte
Grammar checker engine
"""
import re
import traceback
#import unicodedata
from itertools import chain
from ..graphspell.spellchecker import SpellChecker
from ..graphspell.echo import echo
from . import gc_options
try:
# LibreOffice / OpenOffice
from com.sun.star.linguistic2 import SingleProofreadingError
from com.sun.star.text.TextMarkupType import PROOFREADING
from com.sun.star.beans import PropertyValue
|
>
>
>
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
|
"""
Grammalecte
Grammar checker engine
"""
import re
import traceback
#import unicodedata
from itertools import chain
from ..graphspell.spellchecker import SpellChecker
from ..graphspell.echo import echo
from .. import text
from . import gc_options
try:
# LibreOffice / OpenOffice
from com.sun.star.linguistic2 import SingleProofreadingError
from com.sun.star.text.TextMarkupType import PROOFREADING
from com.sun.star.beans import PropertyValue
|
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
|
def resetOptions ():
    "restore the module-level options to their default values"
    # Rebind the module global (not a local) to whatever getDefaultOptions() returns.
    global _dOptions
    _dOptions = getDefaultOptions()
#### Parsing
# End of a sentence: either one of the punctuation marks . ? ! : ; … followed by
# at least one non-word character and then (lookahead, not consumed) an uppercase
# letter from A-Z/É/È/Î/Ô, or the very last character of the text.
_zEndOfSentence = re.compile(r'([.?!:;…]\W+(?=[A-ZÉÈÎÔ])|.$)')
# Leading run of non-word characters at the start of the text (may be empty, so it always matches).
_zBeginOfParagraph = re.compile(r"^\W*")
# Trailing run of non-word characters at the end of the text (may be empty).
_zEndOfParagraph = re.compile(r"\W*$")
def _getSentenceBoundaries (sText):
    "generator: yield a (start, end) offset pair for each sentence found in <sText>"
    # The first sentence starts after any leading non-word characters.
    iBegin = _zBeginOfParagraph.match(sText).end()
    for iEnd in (mEnd.end() for mEnd in _zEndOfSentence.finditer(sText)):
        yield (iBegin, iEnd)
        # The next sentence starts exactly where this one ended.
        iBegin = iEnd
def parse (sText, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
    "entry point: build a TextParser for <sText> and return the result of its parse() method"
    # All remaining arguments are forwarded positionally, in the same order.
    return TextParser(sText).parse(sCountry, bDebug, dOptions, bContext)
|
<
<
<
<
<
<
<
<
<
<
<
|
189
190
191
192
193
194
195
196
197
198
199
200
201
202
|
def resetOptions ():
    "restore the module-level options to their default values"
    # Rebind the module global (not a local) to whatever getDefaultOptions() returns.
    global _dOptions
    _dOptions = getDefaultOptions()
#### Parsing
def parse (sText, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
    "entry point: build a TextParser for <sText> and return the result of its parse() method"
    # All remaining arguments are forwarded positionally, in the same order.
    return TextParser(sText).parse(sCountry, bDebug, dOptions, bContext)
|
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
|
sText = sText.replace("'", "’")
if "‑" in sText:
sText = sText.replace("‑", "-") # nobreakdash
if "@@" in sText:
sText = re.sub("@@+", "", sText)
# parse sentences
for iStart, iEnd in _getSentenceBoundaries(sText):
if 4 < (iEnd - iStart) < 2000:
try:
self.sSentence = sText[iStart:iEnd]
self.sSentence0 = self.sText0[iStart:iEnd]
self.nOffsetWithinParagraph = iStart
self.lToken = list(_oTokenizer.genTokens(self.sSentence, True))
self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken if dToken["sType"] != "INFO" }
|
|
|
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
|
sText = sText.replace("'", "’")
if "‑" in sText:
sText = sText.replace("‑", "-") # nobreakdash
if "@@" in sText:
sText = re.sub("@@+", "", sText)
# parse sentences
for iStart, iEnd in text.getSentenceBoundaries(sText):
if 4 < (iEnd - iStart) < 2000:
try:
self.sSentence = sText[iStart:iEnd]
self.sSentence0 = self.sText0[iStart:iEnd]
self.nOffsetWithinParagraph = iStart
self.lToken = list(_oTokenizer.genTokens(self.sSentence, True))
self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken if dToken["sType"] != "INFO" }
|