Grammalecte  Diff

Differences From Artifact [b546c7e179]:

To Artifact [c8470c3c39]:


1
2
3
4
5
6
7
8
9
10
11
12



13
14
15
16
17
18
19
"""
Grammalecte
Grammar checker engine
"""

import re
import traceback
#import unicodedata
from itertools import chain

from ..graphspell.spellchecker import SpellChecker
from ..graphspell.echo import echo



from . import gc_options

try:
    # LibreOffice / OpenOffice
    from com.sun.star.linguistic2 import SingleProofreadingError
    from com.sun.star.text.TextMarkupType import PROOFREADING
    from com.sun.star.beans import PropertyValue












>
>
>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
"""
Grammalecte
Grammar checker engine
"""

import re
import traceback
#import unicodedata
from itertools import chain

from ..graphspell.spellchecker import SpellChecker
from ..graphspell.echo import echo

from .. import text

from . import gc_options

try:
    # LibreOffice / OpenOffice
    from com.sun.star.linguistic2 import SingleProofreadingError
    from com.sun.star.text.TextMarkupType import PROOFREADING
    from com.sun.star.beans import PropertyValue
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
def resetOptions ():
    "rebind the module-level <_dOptions> to whatever getDefaultOptions() returns"
    global _dOptions
    dDefaultOptions = getDefaultOptions()
    _dOptions = dDefaultOptions


#### Parsing

# Sentence end: either one of the marks . ? ! : ; … followed by non-word
# characters and then (lookahead only) one of the listed uppercase letters,
# or the single character matched by `.$` at the end of the text.
_zEndOfSentence = re.compile(r'([.?!:;…]\W+(?=[A-ZÉÈÎÔ])|.$)')
# Run of non-word characters (possibly empty) at the beginning of the text.
_zBeginOfParagraph = re.compile(r"^\W*")
# Run of non-word characters (possibly empty) at the end of the text.
_zEndOfParagraph = re.compile(r"\W*$")

def _getSentenceBoundaries (sText):
    """generator: yields one (start, end) index pair per match of <_zEndOfSentence> in <sText>

    The first start is the position just after the leading non-word characters;
    every following start is the end of the previous pair.
    Nothing is yielded when <_zEndOfSentence> finds no match (e.g. empty text).
    """
    iBegin = _zBeginOfParagraph.match(sText).end()
    for iEnd in (mEnd.end()  for mEnd in _zEndOfSentence.finditer(sText)):
        yield (iBegin, iEnd)
        # the next sentence starts where this one stops
        iBegin = iEnd


def parse (sText, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
    "entry point to analyze a text: wraps <sText> in a TextParser and returns the result of its parse() method"
    return TextParser(sText).parse(sCountry, bDebug, dOptions, bContext)









<
<
<
<
<
<
<
<
<
<
<







189
190
191
192
193
194
195











196
197
198
199
200
201
202
def resetOptions ():
    "rebind the module-level <_dOptions> to whatever getDefaultOptions() returns"
    global _dOptions
    dDefaultOptions = getDefaultOptions()
    _dOptions = dDefaultOptions


#### Parsing












def parse (sText, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
    "entry point to analyze a text: wraps <sText> in a TextParser and returns the result of its parse() method"
    return TextParser(sText).parse(sCountry, bDebug, dOptions, bContext)


262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
            sText = sText.replace("'", "’")
        if "‑" in sText:
            sText = sText.replace("‑", "-") # nobreakdash
        if "@@" in sText:
            sText = re.sub("@@+", "", sText)

        # parse sentences
        for iStart, iEnd in _getSentenceBoundaries(sText):
            if 4 < (iEnd - iStart) < 2000:
                try:
                    self.sSentence = sText[iStart:iEnd]
                    self.sSentence0 = self.sText0[iStart:iEnd]
                    self.nOffsetWithinParagraph = iStart
                    self.lToken = list(_oTokenizer.genTokens(self.sSentence, True))
                    self.dTokenPos = { dToken["nStart"]: dToken  for dToken in self.lToken  if dToken["sType"] != "INFO" }







|







254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
            sText = sText.replace("'", "’")
        if "‑" in sText:
            sText = sText.replace("‑", "-") # nobreakdash
        if "@@" in sText:
            sText = re.sub("@@+", "", sText)

        # parse sentences
        for iStart, iEnd in text.getSentenceBoundaries(sText):
            if 4 < (iEnd - iStart) < 2000:
                try:
                    self.sSentence = sText[iStart:iEnd]
                    self.sSentence0 = self.sText0[iStart:iEnd]
                    self.nOffsetWithinParagraph = iStart
                    self.lToken = list(_oTokenizer.genTokens(self.sSentence, True))
                    self.dTokenPos = { dToken["nStart"]: dToken  for dToken in self.lToken  if dToken["sType"] != "INFO" }