︙ | | |
11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
|
+
+
+
+
+
+
+
+
+
+
|
from ..graphspell.spellchecker import SpellChecker
from ..graphspell.echo import echo
from . import gc_options
from ..graphspell.tokenizer import Tokenizer
from .gc_rules_graph import dGraph, dRule
# Probe for the LibreOffice / OpenOffice UNO API.
# _bWriterError is True when the three UNO names below can be imported,
# and False when the import fails with ImportError.
try:
# LibreOffice / OpenOffice
from com.sun.star.linguistic2 import SingleProofreadingError
from com.sun.star.text.TextMarkupType import PROOFREADING
from com.sun.star.beans import PropertyValue
#import lightproof_handler_${implname} as opt
_bWriterError = True
except ImportError:
_bWriterError = False
__all__ = [ "lang", "locales", "pkg", "name", "version", "author", \
"load", "parse", "getSpellChecker", \
"setOption", "setOptions", "getOptions", "getDefaultOptions", "getOptionsLabels", "resetOptions", "displayOptions", \
"ignoreRule", "resetIgnoreRules", "reactivateRule", "listRules", "displayRules" ]
__version__ = "${version}"
|
︙ | | |
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
|
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
|
+
+
+
+
+
+
+
+
+
+
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
author = "${author}"
_rules = None # module gc_rules; stays None until _loadRules() imports it
# data
# _sAppContext, _dOptions, _oSpellChecker and _oTokenizer are assigned by load()
_sAppContext = "" # what software is running
_dOptions = None
_oSpellChecker = None
_oTokenizer = None
_aIgnoredRules = set()
# functions
# error factory for regex rules; load() picks the Writer or the dict variant
_createRegexError = None
#### Initialization
def load (sContext="Python"):
    """Initialize the engine for the application named <sContext>.

    Creates the spell checker, stores the application context, copies the
    options returned by gc_options.getOptions(sContext), fetches the
    tokenizer from the spell checker, activates the spell checker storage
    and selects the regex error factory according to <_bWriterError>.

    Errors raised during initialization are printed with
    traceback.print_exc() and not propagated.
    """
    # The module-level names must be declared global before any assignment:
    # assigning them first is a SyntaxError.
    global _oSpellChecker
    global _sAppContext
    global _dOptions
    global _oTokenizer
    global _createRegexError
    try:
        _oSpellChecker = SpellChecker("${lang}", "${dic_main_filename_py}", "${dic_extended_filename_py}", "${dic_community_filename_py}", "${dic_personal_filename_py}")
        _sAppContext = sContext
        _dOptions = dict(gc_options.getOptions(sContext)) # duplication necessary, to be able to reset to default
        _oTokenizer = _oSpellChecker.getTokenizer()
        _oSpellChecker.activateStorage()
        _createRegexError = _createRegexWriterError if _bWriterError else _createRegexDictError
    except Exception:
        # best effort: report the failure, but let KeyboardInterrupt/SystemExit through
        traceback.print_exc()
def _getRules (bParagraph):
    """Return the paragraph rules if <bParagraph> is true, else the sentence rules.

    The rules module is loaded through _loadRules() the first time this
    function is called, i.e. while <_rules> is still None.
    """
    if _rules is None:
        # explicit lazy loading instead of a catch-all except that hid every error
        _loadRules()
    return _rules.lParagraphRules if bParagraph else _rules.lSentenceRules
def _loadRules ():
    """Import the gc_rules module, store it in <_rules> and compile its regexes in place.

    Every rule of every group in lParagraphRules and lSentenceRules gets its
    first element replaced by the compiled pattern. When compilation fails,
    the problem is reported with echo() and a placeholder pattern string is
    stored instead.
    """
    global _rules
    from . import gc_rules
    _rules = gc_rules
    # compile rules regex
    for lRuleGroup in chain(_rules.lParagraphRules, _rules.lSentenceRules):
        for rule in lRuleGroup[1]:
            try:
                rule[0] = re.compile(rule[0])
            except Exception:
                # still best effort, but KeyboardInterrupt/SystemExit are no longer swallowed
                echo("Bad regular expression in # " + str(rule[2]))
                # NOTE(review): the fallback is left as an uncompiled string,
                # unlike the success path; confirm the consumers accept it.
                rule[0] = "(?i)<Grammalecte>"
#### Parsing
def parse (sText, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
"analyses the paragraph sText and returns list of errors"
#sText = unicodedata.normalize("NFC", sText)
|
︙ | | |
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
|
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
|
-
-
+
-
-
|
xErr.aSuggestions = ()
else:
if bUppercase and m.group(iGroup)[0:1].isupper():
xErr.aSuggestions = tuple(map(str.capitalize, m.expand(sRepl).split("|")))
else:
xErr.aSuggestions = tuple(m.expand(sRepl).split("|"))
# Message
if sMsg[0:1] == "=":
sMessage = globals()[sMsg[1:]](s, m)
sMessage = globals()[sMsg[1:]](s, m) if sMsg[0:1] == "=" else m.expand(sMsg)
else:
sMessage = m.expand(sMsg)
xErr.aShortComment = sMessage # sMessage.split("|")[0] # in context menu
xErr.aFullComment = sMessage # sMessage.split("|")[-1] # in dialog
if bShowRuleId:
xErr.aShortComment += " # " + sLineId + " # " + sRuleId
# URL
if sURL:
p = PropertyValue()
|
︙ | | |
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
|
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
|
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
|
dErr["aSuggestions"] = []
else:
if bUppercase and m.group(iGroup)[0:1].isupper():
dErr["aSuggestions"] = list(map(str.capitalize, m.expand(sRepl).split("|")))
else:
dErr["aSuggestions"] = m.expand(sRepl).split("|")
# Message
if sMsg[0:1] == "=":
sMessage = globals()[sMsg[1:]](s, m)
else:
sMessage = m.expand(sMsg)
dErr["sMessage"] = sMessage
dErr["sMessage"] = globals()[sMsg[1:]](s, m) if sMsg[0:1] == "=" else m.expand(sMsg)
if bShowRuleId:
dErr["sMessage"] += " # " + sLineId + " # " + sRuleId
# URL
dErr["URL"] = sURL if sURL else ""
# Context
if bContext:
dErr['sUnderlined'] = sx[m.start(iGroup):m.end(iGroup)]
dErr['sBefore'] = sx[max(0,m.start(iGroup)-80):m.start(iGroup)]
dErr['sAfter'] = sx[m.end(iGroup):m.end(iGroup)+80]
return dErr
def _createTokenWriterError (lToken, sSentence, sSentence0, sRepl, iFirstToken, nStart, nEnd, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
    "build a SingleProofreadingError (Writer, LO/OO) for a token rule"
    xErr = SingleProofreadingError()
    #xErr = uno.createUnoStruct( "com.sun.star.linguistic2.SingleProofreadingError" )
    xErr.nErrorStart = nStart
    xErr.nErrorLength = nEnd - nStart
    xErr.nErrorType = PROOFREADING
    xErr.aRuleIdentifier = sRuleId
    # suggestions: "_" means none, "=name" calls the module-level function <name>,
    # anything else is a literal list separated by "|"
    if sRepl == "_":
        lSugg = []
    elif sRepl[0:1] == "=":
        sSugg = globals()[sRepl[1:]](lToken)
        lSugg = sSugg.split("|") if sSugg else []
    else:
        lSugg = sRepl.split("|")
    # capitalize the suggestions when the first token of the error is capitalized
    if lSugg and bUppercase and lToken[iFirstToken]["sValue"][0:1].isupper():
        lSugg = [sOneSugg.capitalize() for sOneSugg in lSugg]
    xErr.aSuggestions = tuple(lSugg)
    # Message
    sMessage = globals()[sMsg[1:]](lToken) if sMsg[0:1] == "=" else sMsg
    xErr.aShortComment = sMessage # sMessage.split("|")[0] # in context menu
    xErr.aFullComment = sMessage # sMessage.split("|")[-1] # in dialog
    if bShowRuleId:
        xErr.aShortComment += " " + sLineId + " # " + sRuleId
    # URL
    lProperties = []
    if sURL:
        xProperty = PropertyValue()
        xProperty.Name = "FullCommentURL"
        xProperty.Value = sURL
        lProperties.append(xProperty)
    xErr.aProperties = tuple(lProperties)
    return xErr
def _createTokenDictError (lToken, sSentence, sSentence0, sRepl, iFirstToken, nStart, nEnd, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
    "build the error of a token rule as a dictionary"
    # suggestions: "_" means none, "=name" calls the module-level function <name>,
    # anything else is a literal list separated by "|"
    if sRepl == "_":
        lSugg = []
    elif sRepl[0:1] == "=":
        sSugg = globals()[sRepl[1:]](lToken)
        lSugg = sSugg.split("|") if sSugg else []
    else:
        lSugg = sRepl.split("|")
    # capitalize the suggestions when the first token of the error is capitalized
    if lSugg and bUppercase and lToken[iFirstToken]["sValue"][0:1].isupper():
        lSugg = [sOneSugg.capitalize() for sOneSugg in lSugg]
    # Message
    sMessage = globals()[sMsg[1:]](lToken) if sMsg[0:1] == "=" else sMsg
    if bShowRuleId:
        sMessage += " " + sLineId + " # " + sRuleId
    dErr = {
        "nStart": nStart,
        "nEnd": nEnd,
        "sLineId": sLineId,
        "sRuleId": sRuleId,
        "sType": sOption or "notype",
        "aSuggestions": lSugg,
        "sMessage": sMessage,
        "URL": sURL or "",
    }
    # Context: the underlined text plus up to 80 characters on each side
    if bContext:
        dErr['sUnderlined'] = sSentence0[nStart:nEnd]
        dErr['sBefore'] = sSentence0[max(0,nStart-80):nStart]
        dErr['sAfter'] = sSentence0[nEnd:nEnd+80]
    return dErr
def _rewrite (sSentence, sRepl, iGroup, m, bUppercase):
"text processor: write <sRepl> in <sSentence> at <iGroup> position"
nLen = m.end(iGroup) - m.start(iGroup)
if sRepl == "*":
sNew = " " * nLen
elif sRepl == "_":
|
︙ | | |
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
|
327
328
329
330
331
332
333
334
335
336
337
338
339
340
|
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
|
def displayRules (sFilter=None):
    "print, via echo(), a header then one line per rule returned by listRules(<sFilter>)"
    sHeader = "List of rules. Filter: << " + str(sFilter) + " >>"
    echo(sHeader)
    sRowFormat = "{:<10} {:<10} {}"
    for sOpt, sLine, sRule in listRules(sFilter):
        echo(sRowFormat.format(sOpt, sLine, sRule))
#### init
# Select the error factories once, at import time: the Writer variants when
# the LibreOffice / OpenOffice UNO names can be imported, the dictionary
# variants when the import fails with ImportError.
try:
# LibreOffice / OpenOffice
from com.sun.star.linguistic2 import SingleProofreadingError
from com.sun.star.text.TextMarkupType import PROOFREADING
from com.sun.star.beans import PropertyValue
#import lightproof_handler_${implname} as opt
_createRegexError = _createRegexWriterError
_createTokenError = _createTokenWriterError
except ImportError:
_createRegexError = _createRegexDictError
_createTokenError = _createTokenDictError
def load (sContext="Python"):
    """Initialize the engine for the application named <sContext>.

    Creates the spell checker, stores the application context, copies the
    options returned by gc_options.getOptions(sContext), fetches the
    tokenizer from the spell checker and activates the spell checker storage.

    Errors raised during initialization are printed with
    traceback.print_exc() and not propagated.
    """
    global _oSpellChecker
    global _sAppContext
    global _dOptions
    global _oTokenizer
    try:
        _oSpellChecker = SpellChecker("${lang}", "${dic_main_filename_py}", "${dic_extended_filename_py}", "${dic_community_filename_py}", "${dic_personal_filename_py}")
        _sAppContext = sContext
        _dOptions = dict(gc_options.getOptions(sContext)) # duplication necessary, to be able to reset to default
        _oTokenizer = _oSpellChecker.getTokenizer()
        _oSpellChecker.activateStorage()
    except Exception:
        # best effort: report the failure, but let KeyboardInterrupt/SystemExit through
        traceback.print_exc()
def setOption (sOpt, bVal):
    "set the option <sOpt> to <bVal>; names absent from the current options are ignored"
    if sOpt not in _dOptions:
        return
    _dOptions[sOpt] = bVal
def setOptions (dOpt):
|
︙ | | |
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
|
365
366
367
368
369
370
371
372
373
374
375
376
377
378
|
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
|
global _dOptions
_dOptions = dict(gc_options.getOptions(_sAppContext))
def getSpellChecker ():
    "return the module-level spell checker object (None as long as it has not been set)"
    return _oSpellChecker
def _getRules (bParagraph):
    """Return the paragraph rules if <bParagraph> is true, else the sentence rules.

    The rules module is loaded through _loadRules() the first time this
    function is called, i.e. while <_rules> is still None.
    """
    if _rules is None:
        # explicit lazy loading instead of a catch-all except that hid every error
        _loadRules()
    if bParagraph:
        return _rules.lParagraphRules
    return _rules.lSentenceRules
def _loadRules ():
    """Import the gc_rules module, store it in <_rules> and compile its regexes in place.

    Every rule of every group in lParagraphRules and lSentenceRules gets its
    first element replaced by the compiled pattern. When compilation fails,
    the problem is reported with echo() and a placeholder pattern string is
    stored instead.
    """
    global _rules
    from . import gc_rules
    _rules = gc_rules
    # compile rules regex
    for lRuleGroup in chain(_rules.lParagraphRules, _rules.lSentenceRules):
        for rule in lRuleGroup[1]:
            try:
                rule[0] = re.compile(rule[0])
            except Exception:
                # still best effort, but KeyboardInterrupt/SystemExit are no longer swallowed
                echo("Bad regular expression in # " + str(rule[2]))
                # NOTE(review): the fallback is left as an uncompiled string,
                # unlike the success path; confirm the consumers accept it.
                rule[0] = "(?i)<Grammalecte>"
def _getPath ():
    "return <directory of this module's file>/<module name>.py"
    # NOTE(review): __name__ is used verbatim as the file stem, so a dotted
    # module name ends up in the file name; confirm this is what callers expect.
    sModuleFile = sys.modules[__name__].__file__
    sFolder = os.path.dirname(sModuleFile)
    return os.path.join(sFolder, __name__ + ".py")
#### common functions
|
︙ | | |
669
670
671
672
673
674
675
676
677
678
679
680
681
682
|
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
|
+
|
class TokenSentence:
def __init__ (self, sSentence, sSentence0, nOffset):
    "store the sentence data, tokenize <sSentence> and select the error factory"
    self.sSentence = sSentence
    self.sSentence0 = sSentence0
    self.nOffset = nOffset
    # tokens are produced by the module-level tokenizer
    self.lToken = list(_oTokenizer.genTokens(sSentence, True))
    # Writer structures when the UNO API was importable, dictionaries otherwise
    if _bWriterError:
        self.createError = self._createWriterError
    else:
        self.createError = self._createDictError
def _getNextMatchingNodes (self, dToken, dNode):
"generator: return nodes where <dToken> “values” match <dNode> arcs"
# token value
if dToken["sValue"] in dNode:
#print("value found: ", dToken["sValue"])
yield dGraph[dNode[dToken["sValue"]]]
|
︙ | | |
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
|
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
|
-
+
|
# grammar error
print("-")
nTokenErrorStart = nTokenOffset + eAct[0]
nTokenErrorEnd = nTokenOffset + eAct[1]
nErrorStart = self.nOffset + self.lToken[nTokenErrorStart]["nStart"]
nErrorEnd = self.nOffset + self.lToken[nTokenErrorEnd]["nEnd"]
if nErrorStart not in dErrs or eAct[2] > dPriority[nErrorStart]:
dErrs[nErrorStart] = _createTokenError(self.lToken, self.sSentence, self.sSentence0, sWhat, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, True, eAct[3], eAct[4], bShowRuleId, "notype", bContext)
dErrs[nErrorStart] = self.createError(sWhat, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, True, eAct[3], eAct[4], bShowRuleId, "notype", bContext)
dPriority[nErrorStart] = eAct[2]
elif cActionType == "~":
# text processor
print("~")
self._tagAndPrepareTokenForRewriting(sWhat, nTokenOffset + eAct[0], nTokenOffset + eAct[1])
bChange = True
elif cActionType == "=":
|
︙ | | |
792
793
794
795
796
797
798
799
800
801
802
803
804
805
|
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
|
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
else:
print("# error: unknown action at " + sLineId)
elif cActionType == ">":
break
except Exception as e:
raise Exception(str(e), sLineId)
return bChange, dErrs
def _createWriterError (self, sRepl, iFirstToken, nStart, nEnd, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
    "build a SingleProofreadingError (Writer, LO/OO) from the tokens of this sentence"
    xErr = SingleProofreadingError()
    #xErr = uno.createUnoStruct( "com.sun.star.linguistic2.SingleProofreadingError" )
    xErr.nErrorStart = nStart
    xErr.nErrorLength = nEnd - nStart
    xErr.nErrorType = PROOFREADING
    xErr.aRuleIdentifier = sRuleId
    # suggestions: "_" means none, "=name" calls the module-level function <name>,
    # anything else is a literal list separated by "|"
    if sRepl == "_":
        lSugg = []
    elif sRepl[0:1] == "=":
        sSugg = globals()[sRepl[1:]](self.lToken)
        lSugg = sSugg.split("|") if sSugg else []
    else:
        lSugg = sRepl.split("|")
    # capitalize the suggestions when the first token of the error is capitalized
    if lSugg and bUppercase and self.lToken[iFirstToken]["sValue"][0:1].isupper():
        lSugg = [sOneSugg.capitalize() for sOneSugg in lSugg]
    xErr.aSuggestions = tuple(lSugg)
    # Message
    if sMsg[0:1] == "=":
        sMessage = globals()[sMsg[1:]](self.lToken)
    else:
        sMessage = sMsg
    xErr.aShortComment = sMessage # sMessage.split("|")[0] # in context menu
    xErr.aFullComment = sMessage # sMessage.split("|")[-1] # in dialog
    if bShowRuleId:
        xErr.aShortComment += " " + sLineId + " # " + sRuleId
    # URL
    lProperties = []
    if sURL:
        xProperty = PropertyValue()
        xProperty.Name = "FullCommentURL"
        xProperty.Value = sURL
        lProperties.append(xProperty)
    xErr.aProperties = tuple(lProperties)
    return xErr
def _createDictError (self, sRepl, iFirstToken, nStart, nEnd, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
    "build the error as a dictionary from the tokens of this sentence"
    # suggestions: "_" means none, "=name" calls the module-level function <name>,
    # anything else is a literal list separated by "|"
    if sRepl == "_":
        lSugg = []
    elif sRepl[0:1] == "=":
        sSugg = globals()[sRepl[1:]](self.lToken)
        lSugg = sSugg.split("|") if sSugg else []
    else:
        lSugg = sRepl.split("|")
    # capitalize the suggestions when the first token of the error is capitalized
    if lSugg and bUppercase and self.lToken[iFirstToken]["sValue"][0:1].isupper():
        lSugg = [sOneSugg.capitalize() for sOneSugg in lSugg]
    # Message
    if sMsg[0:1] == "=":
        sMessage = globals()[sMsg[1:]](self.lToken)
    else:
        sMessage = sMsg
    if bShowRuleId:
        sMessage += " " + sLineId + " # " + sRuleId
    dErr = {
        "nStart": nStart,
        "nEnd": nEnd,
        "sLineId": sLineId,
        "sRuleId": sRuleId,
        "sType": sOption or "notype",
        "aSuggestions": lSugg,
        "sMessage": sMessage,
        "URL": sURL or "",
    }
    # Context: the underlined text plus up to 80 characters on each side
    if bContext:
        dErr['sUnderlined'] = self.sSentence0[nStart:nEnd]
        dErr['sBefore'] = self.sSentence0[max(0,nStart-80):nStart]
        dErr['sAfter'] = self.sSentence0[nEnd:nEnd+80]
    return dErr
def _tagAndPrepareTokenForRewriting (self, sWhat, nTokenRewriteStart, nTokenRewriteEnd, bUppercase=True):
"text processor: rewrite tokens between <nTokenRewriteStart> and <nTokenRewriteEnd> position"
if sWhat == "*":
# purge text
if nTokenRewriteEnd - nTokenRewriteStart == 0:
self.lToken[nTokenRewriteStart]["bToRemove"] = True
|
︙ | | |
843
844
845
846
847
848
849
850
851
852
853
854
855
856
|
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
|
+
|
nDiffLen = len(dToken["sRealValue"]) - len(dToken["sNewValue"])
sNewRepl = (dToken["sNewValue"] + " " * nDiffLen) if nDiffLen >= 0 else dToken["sNewValue"][:len(dToken["sRealValue"])]
self.sSentence = self.sSentence[:self.nOffset+dToken["nStart"]] + sNewRepl + self.sSentence[self.nOffset+dToken["nEnd"]:]
del dToken["sNewValue"]
print(self.sSentence)
self.lToken.clear()
self.lToken = lNewToken
#### Analyse tokens
def g_morph (dToken, sPattern, sNegPattern=""):
"analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies"
if "lMorph" in dToken:
|
︙ | | |