8
9
10
11
12
13
14
15
16
17
18
19
20
21
|
#import unicodedata
from itertools import chain
from ..graphspell.spellchecker import SpellChecker
from ..graphspell.echo import echo
from . import gc_options
__all__ = [ "lang", "locales", "pkg", "name", "version", "author", \
"load", "parse", "getSpellChecker", \
"setOption", "setOptions", "getOptions", "getDefaultOptions", "getOptionsLabels", "resetOptions", "displayOptions", \
"ignoreRule", "resetIgnoreRules", "reactivateRule", "listRules", "displayRules" ]
__version__ = "${version}"
|
>
>
>
|
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
#import unicodedata
from itertools import chain
from ..graphspell.spellchecker import SpellChecker
from ..graphspell.echo import echo
from . import gc_options
from ..graphspell.tokenizer import Tokenizer
from .gc_rules_graph import dGraph
__all__ = [ "lang", "locales", "pkg", "name", "version", "author", \
"load", "parse", "getSpellChecker", \
"setOption", "setOptions", "getOptions", "getDefaultOptions", "getOptionsLabels", "resetOptions", "displayOptions", \
"ignoreRule", "resetIgnoreRules", "reactivateRule", "listRules", "displayRules" ]
__version__ = "${version}"
|
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
|
_rules = None # module gc_rules (presumably bound by load() or rule loading — not assigned in this chunk; confirm)
# data
_sAppContext = ""                           # what software is running
_dOptions = None                            # active options dict (populated by load())
_aIgnoredRules = set()                      # identifiers of rules to skip (presumably managed by ignoreRule()/resetIgnoreRules — confirm)
_oSpellChecker = None                       # SpellChecker instance (created by load())
_dAnalyses = {}                             # cache for data from dictionary
#### Parsing
def parse (sText, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
    "analyses the paragraph sText and returns errors (a dict view, iterable)"
    #sText = unicodedata.normalize("NFC", sText)
    sAlt = sText                # keep the unmodified text for error positions/context
    dDA = {}                    # Disambiguisator. Key = position; value = list of morphologies
    dPriority = {}              # Key = position; value = priority
    dOpt = _dOptions if not dOptions else dOptions
    # parse paragraph
    # (the former "try: ... except: raise" wrapper was a no-op and has been removed;
    # exceptions from _proofread propagate unchanged)
    sNew, aErrors = _proofread(sText, sAlt, 0, True, dDA, dPriority, sCountry, dOpt, bDebug, bContext)
    if sNew:
        sText = sNew
    # cleanup: normalize typographic characters so sentence-level rules see plain text
    if " " in sText:
        sText = sText.replace(" ", ' ') # nbsp
    if " " in sText:
        sText = sText.replace(" ", ' ') # nnbsp
    if "'" in sText:
        sText = sText.replace("'", "’")
    if "‑" in sText:
        sText = sText.replace("‑", "-") # nobreakdash
    # parse sentences
    for iStart, iEnd in _getSentenceBoundaries(sText):
        if 4 < (iEnd - iStart) < 2000:
            dDA.clear()
            _, errs = _proofread(sText[iStart:iEnd], sAlt[iStart:iEnd], iStart, False, dDA, dPriority, sCountry, dOpt, bDebug, bContext)
            aErrors.update(errs)
    return aErrors.values() # this is a view (iterable)
def _getSentenceBoundaries (sText):
iStart = _zBeginOfParagraph.match(sText).end()
|
<
|
|
|
>
|
>
>
>
|
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
|
_rules = None # module gc_rules (presumably bound by load() or rule loading — not assigned in this chunk; confirm)
# data
_sAppContext = ""                           # what software is running
_dOptions = None                            # active options dict (populated by load())
_aIgnoredRules = set()                      # identifiers of rules to skip (presumably managed by ignoreRule()/resetIgnoreRules — confirm)
_oSpellChecker = None                       # SpellChecker instance (created by load())
_oTokenizer = None                          # tokenizer for the token-based checker (created by load())
#### Parsing
def parse (sText, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
    "analyses the paragraph sText and returns errors (a dict view, iterable)"
    #sText = unicodedata.normalize("NFC", sText)
    sRealText = sText           # keep the unmodified text for error positions/context
    dDA = {}                    # Disambiguisator. Key = position; value = list of morphologies
    dPriority = {}              # Key = position; value = priority
    dOpt = _dOptions if not dOptions else dOptions
    # parse paragraph
    # (the former "try: ... except: raise" wrapper was a no-op and has been removed;
    # exceptions from _proofread propagate unchanged)
    sNew, aErrors = _proofread(sText, sRealText, 0, True, dDA, dPriority, sCountry, dOpt, bDebug, bContext)
    if sNew:
        sText = sNew
    # cleanup: normalize typographic characters so sentence-level rules see plain text
    if " " in sText:
        sText = sText.replace(" ", ' ') # nbsp
    if " " in sText:
        sText = sText.replace(" ", ' ') # nnbsp
    if "'" in sText:
        sText = sText.replace("'", "’")
    if "‑" in sText:
        sText = sText.replace("‑", "-") # nobreakdash
    # parse sentences
    for iStart, iEnd in _getSentenceBoundaries(sText):
        if 4 < (iEnd - iStart) < 2000:
            dDA.clear()
            # regex parser
            _, errs = _proofread(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dDA, dPriority, sCountry, dOpt, bDebug, bContext)
            aErrors.update(errs)
            # token parser
            # NOTE(review): the errors returned by the token parser are currently discarded
            oSentence = TokenSentence(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, dPriority, sCountry, dOpt, bDebug, bContext)
            oSentence.parse()
    return aErrors.values() # this is a view (iterable)
def _getSentenceBoundaries (sText):
iStart = _zBeginOfParagraph.match(sText).end()
|
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
|
_createError = _createDictError
def load (sContext="Python"):
    "initialize the grammar checker: spell checker, app context and options"
    global _oSpellChecker
    global _sAppContext
    global _dOptions
    try:
        _oSpellChecker = SpellChecker("${lang}", "${dic_main_filename_py}", "${dic_extended_filename_py}", "${dic_community_filename_py}", "${dic_personal_filename_py}")
        _sAppContext = sContext
        _dOptions = dict(gc_options.getOptions(sContext))   # duplication necessary, to be able to reset to default
    except Exception:
        # narrowed from a bare "except:" which would also swallow SystemExit/KeyboardInterrupt
        traceback.print_exc()
def setOption (sOpt, bVal):
    "set the value of option sOpt (silently ignored if sOpt is unknown)"
    if sOpt not in _dOptions:
        return
    _dOptions[sOpt] = bVal
|
>
>
>
|
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
|
_createError = _createDictError
def load (sContext="Python"):
    "initialize the grammar checker: spell checker, tokenizer, app context and options"
    global _oSpellChecker
    global _sAppContext
    global _dOptions
    global _oTokenizer
    try:
        _oSpellChecker = SpellChecker("${lang}", "${dic_main_filename_py}", "${dic_extended_filename_py}", "${dic_community_filename_py}", "${dic_personal_filename_py}")
        _sAppContext = sContext
        _dOptions = dict(gc_options.getOptions(sContext))   # duplication necessary, to be able to reset to default
        _oTokenizer = _oSpellChecker.getTokenizer()
        _oSpellChecker.activateStorage()
    except Exception:
        # narrowed from a bare "except:" which would also swallow SystemExit/KeyboardInterrupt
        traceback.print_exc()
def setOption (sOpt, bVal):
    "set the value of option sOpt (silently ignored if sOpt is unknown)"
    if sOpt not in _dOptions:
        return
    _dOptions[sOpt] = bVal
|
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
|
return os.path.join(os.path.dirname(sys.modules[__name__].__file__), __name__ + ".py")
#### common functions
# common regexes
_zEndOfSentence = re.compile('([.?!:;…][ .?!… »”")]*|.$)')
_zBeginOfParagraph = re.compile("^\W*")
_zEndOfParagraph = re.compile("\W*$")
_zNextWord = re.compile(" +(\w[\w-]*)")
_zPrevWord = re.compile("(\w[\w-]*) +$")
def option (sOpt):
    "return True if option sOpt is active"
    try:
        return _dOptions[sOpt]
    except KeyError:
        return False
def displayInfo (dDA, tWord):
    "for debugging: display disambiguation data and morphologies of tWord; always return True"
    if not tWord:
        echo("> nothing to find")
        return True
    if tWord[1] not in _dAnalyses:
        # fill the cache on demand; bail out if the word is unknown
        if not _storeMorphFromFSA(tWord[1]):
            echo("> not in FSA")
            return True
    if tWord[0] in dDA:
        echo("DA: " + str(dDA[tWord[0]]))
    echo("FSA: " + str(_dAnalyses[tWord[1]]))
    return True
def _storeMorphFromFSA (sWord):
    "retrieves morphologies list from _oSpellChecker -> _dAnalyses; return True if any found"
    global _dAnalyses
    lMorph = _oSpellChecker.getMorph(sWord)
    _dAnalyses[sWord] = lMorph
    return bool(lMorph)
def morph (dDA, tWord, sPattern, bStrict=True, bNoWord=False):
    "analyse a tuple (position, word), return True if sPattern in morphologies (disambiguation on)"
    if not tWord:
        return bNoWord
    if tWord[1] not in _dAnalyses:
        if not _storeMorphFromFSA(tWord[1]):
            return False
    # disambiguated morphologies take precedence over the cached dictionary ones
    lMorph = dDA[tWord[0]] if tWord[0] in dDA else _dAnalyses[tWord[1]]
    if not lMorph:
        return False
    zPattern = re.compile(sPattern)
    fnCheck = all if bStrict else any
    return fnCheck(bool(zPattern.search(sMorph)) for sMorph in lMorph)
def morphex (dDA, tWord, sPattern, sNegPattern, bNoWord=False):
    "analyse a tuple (position, word), returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on)"
    if not tWord:
        return bNoWord
    if tWord[1] not in _dAnalyses:
        if not _storeMorphFromFSA(tWord[1]):
            return False
    lMorph = dDA[tWord[0]] if tWord[0] in dDA else _dAnalyses[tWord[1]]
    # negative condition first: any matching morphology rejects the word
    zNegPattern = re.compile(sNegPattern)
    for sMorph in lMorph:
        if zNegPattern.search(sMorph):
            return False
    # then the positive pattern must match at least one morphology
    zPattern = re.compile(sPattern)
    return any(zPattern.search(sMorph) for sMorph in lMorph)
def analyse (sWord, sPattern, bStrict=True):
    "analyse a word, return True if sPattern in morphologies (disambiguation off)"
    if sWord not in _dAnalyses:
        if not _storeMorphFromFSA(sWord):
            return False
    lMorph = _dAnalyses[sWord]
    if not lMorph:
        return False
    zPattern = re.compile(sPattern)
    fnCheck = all if bStrict else any
    return fnCheck(bool(zPattern.search(sMorph)) for sMorph in lMorph)
def analysex (sWord, sPattern, sNegPattern):
    "analyse a word, returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation off)"
    if sWord not in _dAnalyses:
        if not _storeMorphFromFSA(sWord):
            return False
    # negative condition first: any matching morphology rejects the word
    zNegPattern = re.compile(sNegPattern)
    for sMorph in _dAnalyses[sWord]:
        if zNegPattern.search(sMorph):
            return False
    # then the positive pattern must match at least one morphology
    zPattern = re.compile(sPattern)
    return any(zPattern.search(sMorph) for sMorph in _dAnalyses[sWord])
def stem (sWord):
    "returns a list of sWord's stems"
    if not sWord:
        return []
    if sWord not in _dAnalyses:
        if not _storeMorphFromFSA(sWord):
            return []
    lStems = []
    # the stem is the text between the leading ">" and the first space of each morphology
    for sMorph in _dAnalyses[sWord]:
        lStems.append(sMorph[1:sMorph.find(" ")])
    return lStems
## functions to get text outside pattern scope
# warning: check compile_rules.py to understand how it works
def nextword (s, iStart, n):
|
|
|
|
|
|
|
>
|
|
<
<
<
<
<
<
<
<
<
|
|
>
<
|
<
|
|
|
|
>
|
|
<
<
<
<
<
<
<
<
|
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
|
return os.path.join(os.path.dirname(sys.modules[__name__].__file__), __name__ + ".py")
#### common functions
# common regexes
_zEndOfSentence = re.compile(r'([.?!:;…][ .?!… »”")]*|.$)')   # sentence terminator (punctuation, optional trailing quotes/spaces) or last char
_zBeginOfParagraph = re.compile(r"^\W*")                      # leading non-word characters
_zEndOfParagraph = re.compile(r"\W*$")                        # trailing non-word characters
_zNextWord = re.compile(r" +(\w[\w-]*)")                      # first word after one or more spaces
_zPrevWord = re.compile(r"(\w[\w-]*) +$")                     # last word before trailing spaces
def option (sOpt):
    "return True if option sOpt is active"
    try:
        return _dOptions[sOpt]
    except KeyError:
        return False
def displayInfo (dDA, tWord):
    "for debugging: display disambiguation data and morphologies of tWord; always return True"
    if not tWord:
        echo("> nothing to find")
        return True
    nPos, sValue = tWord[0], tWord[1]
    lMorph = _oSpellChecker.getMorph(sValue)
    if not lMorph:
        echo("> not in dictionary")
        return True
    if nPos in dDA:
        echo("DA: " + str(dDA[nPos]))
    echo("FSA: " + str(lMorph))
    return True
def morph (dDA, tWord, sPattern, bStrict=True, bNoWord=False):
    "analyse a tuple (position, word), return True if sPattern in morphologies (disambiguation on)"
    if not tWord:
        return bNoWord
    # disambiguated morphologies take precedence over the dictionary ones
    if tWord[0] in dDA:
        lMorph = dDA[tWord[0]]
    else:
        lMorph = _oSpellChecker.getMorph(tWord[1])
    if not lMorph:
        return False
    zPattern = re.compile(sPattern)
    fnCheck = all if bStrict else any
    return fnCheck(bool(zPattern.search(sMorph)) for sMorph in lMorph)
def morphex (dDA, tWord, sPattern, sNegPattern, bNoWord=False):
    "analyse a tuple (position, word), returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on)"
    if not tWord:
        return bNoWord
    # disambiguated morphologies take precedence over the dictionary ones
    if tWord[0] in dDA:
        lMorph = dDA[tWord[0]]
    else:
        lMorph = _oSpellChecker.getMorph(tWord[1])
    # negative condition first: any matching morphology rejects the word
    zNegPattern = re.compile(sNegPattern)
    for sMorph in lMorph:
        if zNegPattern.search(sMorph):
            return False
    # then the positive pattern must match at least one morphology
    zPattern = re.compile(sPattern)
    return any(zPattern.search(sMorph) for sMorph in lMorph)
def analyse (sWord, sPattern, bStrict=True):
    "analyse a word, return True if sPattern in morphologies (disambiguation off)"
    lMorph = _oSpellChecker.getMorph(sWord)
    if not lMorph:
        return False
    zPattern = re.compile(sPattern)
    fnCheck = all if bStrict else any
    return fnCheck(bool(zPattern.search(sMorph)) for sMorph in lMorph)
def analysex (sWord, sPattern, sNegPattern):
    "analyse a word, returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation off)"
    lMorph = _oSpellChecker.getMorph(sWord)
    if not lMorph:
        return False
    # negative condition first: any matching morphology rejects the word
    zNegPattern = re.compile(sNegPattern)
    for sMorph in lMorph:
        if zNegPattern.search(sMorph):
            return False
    # then the positive pattern must match at least one morphology
    zPattern = re.compile(sPattern)
    return any(zPattern.search(sMorph) for sMorph in lMorph)
## functions to get text outside pattern scope
# warning: check compile_rules.py to understand how it works
def nextword (s, iStart, n):
|
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
|
#### Disambiguator
def select (dDA, nPos, sWord, sPattern, lDefault=None):
    "disambiguation: keep at position nPos only the morphologies of sWord matching sPattern; always return True"
    if not sWord or nPos in dDA:
        return True
    if sWord not in _dAnalyses and not _storeMorphFromFSA(sWord):
        return True
    lMorph = _dAnalyses[sWord]
    if len(lMorph) == 1:
        # nothing to disambiguate
        return True
    lSelect = [ sMorph for sMorph in lMorph if re.search(sPattern, sMorph) ]
    if not lSelect:
        if lDefault:
            dDA[nPos] = lDefault
        return True
    if len(lSelect) != len(lMorph):
        dDA[nPos] = lSelect
    return True
def exclude (dDA, nPos, sWord, sPattern, lDefault=None):
    "disambiguation: discard at position nPos the morphologies of sWord matching sPattern; always return True"
    if not sWord or nPos in dDA:
        return True
    if sWord not in _dAnalyses and not _storeMorphFromFSA(sWord):
        return True
    lMorph = _dAnalyses[sWord]
    if len(lMorph) == 1:
        # nothing to disambiguate
        return True
    lSelect = [ sMorph for sMorph in lMorph if not re.search(sPattern, sMorph) ]
    if not lSelect:
        if lDefault:
            dDA[nPos] = lDefault
        return True
    if len(lSelect) != len(lMorph):
        dDA[nPos] = lSelect
    return True
def define (dDA, nPos, lMorph):
    "disambiguation: set the morphologies list for position nPos; always return True"
    dDA[nPos] = lMorph
    #echo("= "+str(nPos)+" "+str(dDA[nPos]))
    return True
#### GRAMMAR CHECKER PLUGINS
${plugins}
${callables}
|
|
<
|
|
|
<
<
|
<
|
|
|
<
<
<
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
|
#### Disambiguator
def select (dDA, nPos, sWord, sPattern, lDefault=None):
    "disambiguation: keep at position nPos only the morphologies of sWord matching sPattern; always return True"
    if not sWord or nPos in dDA:
        return True
    lMorph = _oSpellChecker.getMorph(sWord)
    if not lMorph or len(lMorph) == 1:
        # nothing to disambiguate
        return True
    lSelect = [ sMorph for sMorph in lMorph if re.search(sPattern, sMorph) ]
    if not lSelect:
        if lDefault:
            dDA[nPos] = lDefault
        return True
    if len(lSelect) != len(lMorph):
        dDA[nPos] = lSelect
    return True
def exclude (dDA, nPos, sWord, sPattern, lDefault=None):
    "disambiguation: discard at position nPos the morphologies of sWord matching sPattern; always return True"
    if not sWord or nPos in dDA:
        return True
    lMorph = _oSpellChecker.getMorph(sWord)
    if not lMorph or len(lMorph) == 1:
        # nothing to disambiguate
        return True
    lSelect = [ sMorph for sMorph in lMorph if not re.search(sPattern, sMorph) ]
    if not lSelect:
        if lDefault:
            dDA[nPos] = lDefault
        return True
    if len(lSelect) != len(lMorph):
        dDA[nPos] = lSelect
    return True
def define (dDA, nPos, lMorph):
    "disambiguation: set the morphologies list for position nPos; always return True"
    dDA[nPos] = lMorph
    return True
#### GRAMMAR CHECKER PLUGINS
${plugins}
#### CALLABLES (generated code)
${callables}
#### TOKEN SENTENCE CHECKER
class TokenSentence:
    # Token-based rule engine walking the rule graph <dGraph>.
    # NOTE(review): this code looks unfinished — several names used below
    # (dRule, dErrs, nOffset, nSentenceOffset, m, nPriority, sRuleId, sCountry,
    # bUppercase, bIdRule, sOption, bContext) are not defined in this class.
    # Indentation below is reconstructed from a diff rendering — confirm against the repository.

    def __init__ (self, sSentence, sSentence0, iStart, dPriority, sCountry, dOpt, bDebug, bContext):
        # sSentence: working text; sSentence0: original (unmodified) text; iStart: offset in the paragraph
        self.sSentence = sSentence
        self.sSentence0 = sSentence0
        self.iStart = iStart
        self.lToken = list(_oTokenizer.genTokens(sSentence))
        # NOTE(review): dPriority, sCountry, dOpt, bDebug, bContext are accepted but never stored.

    def parse (self):
        "advance graph pointers over the token stream and execute rule actions; returns a dict of errors"
        dErr = {}
        lPointer = []
        for dToken in self.lToken:
            # advance every live pointer with the current token; drop pointers that cannot move
            for i, dPointer in enumerate(lPointer):
                bValid = False
                for dNode in self._getNextMatchingNodes(dToken, dPointer["dNode"]):
                    dPointer["nOffset"] = dToken["i"]
                    dPointer["dNode"] = dNode
                    bValid = True
                if not bValid:
                    del lPointer[i]  # NOTE(review): deleting while enumerating skips the following pointer
            # try to start a new match at the current token
            for dNode in self._getNextMatchingNodes(dToken, dGraph):
                lPointer.append({"nOffset": 0, "dNode": dNode})
            # execute rules for pointers that reached a <rules> node
            for dPointer in lPointer:
                if "<rules>" in dPointer["dNode"]:
                    for dNode in dGraph[dPointer["dNode"]["<rules>"]]:
                        dErr = self._executeActions(dNode, nOffset)  # NOTE(review): nOffset is undefined here
        return dErr

    def _getNextMatchingNodes (self, dToken, dNode):
        "generator: yield the successor nodes of <dNode> reachable with <dToken>"
        # token value
        if dToken["sValue"] in dNode:
            yield dGraph[dNode[dToken["sValue"]]]
        # token lemmas
        if "<lemmas>" in dNode:
            for sLemma in _oSpellChecker.getLemma(dToken["sValue"]):
                if sLemma in dNode["<lemmas>"]:
                    yield dGraph[dNode["<lemmas>"][sLemma]]
        # universal arc
        if "*" in dNode:
            yield dGraph[dNode["*"]]
        # regex value arcs
        if "<re_value>" in dNode:
            for sRegex in dNode["<re_value>"]:
                if re.search(sRegex, dToken["sValue"]):
                    yield dGraph[dNode["<re_value>"][sRegex]]
        # regex morph arcs
        if "<re_morph>" in dNode:
            for sRegex in dNode["<re_morph>"]:
                for sMorph in _oSpellChecker.getMorph(dToken["sValue"]):
                    if re.search(sRegex, sMorph):
                        yield dGraph[dNode["<re_morph>"][sRegex]]

    def _executeActions (self, dNode, nOffset):
        "execute the actions attached to <dNode>"
        for sLineId, nextNodeKey in dNode.items():
            for sArc in dGraph[nextNodeKey]:
                print(sArc)  # debug trace
                bCondMemo = None
                sFuncCond, cActionType, sWhat, *eAct = dRule[sArc]  # NOTE(review): dRule is undefined in this scope
                # action in lActions: [ condition, action type, replacement/suggestion/action[, iGroupStart, iGroupEnd[, message, URL]] ]
                try:
                    bCondMemo = not sFuncCond or globals()[sFuncCond](self, sCountry, bCondMemo)
                    if bCondMemo:
                        if cActionType == "-":
                            # grammar error
                            print("-")
                            nErrorStart = nSentenceOffset + m.start(eAct[0])  # NOTE(review): nSentenceOffset and m are undefined here
                            nErrorEnd = nSentenceOffset + m.start(eAct[1])
                            if nErrorStart not in dErrs or nPriority > dPriority[nErrorStart]:  # NOTE(review): dErrs/nPriority/dPriority undefined here
                                dErrs[nErrorStart] = _createError(self, sWhat, nErrorStart, nErrorEnd, sLineId, bUppercase, eAct[2], eAct[3], bIdRule, sOption, bContext)
                                dPriority[nErrorStart] = nPriority
                        elif cActionType == "~":
                            # text processor
                            print("~")
                            self._rewrite(sWhat, nErrorStart, nErrorEnd)
                        elif cActionType == "@":
                            # jump
                            print("@")
                            self._jump(sWhat)
                        elif cActionType == "=":
                            # disambiguation
                            print("=")
                            globals()[sWhat](self.lToken)
                        elif cActionType == ">":
                            # we do nothing, this test is just a condition to apply all following actions
                            print(">")
                            pass
                        else:
                            print("# error: unknown action at " + sLineId)
                    elif cActionType == ">":
                        break
                except Exception as e:
                    raise Exception(str(e), "# " + sLineId + " # " + sRuleId)  # NOTE(review): sRuleId is undefined in this scope

    def _createWriterError (self):
        "placeholder: build an error object for the Writer backend (not implemented yet)"
        d = {}
        return d

    def _createDictError (self):
        "placeholder: build an error dict (not implemented yet)"
        d = {}
        return d

    def _rewrite (self, sWhat, nErrorStart, nErrorEnd):
        "text processor: rewrite tokens between <nErrorStart> and <nErrorEnd> position"
        lTokenValue = sWhat.split("|")
        if len(lTokenValue) != (nErrorEnd - nErrorStart + 1):
            print("Error. Text processor: number of replacements != number of tokens.")
            return
        for i, sValue in zip(range(nErrorStart, nErrorEnd+1), lTokenValue):
            self.lToken[i]["sValue"] = sValue

    def _jump (self, sWhat):
        "set a jump on a token; <sWhat> is of the form 'from>to' (token indexes)"
        try:
            nFrom, nTo = sWhat.split(">")
            self.lToken[int(nFrom)]["iJump"] = int(nTo)
        except:
            print("# Error. Jump failed: ", sWhat)
            traceback.print_exc()
            return
#### Analyse tokens
def g_morph (dToken, sPattern, bStrict=True):
    "analyse a token, return True if <sPattern> in morphologies"
    lMorph = dToken.get("lMorph")
    if lMorph is None:
        lMorph = _oSpellChecker.getMorph(dToken["sValue"])
    if not lMorph:
        return False
    zPattern = re.compile(sPattern)
    fnCheck = all if bStrict else any
    return fnCheck(bool(zPattern.search(sMorph)) for sMorph in lMorph)
def g_morphex (dToken, sPattern, sNegPattern):
    "analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies"
    lMorph = dToken.get("lMorph")
    if lMorph is None:
        lMorph = _oSpellChecker.getMorph(dToken["sValue"])
    if not lMorph:
        return False
    # negative condition first: any matching morphology rejects the token
    zNegPattern = re.compile(sNegPattern)
    for sMorph in lMorph:
        if zNegPattern.search(sMorph):
            return False
    # then the positive pattern must match at least one morphology
    zPattern = re.compile(sPattern)
    return any(zPattern.search(sMorph) for sMorph in lMorph)
def g_analyse (dToken, sPattern, bStrict=True):
    "analyse a token, return True if <sPattern> in morphologies (disambiguation off)"
    lMorph = _oSpellChecker.getMorph(dToken["sValue"])
    if not lMorph:
        return False
    zPattern = re.compile(sPattern)
    fnCheck = all if bStrict else any
    return fnCheck(bool(zPattern.search(sMorph)) for sMorph in lMorph)
def g_analysex (dToken, sPattern, sNegPattern):
    "analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies (disambiguation off)"
    lMorph = _oSpellChecker.getMorph(dToken["sValue"])
    if not lMorph:
        return False
    # negative condition first: any matching morphology rejects the token
    zNegPattern = re.compile(sNegPattern)
    for sMorph in lMorph:
        if zNegPattern.search(sMorph):
            return False
    # then the positive pattern must match at least one morphology
    zPattern = re.compile(sPattern)
    return any(zPattern.search(sMorph) for sMorph in lMorph)
#### Go outside the rule scope
def g_nextToken (i):
    "TODO: not implemented (presumably: fetch the token after index i — confirm intent)"
    pass

def g_prevToken (i):
    "TODO: not implemented (presumably: fetch the token before index i — confirm intent)"
    pass

def g_look ():
    "TODO: not implemented"
    pass

def g_lookAndCheck ():
    "TODO: not implemented"
    pass
#### Disambiguator
def g_select (dToken, sPattern, lDefault=None):
    "keep only the morphologies of <dToken> matching <sPattern>; always return True"
    lMorph = dToken.get("lMorph")
    if lMorph is None:
        lMorph = _oSpellChecker.getMorph(dToken["sValue"])
    if not lMorph or len(lMorph) == 1:
        # nothing to disambiguate
        return True
    lSelect = [ sMorph for sMorph in lMorph if re.search(sPattern, sMorph) ]
    if not lSelect:
        if lDefault:
            dToken["lMorph"] = lDefault
        return True
    if len(lSelect) != len(lMorph):
        dToken["lMorph"] = lSelect
    return True
def g_exclude (dToken, sPattern, lDefault=None):
    "discard the morphologies of <dToken> matching <sPattern>; always return True"
    lMorph = dToken.get("lMorph")
    if lMorph is None:
        lMorph = _oSpellChecker.getMorph(dToken["sValue"])
    if not lMorph or len(lMorph) == 1:
        # nothing to disambiguate
        return True
    lSelect = [ sMorph for sMorph in lMorph if not re.search(sPattern, sMorph) ]
    if not lSelect:
        if lDefault:
            dToken["lMorph"] = lDefault
        return True
    if len(lSelect) != len(lMorph):
        dToken["lMorph"] = lSelect
    return True
def g_define (dToken, lMorph):
    "set morphologies of <dToken>, always return True"
    # overrides any previously cached/disambiguated morphologies for this token
    dToken["lMorph"] = lMorph
    return True
#### CALLABLES (generated code)
${graph_callables}
|