9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
|
from itertools import chain
from ..graphspell.spellchecker import SpellChecker
from ..graphspell.echo import echo
from . import gc_options
from ..graphspell.tokenizer import Tokenizer
from .gc_rules_graph import dGraph
__all__ = [ "lang", "locales", "pkg", "name", "version", "author", \
"load", "parse", "getSpellChecker", \
"setOption", "setOptions", "getOptions", "getDefaultOptions", "getOptionsLabels", "resetOptions", "displayOptions", \
"ignoreRule", "resetIgnoreRules", "reactivateRule", "listRules", "displayRules" ]
|
|
|
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
|
from itertools import chain
from ..graphspell.spellchecker import SpellChecker
from ..graphspell.echo import echo
from . import gc_options
from ..graphspell.tokenizer import Tokenizer
from .gc_rules_graph import dGraph, dRule
__all__ = [ "lang", "locales", "pkg", "name", "version", "author", \
"load", "parse", "getSpellChecker", \
"setOption", "setOptions", "getOptions", "getDefaultOptions", "getOptionsLabels", "resetOptions", "displayOptions", \
"ignoreRule", "resetIgnoreRules", "reactivateRule", "listRules", "displayRules" ]
|
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
|
self.iStart = iStart
self.lToken = list(_oTokenizer.genTokens(sSentence))
def parse (self):
    """Walk the token list through the rule graph and execute matching rules.

    Returns a dict of errors keyed by start position (as produced by
    _executeActions).
    """
    dErr = {}
    lPointer = []
    for dToken in self.lToken:
        # Advance each live pointer along every arc matching the current token.
        # A pointer forks when several arcs match and dies when none does;
        # rebuilding the list avoids mutating it while iterating (the original
        # `del lPointer[i]` inside enumerate() skipped elements).
        lNextPointer = []
        for dPointer in lPointer:
            for dNode in self._getNextMatchingNodes(dToken, dPointer["dNode"]):
                lNextPointer.append({"nOffset": dToken["i"], "dNode": dNode})
        lPointer = lNextPointer
        # try to start a new match at the current token from the graph root
        for dNode in self._getNextMatchingNodes(dToken, dGraph[0]):
            lPointer.append({"nOffset": 0, "dNode": dNode})
        # execute the rules reachable from each pointer
        for dPointer in lPointer:
            if "<rules>" in dPointer["dNode"]:
                # NOTE(review): successive calls overwrite dErr instead of
                # merging — mirrors the later revision; confirm intended.
                dErr = self._executeActions(dPointer["dNode"]["<rules>"], dPointer["nOffset"])
    return dErr
def _getNextMatchingNodes (self, dToken, dNode):
    "Generator: yield each node of dGraph reachable from <dNode> through an arc matching <dToken>."
    sValue = dToken["sValue"]
    # arc keyed directly on the token value
    if sValue in dNode:
        yield dGraph[dNode[sValue]]
    # arcs keyed on one of the token's lemmas
    if "<lemmas>" in dNode:
        dLemmaArcs = dNode["<lemmas>"]
        for sLemma in _oSpellChecker.getLemma(sValue):
            if sLemma in dLemmaArcs:
                yield dGraph[dLemmaArcs[sLemma]]
    # universal arc: matches whatever the token is
    if "*" in dNode:
        yield dGraph[dNode["*"]]
    # arcs keyed on a regex applied to the token value
    if "<re_value>" in dNode:
        for sRegex, nNodeKey in dNode["<re_value>"].items():
            if re.search(sRegex, sValue):
                yield dGraph[nNodeKey]
    # arcs keyed on a regex applied to the token's morphologies
    if "<re_morph>" in dNode:
        for sRegex, nNodeKey in dNode["<re_morph>"].items():
            for sMorph in _oSpellChecker.getMorph(sValue):
                if re.search(sRegex, sMorph):
                    yield dGraph[nNodeKey]
def _executeActions (self, dNode, nOffset):
for sLineId, nextNodeKey in dNode.items():
for sArc in dGraph[nextNodeKey]:
print(sArc)
bCondMemo = None
sFuncCond, cActionType, sWhat, *eAct = dRule[sArc]
# action in lActions: [ condition, action type, replacement/suggestion/action[, iGroupStart, iGroupEnd[, message, URL]] ]
try:
bCondMemo = not sFuncCond or globals()[sFuncCond](self, sCountry, bCondMemo)
if bCondMemo:
if cActionType == "-":
# grammar error
print("-")
nErrorStart = nSentenceOffset + m.start(eAct[0])
nErrorEnd = nSentenceOffset + m.start(eAct[1])
if nErrorStart not in dErrs or nPriority > dPriority[nErrorStart]:
dErrs[nErrorStart] = _createError(self, sWhat, nErrorStart, nErrorEnd, sLineId, bUppercase, eAct[2], eAct[3], bIdRule, sOption, bContext)
dPriority[nErrorStart] = nPriority
elif cActionType == "~":
# text processor
print("~")
self._rewrite(sWhat, nErrorStart, nErrorEnd)
|
>
>
>
>
|
|
>
>
>
>
>
|
>
<
|
>
>
>
>
>
>
>
>
|
|
|
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
|
self.iStart = iStart
self.lToken = list(_oTokenizer.genTokens(sSentence))
def parse (self):
    """Walk the token list through the rule graph and execute matching rules.

    Returns a dict of errors keyed by start position (as produced by
    _executeActions).
    """
    dErr = {}
    lPointer = []
    for dToken in self.lToken:
        # check arcs for each existing pointer: the first matching arc
        # advances the pointer in place, further matches fork new pointers,
        # and a pointer with no match is dropped
        lNewPointer = []
        lKeptPointer = []
        for dPointer in lPointer:
            bFirst = True
            for dNode in self._getNextMatchingNodes(dToken, dPointer["dNode"]):
                if bFirst:
                    dPointer["nOffset"] = dToken["i"]
                    dPointer["dNode"] = dNode
                    lKeptPointer.append(dPointer)
                else:
                    lNewPointer.append({"nOffset": dPointer["nOffset"], "dNode": dNode})
                bFirst = False
        # rebuilding the list fixes the original `del lPointer[i]` inside
        # enumerate(), which skipped the pointer following each deletion
        lPointer = lKeptPointer + lNewPointer
        # check arcs of first nodes (a match may start at any token)
        for dNode in self._getNextMatchingNodes(dToken, dGraph[0]):
            lPointer.append({"nOffset": 0, "dNode": dNode})
        # check if there are rules to execute for each pointer
        for dPointer in lPointer:
            if "<rules>" in dPointer["dNode"]:
                # NOTE(review): successive calls overwrite dErr instead of
                # merging — kept as in the original; confirm intended.
                dErr = self._executeActions(dPointer["dNode"]["<rules>"], dPointer["nOffset"])
    if dErr:
        print(dErr)
    return dErr
def _getNextMatchingNodes (self, dToken, dNode):
    "Generator: yield each node of dGraph reachable from <dNode> through an arc matching <dToken>. Debug prints report which kind of arc matched."
    # token value: arc keyed directly on the token's surface form
    if dToken["sValue"] in dNode:
        print("value found: ", dToken["sValue"])
        yield dGraph[dNode[dToken["sValue"]]]
    # token lemmas: arcs keyed on a lemma returned by the spellchecker
    if "<lemmas>" in dNode:
        for sLemma in _oSpellChecker.getLemma(dToken["sValue"]):
            if sLemma in dNode["<lemmas>"]:
                print("lemma found: ", sLemma)
                yield dGraph[dNode["<lemmas>"][sLemma]]
    # universal arc: matches whatever the token is
    if "*" in dNode:
        print("generic arc")
        yield dGraph[dNode["*"]]
    # regex value arcs: arc taken when the regex matches the token value
    if "<re_value>" in dNode:
        for sRegex in dNode["<re_value>"]:
            if re.search(sRegex, dToken["sValue"]):
                print("value regex matching: ", sRegex)
                yield dGraph[dNode["<re_value>"][sRegex]]
    # regex morph arcs: arc taken once per (regex, morphology) pair that matches
    if "<re_morph>" in dNode:
        for sRegex in dNode["<re_morph>"]:
            for sMorph in _oSpellChecker.getMorph(dToken["sValue"]):
                if re.search(sRegex, sMorph):
                    print("morph regex matching: ", sRegex)
                    yield dGraph[dNode["<re_morph>"][sRegex]]
def _executeActions (self, dNode, nOffset):
dErrs = {}
for sLineId, nextNodeKey in dNode.items():
for sArc in dGraph[nextNodeKey]:
print(sArc)
bCondMemo = None
sFuncCond, cActionType, sWhat, *eAct = dRule[sArc]
# action in lActions: [ condition, action type, replacement/suggestion/action[, iGroupStart, iGroupEnd[, message, URL]] ]
try:
bCondMemo = not sFuncCond or globals()[sFuncCond](self, sCountry, bCondMemo)
if bCondMemo:
if cActionType == "-":
# grammar error
print("-")
nErrorStart = self.iStart + self.lToken[eAct[0]]["nStart"]
nErrorEnd = self.iStart + self.lToken[eAct[1]]["nEnd"]
if nErrorStart not in dErrs or nPriority > dPriority[nErrorStart]:
dErrs[nErrorStart] = _createError(self, sWhat, nErrorStart, nErrorEnd, sLineId, bUppercase, eAct[2], eAct[3], bIdRule, sOption, bContext)
dPriority[nErrorStart] = nPriority
elif cActionType == "~":
# text processor
print("~")
self._rewrite(sWhat, nErrorStart, nErrorEnd)
|
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
|
print(">")
pass
else:
print("# error: unknown action at " + sLineId)
elif cActionType == ">":
break
except Exception as e:
raise Exception(str(e), "# " + sLineId + " # " + sRuleId)
def _createWriterError (self):
d = {}
return d
def _createDictError (self):
d = {}
return d
def _rewrite (self, sWhat, nErrorStart, nErrorEnd):
"text processor: rewrite tokens between <nErrorStart> and <nErrorEnd> position"
lTokenValue = sWhat.split("|")
if len(lTokenValue) != (nErrorEnd - nErrorStart + 1):
print("Error. Text processor: number of replacements != number of tokens.")
return
|
|
<
<
<
|
<
<
<
<
|
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
|
print(">")
pass
else:
print("# error: unknown action at " + sLineId)
elif cActionType == ">":
break
except Exception as e:
raise Exception(str(e), sLineId)
return dErrs
def _rewrite (self, sWhat, nErrorStart, nErrorEnd):
"text processor: rewrite tokens between <nErrorStart> and <nErrorEnd> position"
lTokenValue = sWhat.split("|")
if len(lTokenValue) != (nErrorEnd - nErrorStart + 1):
print("Error. Text processor: number of replacements != number of tokens.")
return
|