162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
|
if sOption == "@@@@":
# graph rules
if not bParagraph and bSentenceChange:
oSentence.update(s)
bSentenceChange = False
for sGraphName, sLineId in lRuleGroup:
if bDebug:
print(sGraphName, sLineId)
bParagraphChange, errs = oSentence.parse(dAllGraph[sGraphName], dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext)
dErrs.update(errs)
if bParagraphChange:
s = oSentence.rewrite()
if bDebug:
print("~", oSentence.sSentence)
elif not sOption or dOptions.get(sOption, False):
|
|
|
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
|
if sOption == "@@@@":
# graph rules
if not bParagraph and bSentenceChange:
oSentence.update(s)
bSentenceChange = False
for sGraphName, sLineId in lRuleGroup:
if bDebug:
print("\n>>>> GRAPH:", sGraphName, sLineId)
bParagraphChange, errs = oSentence.parse(dAllGraph[sGraphName], dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext)
dErrs.update(errs)
if bParagraphChange:
s = oSentence.rewrite()
if bDebug:
print("~", oSentence.sSentence)
elif not sOption or dOptions.get(sOption, False):
|
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
|
self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken }
self.createError = self._createWriterError if _bWriterError else self._createDictError
def update (self, sSentence):
    """Replace the stored sentence with <sSentence> and retokenize it.

    Sets ``self.sSentence``, rebuilds ``self.lToken`` from the module
    tokenizer, and rebuilds ``self.dTokenPos`` so that the position index
    (token "nStart" -> token) always describes the current token list
    instead of the tokens of the previous sentence.
    """
    self.sSentence = sSentence
    self.lToken = list(_oTokenizer.genTokens(sSentence, True))
    # The constructor derives dTokenPos from lToken; keep both in sync here,
    # otherwise lookups by position would return tokens of the old sentence.
    self.dTokenPos = { dToken["nStart"]: dToken  for dToken in self.lToken }
def _getNextMatchingNodes (self, dToken, dGraph, dNode):
"generator: return nodes where <dToken> “values” match <dNode> arcs"
# token value
if dToken["sValue"] in dNode:
#print("value found: ", dToken["sValue"])
yield dGraph[dNode[dToken["sValue"]]]
# token lemmas
if "<lemmas>" in dNode:
for sLemma in _oSpellChecker.getLemma(dToken["sValue"]):
if sLemma in dNode["<lemmas>"]:
#print("lemma found: ", sLemma)
yield dGraph[dNode["<lemmas>"][sLemma]]
# universal arc
if "*" in dNode:
#print("generic arc")
yield dGraph[dNode["*"]]
# regex value arcs
if "<re_value>" in dNode:
for sRegex in dNode["<re_value>"]:
if re.search(sRegex, dToken["sValue"]):
#print("value regex matching: ", sRegex)
yield dGraph[dNode["<re_value>"][sRegex]]
# regex morph arcs
if "<re_morph>" in dNode:
for sRegex in dNode["<re_morph>"]:
if "¬" not in sRegex:
# no anti-pattern
if any(re.search(sRegex, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
yield dGraph[dNode["<re_morph>"][sRegex]]
else:
# there is an anti-pattern
sPattern, sNegPattern = sRegex.split("¬", 1)
if sNegPattern == "*":
# all morphologies must match with <sPattern>
if all(re.search(sPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
yield dGraph[dNode["<re_morph>"][sRegex]]
else:
if sNegPattern and any(re.search(sNegPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
continue
if any(re.search(sPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
yield dGraph[dNode["<re_morph>"][sRegex]]
def parse (self, dGraph, dPriority, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False):
    """Walk <dGraph> along the tokens of the sentence and execute the rules encountered.

    For every token of ``self.lToken``, each live pointer is advanced along
    the arcs matching the token, new pointers are started from the root node
    ``dGraph[0]``, and ``self._executeActions`` is called for each pointer
    whose node holds a ``"<rules>"`` entry.

    Parameters:
      dGraph      -- graph of nodes; ``dGraph[0]`` is the root node
      dPriority   -- priority table shared with the caller (per its original
                     comment: key = position, value = priority); it is passed
                     through to ``_executeActions`` as is
      sCountry, bShowRuleId, bDebug, bContext -- forwarded to ``_executeActions``
      dOptions    -- options to use; the module-level ``_dOptions`` is used
                     when this is None or empty

    Returns a tuple ``(bChange, dErr)``: <bChange> is True if at least one
    call to ``_executeActions`` reported a change, <dErr> is the merge of the
    error dictionaries it returned.
    """
    # The caller's table must be used: rebinding <dPriority> to a new dict here
    # (as was done before) discarded the priorities already recorded by the
    # caller and hid the ones recorded during this call.
    if dPriority is None:
        dPriority = {}
    dErr = {}
    dOpt = _dOptions if not dOptions else dOptions
    lPointer = []
    bChange = False
    for dToken in self.lToken:
        # check arcs for each existing pointer; a pointer with no matching arc is dropped
        lPointer = [
            {"iToken": dPointer["iToken"], "dNode": dNode}
            for dPointer in lPointer
            for dNode in self._getNextMatchingNodes(dToken, dGraph, dPointer["dNode"])
        ]
        # check arcs of first nodes: a match may start at any token
        lPointer.extend(
            {"iToken": dToken["i"], "dNode": dNode}
            for dNode in self._getNextMatchingNodes(dToken, dGraph, dGraph[0])
        )
        # check if there are rules to execute for each pointer
        for dPointer in lPointer:
            if "<rules>" in dPointer["dNode"]:
                # "iToken" is the "i" of the token where the match started;
                # _executeActions receives that value minus one
                bHasChanged, errs = self._executeActions(dGraph, dPointer["dNode"]["<rules>"], dPointer["iToken"]-1, dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext)
                dErr.update(errs)
                if bHasChanged:
                    bChange = True
    return (bChange, dErr)
|
|
>
|
>
|
>
|
>
>
>
>
>
>
>
>
|
|
>
>
|
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
|
self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken }
self.createError = self._createWriterError if _bWriterError else self._createDictError
def update (self, sSentence):
    """Replace the stored sentence with <sSentence> and retokenize it.

    Sets ``self.sSentence``, rebuilds ``self.lToken`` from the module
    tokenizer, and rebuilds ``self.dTokenPos`` so that the position index
    (token "nStart" -> token) always describes the current token list
    instead of the tokens of the previous sentence.
    """
    self.sSentence = sSentence
    self.lToken = list(_oTokenizer.genTokens(sSentence, True))
    # The constructor derives dTokenPos from lToken; keep both in sync here,
    # otherwise lookups by position would return tokens of the old sentence.
    self.dTokenPos = { dToken["nStart"]: dToken  for dToken in self.lToken }
def _getNextMatchingNodes (self, dToken, dGraph, dNode, bDebug=False):
"generator: return nodes where <dToken> “values” match <dNode> arcs"
# token value
if dToken["sValue"] in dNode:
if bDebug:
print("value found: ", dToken["sValue"])
yield dGraph[dNode[dToken["sValue"]]]
# token lemmas
if "<lemmas>" in dNode:
for sLemma in _oSpellChecker.getLemma(dToken["sValue"]):
if sLemma in dNode["<lemmas>"]:
#print("lemma found: ", sLemma)
yield dGraph[dNode["<lemmas>"][sLemma]]
# universal arc
if "*" in dNode:
if bDebug:
print("generic arc")
yield dGraph[dNode["*"]]
# regex value arcs
if "<re_value>" in dNode:
for sRegex in dNode["<re_value>"]:
if re.search(sRegex, dToken["sValue"]):
if bDebug:
print("value regex matching: ", sRegex)
yield dGraph[dNode["<re_value>"][sRegex]]
# regex morph arcs
if "<re_morph>" in dNode:
for sRegex in dNode["<re_morph>"]:
if "¬" not in sRegex:
# no anti-pattern
if any(re.search(sRegex, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
if bDebug:
print("morph regex matching: ", sRegex)
yield dGraph[dNode["<re_morph>"][sRegex]]
else:
# there is an anti-pattern
sPattern, sNegPattern = sRegex.split("¬", 1)
if sNegPattern == "*":
# all morphologies must match with <sPattern>
if all(re.search(sPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
if bDebug:
print("morph regex matching: ", sRegex)
yield dGraph[dNode["<re_morph>"][sRegex]]
else:
if sNegPattern and any(re.search(sNegPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
continue
if any(re.search(sPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
if bDebug:
print("morph regex matching: ", sRegex)
yield dGraph[dNode["<re_morph>"][sRegex]]
def parse (self, dGraph, dPriority, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False):
    """Walk <dGraph> along the tokens of the sentence and execute the rules encountered.

    For every token of ``self.lToken``, each live pointer is advanced along
    the arcs matching the token, new pointers are started from the root node
    ``dGraph[0]``, and ``self._executeActions`` is called for each pointer
    whose node holds a ``"<rules>"`` entry.

    Parameters:
      dGraph      -- graph of nodes; ``dGraph[0]`` is the root node
      dPriority   -- priority table shared with the caller (per its original
                     comment: key = position, value = priority); it is passed
                     through to ``_executeActions`` as is
      sCountry, bShowRuleId, bContext -- forwarded to ``_executeActions``
      dOptions    -- options to use; the module-level ``_dOptions`` is used
                     when this is None or empty
      bDebug      -- when true, print each token ("=") and each live pointer
                     ("+"); also forwarded to the arc matcher and to
                     ``_executeActions``

    Returns a tuple ``(bChange, dErr)``: <bChange> is True if at least one
    call to ``_executeActions`` reported a change, <dErr> is the merge of the
    error dictionaries it returned.
    """
    # The caller's table must be used: rebinding <dPriority> to a new dict here
    # (as was done before) discarded the priorities already recorded by the
    # caller and hid the ones recorded during this call.
    if dPriority is None:
        dPriority = {}
    dErr = {}
    dOpt = _dOptions if not dOptions else dOptions
    lPointer = []
    bChange = False
    for dToken in self.lToken:
        if bDebug:
            print("=", dToken["sValue"])
        # check arcs for each existing pointer; a pointer with no matching arc is dropped
        lPointer = [
            {"iToken": dPointer["iToken"], "dNode": dNode}
            for dPointer in lPointer
            for dNode in self._getNextMatchingNodes(dToken, dGraph, dPointer["dNode"], bDebug)
        ]
        # check arcs of first nodes: a match may start at any token
        lPointer.extend(
            {"iToken": dToken["i"], "dNode": dNode}
            for dNode in self._getNextMatchingNodes(dToken, dGraph, dGraph[0], bDebug)
        )
        # check if there are rules to execute for each pointer
        for dPointer in lPointer:
            if bDebug:
                print("+", dPointer)
            if "<rules>" in dPointer["dNode"]:
                # "iToken" is the "i" of the token where the match started;
                # _executeActions receives that value minus one
                bHasChanged, errs = self._executeActions(dGraph, dPointer["dNode"]["<rules>"], dPointer["iToken"]-1, dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext)
                dErr.update(errs)
                if bHasChanged:
                    bChange = True
    return (bChange, dErr)
|