586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
|
def __init__ (self, sSentence, sSentence0, nOffset):
    "store the sentence texts and offset, tokenize <sSentence> and pick the error factory"
    self.sSentence = sSentence
    self.sSentence0 = sSentence0
    self.nOffsetWithinParagraph = nOffset
    # tokens of <sSentence>, kept in the order the tokenizer yields them
    self.lToken = list(_oTokenizer.genTokens(sSentence, True))
    # index of the tokens by their "nStart" value
    dTokenByStart = {}
    for dToken in self.lToken:
        dTokenByStart[dToken["nStart"]] = dToken
    self.dTokenPos = dTokenByStart
    # the error factory is chosen once, from the module-level flag <_bWriterError>
    if _bWriterError:
        self.createError = self._createWriterError
    else:
        self.createError = self._createDictError
def update (self, sSentence):
    "replace the current sentence text with <sSentence> and tokenize it again"
    self.sSentence = sSentence
    # NOTE(review): self.dTokenPos is built in __init__ but is not rebuilt here -- confirm this is intended.
    self.lToken = [ dToken  for dToken in _oTokenizer.genTokens(sSentence, True) ]
def _getNextMatchingNodes (self, dToken, dGraph, dNode, bDebug=False):
"generator: return nodes where <dToken> “values” match <dNode> arcs"
|
>
>
|
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
|
def __init__ (self, sSentence, sSentence0, nOffset):
    "store the sentence texts and offset, tokenize <sSentence> and pick the error factory"
    self.sSentence = sSentence
    self.sSentence0 = sSentence0
    self.nOffsetWithinParagraph = nOffset
    # tokens of <sSentence>, kept in the order the tokenizer yields them
    self.lToken = list(_oTokenizer.genTokens(sSentence, True))
    # index of the tokens by their "nStart" value
    dTokenByStart = {}
    for dToken in self.lToken:
        dTokenByStart[dToken["nStart"]] = dToken
    self.dTokenPos = dTokenByStart
    # tags start empty; the "/" actions fill this dictionary and it is handed to the condition functions
    self.dTags = {}
    # the error factory is chosen once, from the module-level flag <_bWriterError>
    if _bWriterError:
        self.createError = self._createWriterError
    else:
        self.createError = self._createDictError
def update (self, sSentence):
    "replace the current sentence text with <sSentence> and tokenize it again"
    self.sSentence = sSentence
    # NOTE(review): self.dTokenPos is built in __init__ but is not rebuilt here -- confirm this is intended.
    self.lToken = [ dToken  for dToken in _oTokenizer.genTokens(sSentence, True) ]
def _getNextMatchingNodes (self, dToken, dGraph, dNode, bDebug=False):
"generator: return nodes where <dToken> “values” match <dNode> arcs"
|
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
|
bCondMemo = None
for sRuleId in dGraph[nextNodeKey]:
try:
if bDebug:
print("ACTION:", sRuleId)
print(dRule[sRuleId])
sOption, sFuncCond, cActionType, sWhat, *eAct = dRule[sRuleId]
# action in lActions: [ condition, action type, replacement/suggestion/action[, iTokenStart, iTokenEnd[, nPriority, message, URL]] ]
if not sOption or dOptions.get(sOption, False):
bCondMemo = not sFuncCond or globals()[sFuncCond](self.lToken, nTokenOffset, nLastToken, sCountry, bCondMemo)
if bCondMemo:
if cActionType == "-":
# grammar error
nTokenErrorStart = nTokenOffset + eAct[0]
nTokenErrorEnd = (nTokenOffset + eAct[1]) if eAct[1] else nLastToken
nErrorStart = self.nOffsetWithinParagraph + self.lToken[nTokenErrorStart]["nStart"]
nErrorEnd = self.nOffsetWithinParagraph + self.lToken[nTokenErrorEnd]["nEnd"]
|
>
>
|
>
>
|
|
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
|
bCondMemo = None
for sRuleId in dGraph[nextNodeKey]:
try:
if bDebug:
print("ACTION:", sRuleId)
print(dRule[sRuleId])
sOption, sFuncCond, cActionType, sWhat, *eAct = dRule[sRuleId]
# Suggestion [ option, condition, "-", replacement/suggestion/action, iTokenStart, iTokenEnd, nPriority, message, URL ]
# TextProcessor [ option, condition, "~", replacement/suggestion/action, iTokenStart, iTokenEnd ]
# Disambiguator [ option, condition, "=", replacement/suggestion/action ]
# Tag [ option, condition, "/", replacement/suggestion/action, iTokenStart, iTokenEnd ]
# Test [ option, condition, ">", "" ]
if not sOption or dOptions.get(sOption, False):
bCondMemo = not sFuncCond or globals()[sFuncCond](self.lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, self.dTags)
if bCondMemo:
if cActionType == "-":
# grammar error
nTokenErrorStart = nTokenOffset + eAct[0]
nTokenErrorEnd = (nTokenOffset + eAct[1]) if eAct[1] else nLastToken
nErrorStart = self.nOffsetWithinParagraph + self.lToken[nTokenErrorStart]["nStart"]
nErrorEnd = self.nOffsetWithinParagraph + self.lToken[nTokenErrorEnd]["nEnd"]
|
734
735
736
737
738
739
740
741
742
743
744
745
746
747
|
if bDebug:
print("=", sRuleId)
elif cActionType == ">":
# we do nothing, this test is just a condition to apply all following actions
if bDebug:
print(">", sRuleId)
pass
else:
print("# error: unknown action at " + sLineId)
elif cActionType == ">":
if bDebug:
print(">!", sRuleId)
break
except Exception as e:
|
>
>
>
>
>
>
>
>
>
|
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
|
if bDebug:
print("=", sRuleId)
elif cActionType == ">":
# we do nothing, this test is just a condition to apply all following actions
if bDebug:
print(">", sRuleId)
pass
elif cActionType == "/":
# tags
nTokenTag = nTokenOffset + eAct[0]
if sWhat not in self.dTags:
self.dTags[sWhat] = (nTokenTag, nTokenTag)
elif nTokenTag > self.dTags[sWhat][1]:
self.dTags[sWhat] = (self.dTags[sWhat][0], nTokenTag)
if bDebug:
print("/", sRuleId)
else:
print("# error: unknown action at " + sLineId)
elif cActionType == ">":
if bDebug:
print(">!", sRuleId)
break
except Exception as e:
|
932
933
934
935
936
937
938
939
940
941
942
943
944
945
|
zNegPattern = re.compile(sNegPattern)
if any(zNegPattern.search(sMorph) for sMorph in lMorph):
return False
# search sPattern
zPattern = re.compile(sPattern)
return any(zPattern.search(sMorph) for sMorph in lMorph)
#### Disambiguator
def g_select (dToken, sPattern, lDefault=None):
"select morphologies for <dToken> according to <sPattern>, always return True"
lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"])
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
|
zNegPattern = re.compile(sNegPattern)
if any(zNegPattern.search(sMorph) for sMorph in lMorph):
return False
# search sPattern
zPattern = re.compile(sPattern)
return any(zPattern.search(sMorph) for sMorph in lMorph)
def g_tag_before (dToken, sTag, dTags):
    "return True if <sTag> is known in <dTags> and <dToken> starts after the first bound recorded for it"
    # <dTags> maps a tag to a pair of bounds; only the element at index 0 is compared here.
    # NOTE(review): the "/" action stores token indexes in <dTags>, whereas "nStart" looks like
    # a character offset -- confirm both sides of the comparison use the same unit.
    return sTag in dTags and dToken["nStart"] > dTags[sTag][0]
def g_tag_after (dToken, sTag, dTags):
    "return True if <sTag> is known in <dTags> and <dToken> starts before the last bound recorded for it"
    # <dTags> maps a tag to a pair of bounds; only the element at index 1 is compared here.
    # NOTE(review): the "/" action stores token indexes in <dTags>, whereas "nStart" looks like
    # a character offset -- confirm both sides of the comparison use the same unit.
    return sTag in dTags and dToken["nStart"] < dTags[sTag][1]
#### Disambiguator
def g_select (dToken, sPattern, lDefault=None):
"select morphologies for <dToken> according to <sPattern>, always return True"
lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"])
|