Overview
| Comment: | [core] debugging madness |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | core | rg |
| Files: | files | file ages | folders |
| SHA3-256: | dfeeae2ca4a21881aec358cae7c0d422 |
| User & Date: | olr on 2018-06-29 09:28:58 |
| Other Links: | branch diff | manifest | tags |
Context
| 2018-06-29 | | |
| 09:54 | version 0.7 check-in: 783d38aef0 user: olr tags: rg | |
| 09:28 | [core] debugging madness check-in: dfeeae2ca4 user: olr tags: core, rg | |
| 09:27 | [fr] remove test graph check-in: f3fb6556ae user: olr tags: fr, rg | |
Changes
Modified gc_core/py/lang_core/gc_engine.py from [eca6b550ae] to [16b07dde1f].
| ︙ | ︙ | |||
383 384 385 386 387 388 389 390 391 392 393 394 395 396 |
if not tWord:
echo("> nothing to find")
return True
lMorph = _oSpellChecker.getMorph(tWord[1])
if not lMorph:
echo("> not in dictionary")
return True
if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]]:
echo("DA: " + str(dTokenPos[tWord[0]]["lMorph"]))
echo("FSA: " + str(lMorph))
return True
def morph (dTokenPos, tWord, sPattern, bStrict=True, bNoWord=False):
| > | 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 |
if not tWord:
echo("> nothing to find")
return True
lMorph = _oSpellChecker.getMorph(tWord[1])
if not lMorph:
echo("> not in dictionary")
return True
print("TOKENS:", dTokenPos)
if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]]:
echo("DA: " + str(dTokenPos[tWord[0]]["lMorph"]))
echo("FSA: " + str(lMorph))
return True
def morph (dTokenPos, tWord, sPattern, bStrict=True, bNoWord=False):
|
| ︙ | ︙ | |||
406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 |
return any(zPattern.search(s) for s in lMorph)
def morphex (dTokenPos, tWord, sPattern, sNegPattern, bNoWord=False):
    """check the morphologies of a tuple (position, word)

    Returns <bNoWord> when <tWord> is empty.
    Otherwise returns True only if no morphology matches <sNegPattern> and at least one matches <sPattern>.
    The morphologies are read from dTokenPos[position]["lMorph"] when that entry exists,
    else they are requested from _oSpellChecker.getMorph(word).
    """
    if not tWord:
        return bNoWord
    nPos = tWord[0]
    if nPos in dTokenPos and "lMorph" in dTokenPos[nPos]:
        lMorph = dTokenPos[nPos]["lMorph"]
    else:
        lMorph = _oSpellChecker.getMorph(tWord[1])
    if not lMorph:
        return False
    # negative condition: one matching morphology is enough to reject the word
    zNegPattern = re.compile(sNegPattern)
    for sMorph in lMorph:
        if zNegPattern.search(sMorph):
            return False
    # positive condition: one matching morphology is enough to accept the word
    zPattern = re.compile(sPattern)
    for sMorph in lMorph:
        if zPattern.search(sMorph):
            return True
    return False
| > > > > > | 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 |
return any(zPattern.search(s) for s in lMorph)
def morphex (dTokenPos, tWord, sPattern, sNegPattern, bNoWord=False):
    """analyse a tuple (position, word)

    Returns <bNoWord> when <tWord> is empty.
    Otherwise returns True only if no morphology matches <sNegPattern> and at least one matches <sPattern>.
    The morphologies are read from dTokenPos[position]["lMorph"] when that entry exists,
    else they are requested from _oSpellChecker.getMorph(word).
    This function only computes a result: it writes nothing to stdout.
    """
    if not tWord:
        return bNoWord
    nPos = tWord[0]
    if nPos in dTokenPos and "lMorph" in dTokenPos[nPos]:
        lMorph = dTokenPos[nPos]["lMorph"]
    else:
        lMorph = _oSpellChecker.getMorph(tWord[1])
    if not lMorph:
        return False
    # check negative condition: any match rejects the word
    zNegPattern = re.compile(sNegPattern)
    if any(zNegPattern.search(sMorph) for sMorph in lMorph):
        return False
    # search sPattern: any match accepts the word
    zPattern = re.compile(sPattern)
    return any(zPattern.search(sMorph) for sMorph in lMorph)
|
| ︙ | ︙ | |||
571 572 573 574 575 576 577 578 579 580 581 582 583 584 |
self.sSentence = sSentence
self.sSentence0 = sSentence0
self.nOffsetWithinParagraph = nOffset
self.lToken = list(_oTokenizer.genTokens(sSentence, True))
self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken }
self.dTags = {}
self.dError = {}
def update (self, sSentence):
    "store <sSentence> as the current sentence and regenerate the token list from it"
    self.sSentence = sSentence
    # NOTE(review): only self.lToken is rebuilt here; self.dTokenPos, which is derived
    # from self.lToken where the token list is first built, is left as it was -- confirm this is intended
    self.lToken = [*_oTokenizer.genTokens(sSentence, True)]
def _getNextMatchingNodes (self, dToken, dGraph, dNode, bDebug=False):
| > > > > > > > > > > > > | 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 |
self.sSentence = sSentence
self.sSentence0 = sSentence0
self.nOffsetWithinParagraph = nOffset
self.lToken = list(_oTokenizer.genTokens(sSentence, True))
self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken }
self.dTags = {}
self.dError = {}
def __str__ (self):
# Build a multi-line debug dump of the object and return it as a string.
# NOTE(review): indentation is lost in this view, so the loop/if nesting described
# in the comments below is inferred -- confirm against the real file.
# header: the sentence kept in sSentence0, then the current sSentence
s = "sentence: " + self.sSentence0 + "\n"
s += "now: " + self.sSentence + "\n"
# tokens of self.lToken: nStart, nEnd and sValue separated by tabs
for dToken in self.lToken:
s += f'{dToken["nStart"]}\t{dToken["nEnd"]}\t{dToken["sValue"]}'
# the morphology list is appended only for tokens that have an "lMorph" key
if "lMorph" in dToken:
s += "\t" + str(dToken["lMorph"])
# presumably one newline per token (i.e. inside the loop, outside the if) -- TODO confirm
s += "\n"
# then the entries of self.dTokenPos: key, tab, full token dictionary
for nPos, dToken in self.dTokenPos.items():
s += f"{nPos}\t{dToken}\n"
return s
def update (self, sSentence):
    "store <sSentence> as the current sentence and regenerate the token list from it"
    self.sSentence = sSentence
    # NOTE(review): only self.lToken is rebuilt here; self.dTokenPos, which is derived
    # from self.lToken where the token list is first built, is left as it was -- confirm this is intended
    self.lToken = [*_oTokenizer.genTokens(sSentence, True)]
def _getNextMatchingNodes (self, dToken, dGraph, dNode, bDebug=False):
|
| ︙ | ︙ | |||
701 702 703 704 705 706 707 708 709 710 711 712 713 714 |
if "<rules>" in dPointer["dNode"]:
bChange, dErr = self._executeActions(dGraph, dPointer["dNode"]["<rules>"], dPointer["iToken"]-1, dToken["i"], dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext)
self.dError.update(dErr)
if bChange:
bTagAndRewrite = True
if bTagAndRewrite:
self.rewrite(bDebug)
return (bTagAndRewrite, self.sSentence)
def _executeActions (self, dGraph, dNode, nTokenOffset, nLastToken, dPriority, dOptions, sCountry, bShowRuleId, bDebug, bContext):
"execute actions found in the DARG"
dError = {}
bChange = False
for sLineId, nextNodeKey in dNode.items():
| > > | 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 |
if "<rules>" in dPointer["dNode"]:
bChange, dErr = self._executeActions(dGraph, dPointer["dNode"]["<rules>"], dPointer["iToken"]-1, dToken["i"], dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext)
self.dError.update(dErr)
if bChange:
bTagAndRewrite = True
if bTagAndRewrite:
self.rewrite(bDebug)
if bDebug:
print(self)
return (bTagAndRewrite, self.sSentence)
def _executeActions (self, dGraph, dNode, nTokenOffset, nLastToken, dPriority, dOptions, sCountry, bShowRuleId, bDebug, bContext):
"execute actions found in the DARG"
dError = {}
bChange = False
for sLineId, nextNodeKey in dNode.items():
|
| ︙ | ︙ | |||
831 832 833 834 835 836 837 |
sText = sText.replace(m.group(0), self.lToken[int(m.group(1))+nTokenOffset]["sValue"])
#print(">", sText)
return sText
def _tagAndPrepareTokenForRewriting (self, sWhat, nTokenRewriteStart, nTokenRewriteEnd, nTokenOffset, bUppercase=True, bDebug=False):
"text processor: rewrite tokens between <nTokenRewriteStart> and <nTokenRewriteEnd> position"
if bDebug:
| | | 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 |
sText = sText.replace(m.group(0), self.lToken[int(m.group(1))+nTokenOffset]["sValue"])
#print(">", sText)
return sText
def _tagAndPrepareTokenForRewriting (self, sWhat, nTokenRewriteStart, nTokenRewriteEnd, nTokenOffset, bUppercase=True, bDebug=False):
"text processor: rewrite tokens between <nTokenRewriteStart> and <nTokenRewriteEnd> position"
if bDebug:
print(" START:", nTokenRewriteStart, "END:", nTokenRewriteEnd)
if sWhat == "*":
# purge text
if nTokenRewriteEnd - nTokenRewriteStart == 0:
self.lToken[nTokenRewriteStart]["bToRemove"] = True
else:
for i in range(nTokenRewriteStart, nTokenRewriteEnd+1):
self.lToken[i]["bToRemove"] = True
|
| ︙ | ︙ | |||
927 928 929 930 931 932 933 934 935 936 937 938 939 940 |
print(dToken["sValue"], "->", dToken["sNewValue"])
dToken["sRealValue"] = dToken["sValue"]
dToken["sValue"] = dToken["sNewValue"]
nDiffLen = len(dToken["sRealValue"]) - len(dToken["sNewValue"])
sNewRepl = (dToken["sNewValue"] + " " * nDiffLen) if nDiffLen >= 0 else dToken["sNewValue"][:len(dToken["sRealValue"])]
self.sSentence = self.sSentence[:dToken["nStart"]] + sNewRepl + self.sSentence[dToken["nEnd"]:]
del dToken["sNewValue"]
if bDebug:
print(" REWRITED:", self.sSentence)
self.lToken.clear()
self.lToken = lNewToken
| > > | 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 |
print(dToken["sValue"], "->", dToken["sNewValue"])
dToken["sRealValue"] = dToken["sValue"]
dToken["sValue"] = dToken["sNewValue"]
nDiffLen = len(dToken["sRealValue"]) - len(dToken["sNewValue"])
sNewRepl = (dToken["sNewValue"] + " " * nDiffLen) if nDiffLen >= 0 else dToken["sNewValue"][:len(dToken["sRealValue"])]
self.sSentence = self.sSentence[:dToken["nStart"]] + sNewRepl + self.sSentence[dToken["nEnd"]:]
del dToken["sNewValue"]
else:
del self.dTokenPos[dToken["nStart"]]
if bDebug:
print(" REWRITED:", self.sSentence)
self.lToken.clear()
self.lToken = lNewToken
|
| ︙ | ︙ |