Overview

Comment:      [core] debugging madness
Downloads:    Tarball | ZIP archive | SQL archive
Timelines:    family | ancestors | descendants | both | core | rg
Files:        files | file ages | folders
SHA3-256:     dfeeae2ca4a21881aec358cae7c0d422
User & Date:  olr on 2018-06-29 09:28:58
Other Links:  branch diff | manifest | tags
Context

2018-06-29
09:54  version 0.7  (check-in: 783d38aef0, user: olr, tags: rg)
09:28  [core] debugging madness  (check-in: dfeeae2ca4, user: olr, tags: core, rg)
09:27  [fr] remove test graph  (check-in: f3fb6556ae, user: olr, tags: fr, rg)
Changes
Modified gc_core/py/lang_core/gc_engine.py from [eca6b550ae] to [16b07dde1f].
@@ -383,14 +383,15 @@
     if not tWord:
         echo("> nothing to find")
         return True
     lMorph = _oSpellChecker.getMorph(tWord[1])
     if not lMorph:
         echo("> not in dictionary")
         return True
+    print("TOKENS:", dTokenPos)
     if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]]:
         echo("DA: " + str(dTokenPos[tWord[0]]["lMorph"]))
     echo("FSA: " + str(lMorph))
     return True
 
 
 def morph (dTokenPos, tWord, sPattern, bStrict=True, bNoWord=False):
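The new trace dumps dTokenPos, the index consulted on the next line for disambiguated morphologies ("DA"). As the TextParser constructor further down in this diff shows, dTokenPos maps each token's nStart offset to its token dict. A minimal sketch of that shape, with invented tokens (illustration only, not Grammalecte data):

    # Toy tokens, invented for illustration only.
    lToken = [
        {"nStart": 0, "nEnd": 2, "sValue": "La"},
        {"nStart": 3, "nEnd": 8, "sValue": "porte"},
        {"nStart": 9, "nEnd": 14, "sValue": "noire"},
    ]
    # Same construction as in the constructor below: index tokens by start offset.
    dTokenPos = { dToken["nStart"]: dToken for dToken in lToken }
    print("TOKENS:", dTokenPos)
    # -> TOKENS: {0: {... 'sValue': 'La'}, 3: {... 'sValue': 'porte'}, 9: {... 'sValue': 'noire'}}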
@@ -406,17 +407,22 @@
     return any(zPattern.search(s) for s in lMorph)
 
 
 def morphex (dTokenPos, tWord, sPattern, sNegPattern, bNoWord=False):
     "analyse a tuple (position, word), returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on)"
     if not tWord:
         return bNoWord
     lMorph = dTokenPos[tWord[0]]["lMorph"] if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]] else _oSpellChecker.getMorph(tWord[1])
     if not lMorph:
         return False
+    if (tWord[1].startswith("noir")):
+        print(tWord)
+        print(dTokenPos)
+        print(lMorph)
+
     # check negative condition
     zNegPattern = re.compile(sNegPattern)
     if any(zNegPattern.search(s) for s in lMorph):
         return False
     # search sPattern
     zPattern = re.compile(sPattern)
     return any(zPattern.search(s) for s in lMorph)
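This trace fires only for words beginning with "noir" (presumably the test word being debugged), which keeps the output readable when parsing whole texts. The same guard pattern pulled out as a hypothetical helper, not part of gc_engine.py:

    # Hypothetical helper: gate noisy traces behind a predicate on the token.
    def traceIf (fCondition, tWord, *aExtra):
        "print <tWord> and <aExtra> only when <fCondition> holds for it"
        if fCondition(tWord):
            print(tWord)
            for x in aExtra:
                print(x)

    # usage: trace only tokens whose surface form starts with "noir"
    traceIf(lambda t: t[1].startswith("noir"), (9, "noire"), ["illustrative morphology list"])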
@@ -571,14 +577,26 @@
         self.sSentence = sSentence
         self.sSentence0 = sSentence0
         self.nOffsetWithinParagraph = nOffset
         self.lToken = list(_oTokenizer.genTokens(sSentence, True))
         self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken }
         self.dTags = {}
         self.dError = {}
 
+    def __str__ (self):
+        s = "sentence: " + self.sSentence0 + "\n"
+        s += "now: " + self.sSentence + "\n"
+        for dToken in self.lToken:
+            s += f'{dToken["nStart"]}\t{dToken["nEnd"]}\t{dToken["sValue"]}'
+            if "lMorph" in dToken:
+                s += "\t" + str(dToken["lMorph"])
+            s += "\n"
+        for nPos, dToken in self.dTokenPos.items():
+            s += f"{nPos}\t{dToken}\n"
+        return s
+
     def update (self, sSentence):
         "update <sSentence> and retokenize"
         self.sSentence = sSentence
         self.lToken = list(_oTokenizer.genTokens(sSentence, True))
 
     def _getNextMatchingNodes (self, dToken, dGraph, dNode, bDebug=False):
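The new __str__ makes the parser object printable as a debugging dump: the original sentence, its current (possibly rewritten) form, one tab-separated line per token, then the dTokenPos index. A stripped-down, self-contained illustration of the same pattern (toy class and data are assumptions for the sketch; the real object carries more state):

    # Toy reduction of the dump pattern above; data invented for illustration.
    class ToySentence:
        def __init__ (self, sSentence, lToken):
            self.sSentence0 = sSentence
            self.sSentence = sSentence
            self.lToken = lToken

        def __str__ (self):
            s = "sentence: " + self.sSentence0 + "\n"
            s += "now: " + self.sSentence + "\n"
            for dToken in self.lToken:
                s += f'{dToken["nStart"]}\t{dToken["nEnd"]}\t{dToken["sValue"]}\n'
            return s

    print(ToySentence("La porte noire", [
        {"nStart": 0, "nEnd": 2, "sValue": "La"},
        {"nStart": 3, "nEnd": 8, "sValue": "porte"},
        {"nStart": 9, "nEnd": 14, "sValue": "noire"},
    ]))

The next hunk uses the real method directly via print(self) when bDebug is set.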
@@ -701,14 +719,16 @@
                     if "<rules>" in dPointer["dNode"]:
                         bChange, dErr = self._executeActions(dGraph, dPointer["dNode"]["<rules>"], dPointer["iToken"]-1, dToken["i"], dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext)
                         self.dError.update(dErr)
                         if bChange:
                             bTagAndRewrite = True
         if bTagAndRewrite:
             self.rewrite(bDebug)
+        if bDebug:
+            print(self)
         return (bTagAndRewrite, self.sSentence)
 
     def _executeActions (self, dGraph, dNode, nTokenOffset, nLastToken, dPriority, dOptions, sCountry, bShowRuleId, bDebug, bContext):
         "execute actions found in the DARG"
         dError = {}
         bChange = False
         for sLineId, nextNodeKey in dNode.items():
@@ -831,15 +851,15 @@
             sText = sText.replace(m.group(0), self.lToken[int(m.group(1))+nTokenOffset]["sValue"])
         #print(">", sText)
         return sText
 
     def _tagAndPrepareTokenForRewriting (self, sWhat, nTokenRewriteStart, nTokenRewriteEnd, nTokenOffset, bUppercase=True, bDebug=False):
         "text processor: rewrite tokens between <nTokenRewriteStart> and <nTokenRewriteEnd> position"
         if bDebug:
-            …
+            print("   START:", nTokenRewriteStart, "END:", nTokenRewriteEnd)
         if sWhat == "*":
             # purge text
             if nTokenRewriteEnd - nTokenRewriteStart == 0:
                 self.lToken[nTokenRewriteStart]["bToRemove"] = True
             else:
                 for i in range(nTokenRewriteStart, nTokenRewriteEnd+1):
                     self.lToken[i]["bToRemove"] = True
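Note that the "*" (purge) branch does not delete tokens immediately: it only flags them with bToRemove, and the actual removal happens later in rewrite(), as the next hunk shows. A flag-then-sweep sketch under that assumption, with toy data:

    # Toy data; real tokens carry positions and morphologies as well.
    lToken = [{"sValue": s} for s in ("Ce", "que", "c'est", "que", "cela")]

    def purge (lToken, nStart, nEnd):
        "flag tokens from <nStart> to <nEnd> (inclusive) for later removal"
        if nEnd - nStart == 0:
            lToken[nStart]["bToRemove"] = True
        else:
            for i in range(nStart, nEnd+1):
                lToken[i]["bToRemove"] = True

    purge(lToken, 1, 3)
    lKept = [dToken for dToken in lToken if "bToRemove" not in dToken]   # the sweep
    print([dToken["sValue"] for dToken in lKept])   # ['Ce', 'cela']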
@@ -927,14 +947,16 @@
                         print(dToken["sValue"], "->", dToken["sNewValue"])
                     dToken["sRealValue"] = dToken["sValue"]
                     dToken["sValue"] = dToken["sNewValue"]
                     nDiffLen = len(dToken["sRealValue"]) - len(dToken["sNewValue"])
                     sNewRepl = (dToken["sNewValue"] + " " * nDiffLen) if nDiffLen >= 0 else dToken["sNewValue"][:len(dToken["sRealValue"])]
                     self.sSentence = self.sSentence[:dToken["nStart"]] + sNewRepl + self.sSentence[dToken["nEnd"]:]
                     del dToken["sNewValue"]
+                else:
+                    del self.dTokenPos[dToken["nStart"]]
         if bDebug:
             print("  REWRITED:", self.sSentence)
         self.lToken.clear()
         self.lToken = lNewToken
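The replacement logic above is deliberately length-preserving: a shorter new value is padded with spaces and a longer one is truncated, so the nStart/nEnd offsets of every later token stay valid without retokenizing. A worked example (values invented for illustration):

    # Replace a 7-character token with a 5-character value.
    sSentence = "lorsque vient la nuit"
    dToken = {"nStart": 0, "nEnd": 7, "sValue": "lorsque", "sNewValue": "quand"}
    nDiffLen = len(dToken["sValue"]) - len(dToken["sNewValue"])   # 2
    # Pad when the new value is shorter, truncate when it is longer.
    sNewRepl = (dToken["sNewValue"] + " " * nDiffLen) if nDiffLen >= 0 else dToken["sNewValue"][:len(dToken["sValue"])]
    sSentence = sSentence[:dToken["nStart"]] + sNewRepl + sSentence[dToken["nEnd"]:]
    print(repr(sSentence))   # 'quand   vient la nuit' -- still 21 characters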