Overview

Comment:      [core] gc engine: fix bug
Downloads:    Tarball | ZIP archive | SQL archive
Timelines:    family | ancestors | descendants | both | core | rg
Files:        files | file ages | folders
SHA3-256:     b5a5b6b16120e61eb0c8c17406f85449
User & Date:  olr on 2018-07-23 17:54:08
Other Links:  branch diff | manifest | tags
Context

2018-07-23
17:54  [fr] conversion: regex rules -> graph rules  (check-in: af307724f2, user: olr, tags: fr, rg)
17:54  [core] gc engine: fix bug  (check-in: b5a5b6b161, user: olr, tags: core, rg)
17:52  [build] rules condition rewriting update  (check-in: ee36aa096c, user: olr, tags: build, rg)
Changes
Modified gc_core/py/lang_core/gc_engine.py from [d2b4f44276] to [903e91d939].
︙
Lines 139-145 (old) -> 139-153 (new):

    if "‑" in sText:
        sText = sText.replace("‑", "-") # nobreakdash
    # parse sentences
    for iStart, iEnd in _getSentenceBoundaries(sText):
        if 4 < (iEnd - iStart) < 2000:
            try:
                oSentence = TextParser(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart)
                _, dErrors = _proofread(oSentence, sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dErrors, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
            except:
                raise
    return dErrors.values() # this is a view (iterable)


def _proofread (oSentence, s, sx, nOffset, bParagraph, dErrors, dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext):
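The fix above changes the per-sentence loop to build a TextParser object for each sentence and hand that object to _proofread, instead of letting the rule engine re-derive sentence state from raw strings. A minimal sketch of the loop's shape, assuming invented stand-ins for the tokenizer and the rule engine (toy_boundaries, toy_proofread and toy_parse are not Grammalecte APIs):

def toy_boundaries (sText):
    "Yield (start, end) offsets of sentences, splitting naively on '. '."
    iStart = 0
    while iStart < len(sText):
        iEnd = sText.find(". ", iStart)
        if iEnd == -1:
            yield iStart, len(sText)
            return
        yield iStart, iEnd + 1
        iStart = iEnd + 2

def toy_proofread (sSentence, nOffset, dErrors):
    "Record a fake error for any doubled space, keyed by absolute position."
    nPos = sSentence.find("  ")
    if nPos != -1:
        dErrors[nOffset + nPos] = {"nStart": nOffset + nPos, "sMessage": "double space"}
    return dErrors

def toy_parse (sText):
    dErrors = {}
    for iStart, iEnd in toy_boundaries(sText):
        toy_proofread(sText[iStart:iEnd], iStart, dErrors)
    return dErrors.values()   # a view (iterable), like the real code returns

print(list(toy_parse("One  sentence. Another  one.")))

As in the real code, errors live in a dict keyed by absolute position, so toy_parse can return dErrors.values() as a view and a later error at the same position simply replaces the earlier one.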
︙
Lines 561-567 (old) -> 561-591 (new):

        return True
    dTokenPos[nPos]["lMorph"] = lMorph
    return True


#### TEXT PARSER

class TextParser:
    "Text parser"

    def __init__ (self, sSentence, sSentence0, nOffset):
        self.sSentence = sSentence
        self.sSentence0 = sSentence0
        self.nOffsetWithinParagraph = nOffset
        self.lToken = list(_oTokenizer.genTokens(sSentence, True))
        self.dTokenPos = { dToken["nStart"]: dToken  for dToken in self.lToken  if dToken["sType"] != "INFO" }
        self.dTags = {}
        self.dError = {}

    def __str__ (self):
        s = "TEXT ==========\n"
        s += "sentence: " + self.sSentence0 + "\n"
        s += "now: " + self.sSentence + "\n"
        for dToken in self.lToken:
            s += f'{dToken["nStart"]}\t{dToken["nEnd"]}\t{dToken["sValue"]}'
            if "lMorph" in dToken:
                s += "\t" + str(dToken["lMorph"])
            s += "\n"
        for nPos, dToken in self.dTokenPos.items():
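The detail worth noting in __init__ above is dTokenPos: tokens are indexed by their start offset, so later rule actions can look a token up by position directly, and bookkeeping tokens of type "INFO" are kept out of that index. A self-contained sketch of the same comprehension over hand-built tokens (toy_tokenize is invented for the example; the real tokens come from _oTokenizer.genTokens):

import re

def toy_tokenize (sSentence):
    "Yield token dicts shaped like the ones TextParser stores (invented helper)."
    yield {"sType": "INFO", "sValue": "<start>", "nStart": 0, "nEnd": 0}
    for m in re.finditer(r"\w+", sSentence):
        yield {"sType": "WORD", "sValue": m.group(), "nStart": m.start(), "nEnd": m.end()}

lToken = list(toy_tokenize("the grey cat"))
# index tokens by start offset, skipping INFO pseudo-tokens, as TextParser does
dTokenPos = { dToken["nStart"]: dToken  for dToken in lToken  if dToken["sType"] != "INFO" }

print(sorted(dTokenPos))        # [0, 4, 9]
print(dTokenPos[4]["sValue"])   # grey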
︙
Lines 680-686 (old) -> 681-696 (new):

                        continue
                if not sPattern or any(re.search(sPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                    if bDebug:
                        print(" MATCH: @" + sRegex)
                    yield dGraph[dNode["<re_morph>"][sRegex]]
        # token tags
        if "tags" in dToken and "<tags>" in dNode:
            for sTag in dToken["tags"]:
                if sTag in dNode["<tags>"]:
                    if bDebug:
                        print(" MATCH: /" + sTag)
                    yield dGraph[dNode["<tags>"][sTag]]
        # meta arc (for token type)
        if "<meta>" in dNode:
            for sMeta in dNode["<meta>"]:
                # not regex here, we just search if <dNode["sType"]> exists within <sMeta>
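This hunk belongs to the graph walker: each node of the rule graph maps arc labels to successor node ids, and a token advances the walk by yielding every successor whose arc it matches (by morphology regex, by tag, or by meta/token type). A reduced sketch of the tag and meta arcs over a toy graph; the data is invented, and the <meta> case is simplified here to a direct type lookup, whereas the code above searches the token type within sMeta:

# Toy rule graph: each node maps arc kinds to {label: successor node id}
# (layout mimics dGraph/dNode above; the contents are invented).
dGraph = {
    0: {"<tags>": {"negation": 1}, "<meta>": {"WORD": 2}},
    1: {"id": 1},
    2: {"id": 2},
}

def next_nodes (dToken, dNode, bDebug=False):
    "Yield successor nodes whose arcs match the token, as the code above does."
    # token tags: follow a <tags> arc when the token carries a matching tag
    if "tags" in dToken and "<tags>" in dNode:
        for sTag in dToken["tags"]:
            if sTag in dNode["<tags>"]:
                if bDebug:
                    print(" MATCH: /" + sTag)
                yield dGraph[dNode["<tags>"][sTag]]
    # meta arc: match on the token type instead of its text (simplified lookup)
    if "<meta>" in dNode:
        if dToken["sType"] in dNode["<meta>"]:
            yield dGraph[dNode["<meta>"][dToken["sType"]]]

dToken = {"sType": "WORD", "sValue": "pas", "tags": {"negation"}}
print(list(next_nodes(dToken, dGraph[0], bDebug=True)))   # both arcs match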
︙
Lines 785-791 (old) -> 786-800 (new):

                    if bDebug:
                        print(" COND_OK")
                    pass
                elif cActionType == "/":
                    if bDebug:
                        print(" SEMANTIC_TAG:\n ", dRule[sRuleId])
                    nTokenStart = nTokenOffset + eAct[0]
                    nTokenEnd = nTokenOffset + (eAct[1] if eAct[1] else eAct[0])
                    for i in range(nTokenStart, nTokenEnd+1):
                        if "tags" in self.lToken[i]:
                            self.lToken[i]["tags"].update(sWhat.split("|"))
                        else:
                            self.lToken[i]["tags"] = set(sWhat.split("|"))
                elif cActionType == "%":
                    # sentence tags
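The "/" action above applies semantic tags to a span of tokens; the detail to notice is the eAct[1] if eAct[1] else eAct[0] fallback, which makes a zero end offset mean "tag the start token only". A standalone sketch of that update, with an invented token list and action tuples:

lToken = [ {"sValue": s} for s in ("il", "ne", "sait", "pas") ]

def apply_semantic_tag (lToken, nTokenOffset, eAct, sWhat):
    "Add the |-separated tags in sWhat to every token in the action's span."
    nTokenStart = nTokenOffset + eAct[0]
    # a zero end offset falls back to the start token: the span is one token wide
    nTokenEnd = nTokenOffset + (eAct[1] if eAct[1] else eAct[0])
    for i in range(nTokenStart, nTokenEnd+1):
        if "tags" in lToken[i]:
            lToken[i]["tags"].update(sWhat.split("|"))
        else:
            lToken[i]["tags"] = set(sWhat.split("|"))

apply_semantic_tag(lToken, 0, (1, 3), "negation")   # tags tokens 1..3
apply_semantic_tag(lToken, 0, (3, 0), "adv")        # zero end: only token 3
print([ dToken.get("tags") for dToken in lToken ])

After the two calls, tokens 1-3 carry {"negation"} and token 3 additionally carries "adv": update merges into the existing set rather than replacing it.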
︙