Overview
Comment: [core] gc engine: code cleaning
SHA3-256: cc997fd6212877f7fe3ec8834912a835
User & Date: olr on 2018-09-07 10:09:55
Context
2018-09-07
  21:29  [core] fix bug about context
         check-in: 1fbd95d6f4  user: olr  tags: core, rg
  10:09  [core] gc engine: code cleaning
         check-in: cc997fd621  user: olr  tags: core, rg
2018-09-06
  07:04  [build][bug] DARG: fix Heisenbug, sort actions identifiers by name (necessary for Python 3.5)
         check-in: e18bf06f0c  user: olr  tags: build, warning, rg
Changes
Modified gc_core/py/lang_core/gc_engine.py from [7169e30ca7] to [9f26b926c3].
@@ -67,14 +67,21 @@
         _sAppContext = sContext
         _dOptions = dict(gc_options.getOptions(sContext))     # duplication necessary, to be able to reset to default
         _oTokenizer = _oSpellChecker.getTokenizer()
         _oSpellChecker.activateStorage()
     except:
         traceback.print_exc()
 
 
+def getSpellChecker ():
+    "return the spellchecker object"
+    return _oSpellChecker
+
+
+#### Rules
+
 def _getRules (bParagraph):
     try:
         if not bParagraph:
             return _rules.lSentenceRules
         return _rules.lParagraphRules
     except:
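This first hunk gives the engine a public accessor for its spell checker, previously reachable only through the private `_oSpellChecker` global. A minimal usage sketch, assuming the engine ships inside a built Grammalecte package; the import path and the `isValid()` word lookup belong to the surrounding project, not to this diff:

# usage sketch; import path assumed from the French build of Grammalecte
from grammalecte.fr import gc_engine

gc_engine.load()                               # initialise tokenizer, options and spell checker
oSpellChecker = gc_engine.getSpellChecker()    # the accessor added by this check-in
print(oSpellChecker.isValid("maison"))         # dictionary lookup for a single word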
@@ -98,7 +105,14 @@
                 try:
                     aRule[0] = re.compile(aRule[0])
                 except:
                     echo("Bad regular expression in # " + str(aRule[2]))
                     aRule[0] = "(?i)<Grammalecte>"
 
 
+def ignoreRule (sRuleId):
+    "disable rule <sRuleId>"
+    _aIgnoredRules.add(sRuleId)
+
+
+def resetIgnoreRules ():
+    "clear all ignored rules"
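The hunk above places `ignoreRule()` and `resetIgnoreRules()` in the new `#### Rules` section; they manage the module-level `_aIgnoredRules` set that the error-reporting code consults. A short sketch with a placeholder rule id (real ids can be listed with `displayRules()`):

gc_engine.ignoreRule("some_rule_id")    # placeholder id; this rule stops reporting errors
# ... check some text ...
gc_engine.resetIgnoreRules()            # empty the ignore set again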
@@ -136,14 +141,16 @@
 
 
 def displayRules (sFilter=None):
     "display the name of rules, with the filter <sFilter>"
     echo("List of rules. Filter: << " + str(sFilter) + " >>")
     for sOption, sLineId, sRuleId in listRegexRules(sFilter):
         echo("{:<10} {:<10} {}".format(sOption, sLineId, sRuleId))
 
 
+#### Options
+
 def setOption (sOpt, bVal):
     "set option <sOpt> with <bVal> if it exists"
     if sOpt in _dOptions:
         _dOptions[sOpt] = bVal
 
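`setOption()` now opens the `#### Options` section, and it only accepts keys already present in `_dOptions`, so a mistyped option name is silently ignored rather than creating a new entry. A sketch; `idrule` is grounded in the `parse()` hunk further down, the filter string is made up:

gc_engine.setOption("idrule", True)    # ask the engine to tag each error with its rule id
gc_engine.displayRules("typo")         # list the rules matching the filter
gc_engine.resetOptions()               # restore the defaults of the application context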
@@ -177,7 +184,14 @@
 
 def resetOptions ():
     "set options to default values"
     global _dOptions
     _dOptions = dict(gc_options.getOptions(_sAppContext))
 
 
+#### Parsing
+
+_zEndOfSentence = re.compile(r'([.?!:;…][ .?!… »”")]*|.$)')
+_zBeginOfParagraph = re.compile(r"^\W*")
+_zEndOfParagraph = re.compile(r"\W*$")
+
+
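The regexes opening the new `#### Parsing` section drive sentence segmentation: `_zEndOfSentence` matches closing punctuation plus any trailing spaces, quotes or brackets, and its `.$` alternative catches a paragraph that ends without punctuation. A standalone demonstration of the pattern, with a made-up sample (the engine's own sentence loop sits outside this hunk):

import re

_zEndOfSentence = re.compile(r'([.?!:;…][ .?!… »”")]*|.$)')

sParagraph = "Bonjour. Comment allez-vous ? Très bien !"
iStart = 0
for m in _zEndOfSentence.finditer(sParagraph):
    print(repr(sParagraph[iStart:m.end()]))    # one slice per detected sentence
    iStart = m.end()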
@@ -237,7 +239,14 @@
         #for nPos, dToken in self.dTokenPos.items():
         #    s += "{}\t{}\n".format(nPos, dToken)
         return s
 
     def parse (self, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
         "analyses the paragraph sText and returns list of errors"
         #sText = unicodedata.normalize("NFC", sText)
+        dOpt = dOptions or _dOptions
+        bShowRuleId = option('idrule')
+        # parse paragraph
+        try:
+            self.parseText(self.sText, self.sText0, True, 0, sCountry, dOpt, bShowRuleId, bDebug, bContext)
+        except:
+            raise
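`parse()` resolves its options with a single truthiness fallback: a dict passed by the caller wins, otherwise the module defaults apply. The snippet below isolates that idiom with made-up option names; note that an empty dict is falsy in Python, so it falls back to the defaults too:

_dDefault = { "typo": True, "idrule": False }    # stand-in for the module-level _dOptions

def pickOptions (dOptions=None):
    "mirrors the `dOpt = dOptions or _dOptions` line of parse()"
    return dOptions or _dDefault

print(pickOptions())                     # {'typo': True, 'idrule': False}
print(pickOptions({ "typo": False }))    # {'typo': False}
print(pickOptions({}))                   # {'typo': True, 'idrule': False}  (empty dict is falsy)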
@@ -337,14 +339,16 @@
     def update (self, sSentence, bDebug=False):
         "update <sSentence> and retokenize"
         self.sSentence = sSentence
         lNewToken = list(_oTokenizer.genTokens(sSentence, True))
         for dToken in lNewToken:
             if "lMorph" in self.dTokenPos.get(dToken["nStart"], {}):
                 dToken["lMorph"] = self.dTokenPos[dToken["nStart"]]["lMorph"]
+            if "tags" in self.dTokenPos.get(dToken["nStart"], {}):
+                dToken["tags"] = self.dTokenPos[dToken["nStart"]]["tags"]
         self.lToken = lNewToken
         self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken if dToken["sType"] != "INFO" }
         if bDebug:
             echo("UPDATE:")
             echo(self)
 
     def _getNextPointers (self, dToken, dGraph, dPointer, bDebug=False):
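`update()` retokenizes a sentence after it has been rewritten, which would discard every annotation computed so far; the loop therefore copies annotations from the old token sitting at the same start offset, and this check-in extends the carry-over from the morphology list `lMorph` to the `tags` set. A reduced model with simplified stand-in token dicts:

# made-up data; real tokens carry more fields (sType, nEnd, ...)
dTokenPos = { 3: { "nStart": 3, "lMorph": [">chat/nom"], "tags": {"tag1"} } }
dToken = { "nStart": 3, "sValue": "chats" }    # freshly retokenized token

if "lMorph" in dTokenPos.get(dToken["nStart"], {}):
    dToken["lMorph"] = dTokenPos[dToken["nStart"]]["lMorph"]
if "tags" in dTokenPos.get(dToken["nStart"], {}):
    dToken["tags"] = dTokenPos[dToken["nStart"]]["tags"]

print(dToken)    # both annotations survive the retokenization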
@@ -460,7 +464,42 @@
                         bTokenFound = True
                     elif "¬" in sMeta:
                         if dToken["sType"] not in sMeta:
                             if bDebug:
                                 echo("  MATCH: *" + sMeta)
                             yield { "iNode1": iNode1, "dNode": dGraph[dNode["<meta>"][sMeta]] }
                             bTokenFound = True
+        if not bTokenFound and "bKeep" in dPointer:
+            yield dPointer
+        # JUMP
+        # Warning! Recurssion!
+        if "<>" in dNode:
+            dPointer2 = { "iNode1": iNode1, "dNode": dGraph[dNode["<>"]], "bKeep": True }
+            yield from self._getNextPointers(dToken, dGraph, dPointer2, bDebug)
+
+    def parseGraph (self, dGraph, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False):
+        "parse graph with tokens from the text and execute actions encountered"
+        lPointer = []
+        bTagAndRewrite = False
+        for iToken, dToken in enumerate(self.lToken):
+            if bDebug:
+                echo("TOKEN: " + dToken["sValue"])
+            # check arcs for each existing pointer
+            lNextPointer = []
+            for dPointer in lPointer:
+                lNextPointer.extend(self._getNextPointers(dToken, dGraph, dPointer, bDebug))
+            lPointer = lNextPointer
+            # check arcs of first nodes
+            lPointer.extend(self._getNextPointers(dToken, dGraph, { "iNode1": iToken, "dNode": dGraph[0] }, bDebug))
+            # check if there is rules to check for each pointer
+            for dPointer in lPointer:
+                #if bDebug:
+                #    echo("+", dPointer)
+                if "<rules>" in dPointer["dNode"]:
+                    bChange = self._executeActions(dGraph, dPointer["dNode"]["<rules>"], dPointer["iNode1"]-1, iToken, dOptions, sCountry, bShowRuleId, bDebug, bContext)
+                    if bChange:
+                        bTagAndRewrite = True
+        if bTagAndRewrite:
+            self.rewriteFromTags(bDebug)
+        if bDebug:
+            echo(self)
+        return self.sSentence
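`parseGraph()` walks the whole token list once while maintaining a set of live pointers into the rule graph: each token advances every existing pointer along its matching arcs, also opens a fresh pointer at the root node `dGraph[0]`, and any pointer reaching a node that holds `<rules>` fires the associated actions. The toy below reproduces only that walking scheme; the graph, the token stream and matching by raw value are simplified stand-ins (the real `_getNextPointers` also matches lemmas, morphologies and meta tokens, as the hunk shows):

# toy graph for the two-token pattern "ne pas"; arcs keyed by token value only
dGraph = {
    0: { "ne": 1 },
    1: { "pas": 2 },
    2: { "<rules>": "negation pattern" },
}

def getNextPointers (sValue, dPointer):
    "yield the pointer moved along the arc matching <sValue>, if any"
    dNode = dPointer["dNode"]
    if sValue in dNode:
        yield { "iNode1": dPointer["iNode1"], "dNode": dGraph[dNode[sValue]] }

lToken = [ "il", "ne", "pas" ]    # made-up token stream
lPointer = []
for iToken, sValue in enumerate(lToken):
    # advance every live pointer; those without a matching arc disappear
    lPointer = [ dNext for dP in lPointer for dNext in getNextPointers(sValue, dP) ]
    # every token can also start a new match from the root node
    lPointer.extend(getNextPointers(sValue, { "iNode1": iToken, "dNode": dGraph[0] }))
    for dP in lPointer:
        if "<rules>" in dP["dNode"]:
            print("match from token", dP["iNode1"], "to token", iToken, ":", dP["dNode"]["<rules>"])

Run on this stream, the sketch reports a single match spanning tokens 1 to 2.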