Overview
| Comment: | [core] gc engine: code cleaning |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | core | rg |
| Files: | files | file ages | folders |
| SHA3-256: |
cc997fd6212877f7fe3ec8834912a835 |
| User & Date: | olr on 2018-09-07 10:09:55 |
| Other Links: | branch diff | manifest | tags |
Context
|
2018-09-07
| ||
| 21:29 | [core] fix bug about context check-in: 1fbd95d6f4 user: olr tags: core, rg | |
| 10:09 | [core] gc engine: code cleaning check-in: cc997fd621 user: olr tags: core, rg | |
|
2018-09-06
| ||
| 07:04 | [build][bug] DARG: fix Heisenbug, sort actions identifiers by name (necessary for Python 3.5) check-in: e18bf06f0c user: olr tags: build, warning, rg | |
Changes
Modified gc_core/py/lang_core/gc_engine.py from [7169e30ca7] to [9f26b926c3].
| ︙ | ︙ | |||
67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
_sAppContext = sContext
_dOptions = dict(gc_options.getOptions(sContext)) # duplication necessary, to be able to reset to default
_oTokenizer = _oSpellChecker.getTokenizer()
_oSpellChecker.activateStorage()
except:
traceback.print_exc()
def _getRules (bParagraph):
try:
if not bParagraph:
return _rules.lSentenceRules
return _rules.lParagraphRules
except:
| > > > > > > > | 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
_sAppContext = sContext
_dOptions = dict(gc_options.getOptions(sContext)) # duplication necessary, to be able to reset to default
_oTokenizer = _oSpellChecker.getTokenizer()
_oSpellChecker.activateStorage()
except:
traceback.print_exc()
def getSpellChecker ():
"return the spellchecker object"
return _oSpellChecker
#### Rules
def _getRules (bParagraph):
try:
if not bParagraph:
return _rules.lSentenceRules
return _rules.lParagraphRules
except:
|
| ︙ | ︙ | |||
98 99 100 101 102 103 104 |
try:
aRule[0] = re.compile(aRule[0])
except:
echo("Bad regular expression in # " + str(aRule[2]))
aRule[0] = "(?i)<Grammalecte>"
| < < | 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
try:
aRule[0] = re.compile(aRule[0])
except:
echo("Bad regular expression in # " + str(aRule[2]))
aRule[0] = "(?i)<Grammalecte>"
def ignoreRule (sRuleId):
"disable rule <sRuleId>"
_aIgnoredRules.add(sRuleId)
def resetIgnoreRules ():
"clear all ignored rules"
|
| ︙ | ︙ | |||
136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
def displayRules (sFilter=None):
"display the name of rules, with the filter <sFilter>"
echo("List of rules. Filter: << " + str(sFilter) + " >>")
for sOption, sLineId, sRuleId in listRegexRules(sFilter):
echo("{:<10} {:<10} {}".format(sOption, sLineId, sRuleId))
def setOption (sOpt, bVal):
"set option <sOpt> with <bVal> if it exists"
if sOpt in _dOptions:
_dOptions[sOpt] = bVal
| > > | 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
def displayRules (sFilter=None):
"display the name of rules, with the filter <sFilter>"
echo("List of rules. Filter: << " + str(sFilter) + " >>")
for sOption, sLineId, sRuleId in listRegexRules(sFilter):
echo("{:<10} {:<10} {}".format(sOption, sLineId, sRuleId))
#### Options
def setOption (sOpt, bVal):
"set option <sOpt> with <bVal> if it exists"
if sOpt in _dOptions:
_dOptions[sOpt] = bVal
|
| ︙ | ︙ | |||
177 178 179 180 181 182 183 |
def resetOptions ():
"set options to default values"
global _dOptions
_dOptions = dict(gc_options.getOptions(_sAppContext))
| < < < < < | 184 185 186 187 188 189 190 191 192 193 194 195 196 197 |
def resetOptions ():
"set options to default values"
global _dOptions
_dOptions = dict(gc_options.getOptions(_sAppContext))
#### Parsing
_zEndOfSentence = re.compile(r'([.?!:;…][ .?!… »”")]*|.$)')
_zBeginOfParagraph = re.compile(r"^\W*")
_zEndOfParagraph = re.compile(r"\W*$")
|
| ︙ | ︙ | |||
237 238 239 240 241 242 243 |
#for nPos, dToken in self.dTokenPos.items():
# s += "{}\t{}\n".format(nPos, dToken)
return s
def parse (self, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
"analyses the paragraph sText and returns list of errors"
#sText = unicodedata.normalize("NFC", sText)
| | | 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 |
#for nPos, dToken in self.dTokenPos.items():
# s += "{}\t{}\n".format(nPos, dToken)
return s
def parse (self, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
"analyses the paragraph sText and returns list of errors"
#sText = unicodedata.normalize("NFC", sText)
dOpt = dOptions or _dOptions
bShowRuleId = option('idrule')
# parse paragraph
try:
self.parseText(self.sText, self.sText0, True, 0, sCountry, dOpt, bShowRuleId, bDebug, bContext)
except:
raise
|
| ︙ | ︙ | |||
337 338 339 340 341 342 343 344 345 346 347 348 349 350 |
def update (self, sSentence, bDebug=False):
"update <sSentence> and retokenize"
self.sSentence = sSentence
lNewToken = list(_oTokenizer.genTokens(sSentence, True))
for dToken in lNewToken:
if "lMorph" in self.dTokenPos.get(dToken["nStart"], {}):
dToken["lMorph"] = self.dTokenPos[dToken["nStart"]]["lMorph"]
self.lToken = lNewToken
self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken if dToken["sType"] != "INFO" }
if bDebug:
echo("UPDATE:")
echo(self)
def _getNextPointers (self, dToken, dGraph, dPointer, bDebug=False):
| > > | 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 |
def update (self, sSentence, bDebug=False):
"update <sSentence> and retokenize"
self.sSentence = sSentence
lNewToken = list(_oTokenizer.genTokens(sSentence, True))
for dToken in lNewToken:
if "lMorph" in self.dTokenPos.get(dToken["nStart"], {}):
dToken["lMorph"] = self.dTokenPos[dToken["nStart"]]["lMorph"]
if "tags" in self.dTokenPos.get(dToken["nStart"], {}):
dToken["tags"] = self.dTokenPos[dToken["nStart"]]["tags"]
self.lToken = lNewToken
self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken if dToken["sType"] != "INFO" }
if bDebug:
echo("UPDATE:")
echo(self)
def _getNextPointers (self, dToken, dGraph, dPointer, bDebug=False):
|
| ︙ | ︙ | |||
460 461 462 463 464 465 466 |
bTokenFound = True
elif "¬" in sMeta:
if dToken["sType"] not in sMeta:
if bDebug:
echo(" MATCH: *" + sMeta)
yield { "iNode1": iNode1, "dNode": dGraph[dNode["<meta>"][sMeta]] }
bTokenFound = True
| | < | | 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 |
bTokenFound = True
elif "¬" in sMeta:
if dToken["sType"] not in sMeta:
if bDebug:
echo(" MATCH: *" + sMeta)
yield { "iNode1": iNode1, "dNode": dGraph[dNode["<meta>"][sMeta]] }
bTokenFound = True
if not bTokenFound and "bKeep" in dPointer:
yield dPointer
# JUMP
# Warning! Recurssion!
if "<>" in dNode:
dPointer2 = { "iNode1": iNode1, "dNode": dGraph[dNode["<>"]], "bKeep": True }
yield from self._getNextPointers(dToken, dGraph, dPointer2, bDebug)
def parseGraph (self, dGraph, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False):
"parse graph with tokens from the text and execute actions encountered"
lPointer = []
bTagAndRewrite = False
for iToken, dToken in enumerate(self.lToken):
if bDebug:
echo("TOKEN: " + dToken["sValue"])
# check arcs for each existing pointer
lNextPointer = []
for dPointer in lPointer:
lNextPointer.extend(self._getNextPointers(dToken, dGraph, dPointer, bDebug))
lPointer = lNextPointer
# check arcs of first nodes
lPointer.extend(self._getNextPointers(dToken, dGraph, { "iNode1": iToken, "dNode": dGraph[0] }, bDebug))
# check if there is rules to check for each pointer
for dPointer in lPointer:
#if bDebug:
# echo("+", dPointer)
if "<rules>" in dPointer["dNode"]:
bChange = self._executeActions(dGraph, dPointer["dNode"]["<rules>"], dPointer["iNode1"]-1, iToken, dOptions, sCountry, bShowRuleId, bDebug, bContext)
if bChange:
bTagAndRewrite = True
if bTagAndRewrite:
self.rewriteFromTags(bDebug)
if bDebug:
echo(self)
return self.sSentence
|
| ︙ | ︙ |