Overview

Comment:     [core] rename parameter tags -> aTags
Downloads:   Tarball | ZIP archive | SQL archive
Timelines:   family | ancestors | descendants | both | core | rg
Files:       files | file ages | folders
SHA3-256:    5a15898d113cdcbf3ade04bd4ea0e563
User & Date: olr on 2018-09-12 10:38:22
Other Links: branch diff | manifest | tags
Context
2018-09-12

17:21  [core][js] gc engine: fix several issues   check-in: 900ff7dc44  user: olr  tags: core, rg
10:38  [core] rename parameter tags -> aTags      check-in: 5a15898d11  user: olr  tags: core, rg
09:49  [core][js] small code cleaning             check-in: 05090f91ec  user: olr  tags: core, rg
Changes
Modified gc_core/js/lang_core/gc_engine.js from [225ec72077] to [eabdda0297].
@@ -183,16 +183,16 @@
         s += "sentence: " + this.sSentence0 + "\n";
         s += "now: " + this.sSentence + "\n";
         for (let dToken of this.lToken) {
             s += `#${dToken["i"]}\t${dToken["nStart"]}:${dToken["nEnd"]}\t${dToken["sValue"]}\t${dToken["sType"]}`;
             if (dToken.hasOwnProperty("lMorph")) {
                 s += "\t" + dToken["lMorph"].toString();
             }
-            if (dToken.hasOwnProperty("tags")) {
-                s += "\t" + dToken["tags"].toString();
+            if (dToken.hasOwnProperty("aTags")) {
+                s += "\t" + dToken["aTags"].toString();
             }
             s += "\n";
         }
         return s;
     }

     parse (sCountry="${country_default}", bDebug=false, dOptions=null, bContext=false) {
@@ -336,16 +336,16 @@
         // update <sSentence> and retokenize
         this.sSentence = sSentence;
         let lNewToken = Array.from(_oTokenizer.genTokens(sSentence, true));
         for (let dToken of lNewToken) {
             if (this.dTokenPos.gl_get(dToken["nStart"], {}).hasOwnProperty("lMorph")) {
                 dToken["lMorph"] = this.dTokenPos.get(dToken["nStart"])["lMorph"];
             }
-            if (this.dTokenPos.gl_get(dToken["nStart"], {}).hasOwnProperty("tags")) {
-                dToken["tags"] = this.dTokenPos.get(dToken["nStart"])["tags"];
+            if (this.dTokenPos.gl_get(dToken["nStart"], {}).hasOwnProperty("aTags")) {
+                dToken["aTags"] = this.dTokenPos.get(dToken["nStart"])["aTags"];
             }
         }
         this.lToken = lNewToken;
         this.dTokenPos.clear();
         for (let dToken of this.lToken) {
             if (dToken["sType"] != "INFO") {
                 this.dTokenPos.set(dToken["nStart"], dToken);
@@ -483,16 +483,16 @@
                         }
                     }
                 }
             }
         }
     }
     // token tags
-    if (dToken.hasOwnProperty("tags") && dNode.hasOwnProperty("<tags>")) {
-        for (let sTag in dToken["tags"]) {
+    if (dToken.hasOwnProperty("aTags") && dNode.hasOwnProperty("<tags>")) {
+        for (let sTag in dToken["aTags"]) {
             if (dNode["<tags>"].hasOwnProperty(sTag)) {
                 if (bDebug) {
                     console.log("  MATCH: /" + sTag);
                 }
                 yield { "iNode1": iNode1, "dNode": dGraph[dNode["<tags>"][sTag]] };
                 bTokenFound = true;
             }
@@ -635,18 +635,18 @@
             }
         }
         else if (cActionType == "/") {
             // Tag
             let nTokenStart = (eAct[0] > 0) ? nTokenOffset + eAct[0] : nLastToken + eAct[0];
             let nTokenEnd = (eAct[1] > 0) ? nTokenOffset + eAct[1] : nLastToken + eAct[1];
             for (let i = nTokenStart; i <= nTokenEnd; i++) {
-                if (this.lToken[i].hasOwnProperty("tags")) {
-                    this.lToken[i]["tags"].add(...sWhat.split("|"))
+                if (this.lToken[i].hasOwnProperty("aTags")) {
+                    this.lToken[i]["aTags"].add(...sWhat.split("|"))
                 } else {
-                    this.lToken[i]["tags"] = new Set(sWhat.split("|"));
+                    this.lToken[i]["aTags"] = new Set(sWhat.split("|"));
                 }
             }
             if (bDebug) {
                 console.log(`    TAG: ${sWhat} > [${this.lToken[nTokenStart]["sValue"]}:${this.lToken[nTokenEnd]["sValue"]}]`);
             }
             if (!this.dTags.has(sWhat)) {
                 this.dTags.set(sWhat, [nTokenStart, nTokenStart]);
@@ -1246,15 +1246,15 @@
     if (dToken["i"] < dTags.get(sTag)[1]) {
         return true;
     }
     return false;
 }

 function g_tag (dToken, sTag) {
-    return dToken.hasOwnProperty("tags") && dToken["tags"].has(sTag);
+    return dToken.hasOwnProperty("aTags") && dToken["aTags"].has(sTag);
 }

 function g_space_between_tokens (dToken1, dToken2, nMin, nMax=null) {
     let nSpace = dToken2["nStart"] - dToken1["nEnd"]
     if (nSpace < nMin) {
         return false;
     }
Modified gc_core/py/lang_core/gc_engine.py from [a6af5aa7ed] to [b63b69316e].
@@ -229,16 +229,16 @@
         s = "===== TEXT =====\n"
         s += "sentence: " + self.sSentence0 + "\n"
         s += "now: " + self.sSentence + "\n"
         for dToken in self.lToken:
             s += '#{i}\t{nStart}:{nEnd}\t{sValue}\t{sType}'.format(**dToken)
             if "lMorph" in dToken:
                 s += "\t" + str(dToken["lMorph"])
-            if "tags" in dToken:
-                s += "\t" + str(dToken["tags"])
+            if "aTags" in dToken:
+                s += "\t" + str(dToken["aTags"])
             s += "\n"
         #for nPos, dToken in self.dTokenPos.items():
         #    s += "{}\t{}\n".format(nPos, dToken)
         return s

     def parse (self, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
         "analyses the paragraph sText and returns list of errors"
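To make the renamed field concrete, here is a minimal, self-contained sketch (not part of the check-in; all token values and tag names are invented) of how a token carrying "aTags" renders through the dump code above:

    # Hypothetical token; "neg" and "g1__" are made-up tag names.
    dToken = { "i": 3, "nStart": 10, "nEnd": 13, "sValue": "pas", "sType": "WORD",
               "lMorph": ["pas adv"], "aTags": {"neg", "g1__"} }
    s = '#{i}\t{nStart}:{nEnd}\t{sValue}\t{sType}'.format(**dToken)
    if "lMorph" in dToken:
        s += "\t" + str(dToken["lMorph"])
    if "aTags" in dToken:
        s += "\t" + str(dToken["aTags"])
    print(s)  # e.g. #3  10:13  pas  WORD  ['pas adv']  {'neg', 'g1__'}  (set order may vary)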
@@ -339,16 +339,16 @@
     def update (self, sSentence, bDebug=False):
         "update <sSentence> and retokenize"
         self.sSentence = sSentence
         lNewToken = list(_oTokenizer.genTokens(sSentence, True))
         for dToken in lNewToken:
             if "lMorph" in self.dTokenPos.get(dToken["nStart"], {}):
                 dToken["lMorph"] = self.dTokenPos[dToken["nStart"]]["lMorph"]
-            if "tags" in self.dTokenPos.get(dToken["nStart"], {}):
-                dToken["tags"] = self.dTokenPos[dToken["nStart"]]["tags"]
+            if "aTags" in self.dTokenPos.get(dToken["nStart"], {}):
+                dToken["aTags"] = self.dTokenPos[dToken["nStart"]]["aTags"]
         self.lToken = lNewToken
         self.dTokenPos = { dToken["nStart"]: dToken  for dToken in self.lToken  if dToken["sType"] != "INFO" }
         if bDebug:
             echo("UPDATE:")
             echo(self)

     def _getNextPointers (self, dToken, dGraph, dPointer, bDebug=False):
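The update path above keys old tokens by their start offset, so a retokenized token inherits "aTags" only when it begins exactly where a tagged token used to. A sketch under invented data:

    # Hypothetical old position index and retokenizer output.
    dTokenPos = { 10: { "nStart": 10, "sValue": "pas", "aTags": {"neg"} } }
    lNewToken = [ { "nStart": 10, "sValue": "pas" }, { "nStart": 14, "sValue": "vu" } ]
    for dToken in lNewToken:
        if "aTags" in dTokenPos.get(dToken["nStart"], {}):
            dToken["aTags"] = dTokenPos[dToken["nStart"]]["aTags"]
    print(lNewToken[0].get("aTags"))  # {'neg'}
    print(lNewToken[1].get("aTags"))  # None: no tagged token started at offset 14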
@@ -440,16 +440,16 @@
                             continue
                         if not sPattern or any(re.search(sPattern, sMorph)  for sMorph in lMorph):
                             if bDebug:
                                 echo("  MATCH: @" + sRegex)
                             yield { "iNode1": iNode1, "dNode": dGraph[dNode["<re_morph>"][sRegex]] }
                             bTokenFound = True
         # token tags
-        if "tags" in dToken and "<tags>" in dNode:
-            for sTag in dToken["tags"]:
+        if "aTags" in dToken and "<tags>" in dNode:
+            for sTag in dToken["aTags"]:
                 if sTag in dNode["<tags>"]:
                     if bDebug:
                         echo("  MATCH: /" + sTag)
                     yield { "iNode1": iNode1, "dNode": dGraph[dNode["<tags>"][sTag]] }
                     bTokenFound = True
         # meta arc (for token type)
         if "<meta>" in dNode:
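As the hunk above shows, a node's "<tags>" entry maps a tag name to the index of the follow-up node, so a token advances the graph pointer whenever one of its aTags appears among those keys. A reduced sketch with an invented two-node graph:

    # Hypothetical graph fragment: the current node routes tag "neg" to node 7.
    dGraph = { 0: { "<tags>": { "neg": 7 } }, 7: { "sValue": "<next-node>" } }
    dNode = dGraph[0]
    dToken = { "aTags": {"neg", "g1__"} }
    for sTag in dToken["aTags"]:
        if sTag in dNode["<tags>"]:
            print("MATCH: /" + sTag, "->", dGraph[dNode["<tags>"][sTag]])
    # prints: MATCH: /neg -> {'sValue': '<next-node>'}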
@@ -553,18 +553,18 @@
                             echo("  COND_OK")
                         pass
                     elif cActionType == "/":
                         # Tag
                         nTokenStart = nTokenOffset + eAct[0]  if eAct[0] > 0  else nLastToken + eAct[0]
                         nTokenEnd = nTokenOffset + eAct[1]  if eAct[1] > 0  else nLastToken + eAct[1]
                         for i in range(nTokenStart, nTokenEnd+1):
-                            if "tags" in self.lToken[i]:
-                                self.lToken[i]["tags"].update(sWhat.split("|"))
+                            if "aTags" in self.lToken[i]:
+                                self.lToken[i]["aTags"].update(sWhat.split("|"))
                             else:
-                                self.lToken[i]["tags"] = set(sWhat.split("|"))
+                                self.lToken[i]["aTags"] = set(sWhat.split("|"))
                         if bDebug:
                             echo("    TAG: {} > [{}:{}]".format(sWhat, self.lToken[nTokenStart]["sValue"], self.lToken[nTokenEnd]["sValue"]))
                         if sWhat not in self.dTags:
                             self.dTags[sWhat] = [nTokenStart, nTokenStart]
                         else:
                             self.dTags[sWhat][0] = min(nTokenStart, self.dTags[sWhat][0])
                             self.dTags[sWhat][1] = max(nTokenEnd, self.dTags[sWhat][1])
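The "/" action above tags a whole token range at once: sWhat may carry several tag names separated by "|", each token in the range accumulates them in its "aTags" set, and dTags records a [start, end] span per tag string. A sketch with invented tokens (index 0 is a dummy element so indices line up):

    # Hypothetical token list and tag registry.
    lToken = [ {}, { "sValue": "ne" }, { "sValue": "pas" } ]
    dTags = {}
    sWhat, nTokenStart, nTokenEnd = "neg|g1__", 1, 2
    for i in range(nTokenStart, nTokenEnd+1):
        if "aTags" in lToken[i]:
            lToken[i]["aTags"].update(sWhat.split("|"))
        else:
            lToken[i]["aTags"] = set(sWhat.split("|"))
    if sWhat not in dTags:
        dTags[sWhat] = [nTokenStart, nTokenStart]
    print(lToken[1]["aTags"], dTags)  # {'neg', 'g1__'} {'neg|g1__': [1, 1]} (set order may vary)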
@@ -1035,15 +1035,15 @@
         return False
     if dToken["i"] < dTags[sTag][1]:
         return True
     return False


 def g_tag (dToken, sTag):
-    return "tags" in dToken and sTag in dToken["tags"]
+    return "aTags" in dToken and sTag in dToken["aTags"]


 def g_space_between_tokens (dToken1, dToken2, nMin, nMax=None):
     nSpace = dToken2["nStart"] - dToken1["nEnd"]
     if nSpace < nMin:
         return False
     if nMax is not None and nSpace > nMax:
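For reference, a usage sketch of the renamed helper, copied from the Python side of this check-in and exercised on invented tokens; g_tag is simply a membership test on the token's tag set:

    def g_tag (dToken, sTag):
        return "aTags" in dToken and sTag in dToken["aTags"]

    # Hypothetical tokens.
    print(g_tag({ "sValue": "pas", "aTags": {"neg"} }, "neg"))   # True
    print(g_tag({ "sValue": "pas", "aTags": {"neg"} }, "g1__"))  # False
    print(g_tag({ "sValue": "vu" }, "neg"))                      # False: untagged tokens have no "aTags" key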