Overview
| Comment: | [core] rename parameter tags -> aTags |
|---|---|
| SHA3-256: | 5a15898d113cdcbf3ade04bd4ea0e563 |
| User & Date: | olr on 2018-09-12 10:38:22 |
Context
2018-09-12

| 17:21 | [core][js] gc engine: fix several issues | check-in: 900ff7dc44 | user: olr | tags: core, rg |
| 10:38 | [core] rename parameter tags -> aTags | check-in: 5a15898d11 | user: olr | tags: core, rg |
| 09:49 | [core][js] small code cleaning | check-in: 05090f91ec | user: olr | tags: core, rg |
Changes
Modified gc_core/js/lang_core/gc_engine.js from [225ec72077] to [eabdda0297].
@@ -183,16 +183,16 @@
         s += "sentence: " + this.sSentence0 + "\n";
         s += "now: " + this.sSentence + "\n";
         for (let dToken of this.lToken) {
             s += `#${dToken["i"]}\t${dToken["nStart"]}:${dToken["nEnd"]}\t${dToken["sValue"]}\t${dToken["sType"]}`;
             if (dToken.hasOwnProperty("lMorph")) {
                 s += "\t" + dToken["lMorph"].toString();
             }
-            if (dToken.hasOwnProperty("tags")) {
-                s += "\t" + dToken["tags"].toString();
-            }
+            if (dToken.hasOwnProperty("aTags")) {
+                s += "\t" + dToken["aTags"].toString();
+            }
             s += "\n";
         }
         return s;
     }

     parse (sCountry="${country_default}", bDebug=false, dOptions=null, bContext=false) {

@@ -336,16 +336,16 @@
         // update <sSentence> and retokenize
         this.sSentence = sSentence;
         let lNewToken = Array.from(_oTokenizer.genTokens(sSentence, true));
         for (let dToken of lNewToken) {
             if (this.dTokenPos.gl_get(dToken["nStart"], {}).hasOwnProperty("lMorph")) {
                 dToken["lMorph"] = this.dTokenPos.get(dToken["nStart"])["lMorph"];
             }
-            if (this.dTokenPos.gl_get(dToken["nStart"], {}).hasOwnProperty("tags")) {
-                dToken["tags"] = this.dTokenPos.get(dToken["nStart"])["tags"];
-            }
+            if (this.dTokenPos.gl_get(dToken["nStart"], {}).hasOwnProperty("aTags")) {
+                dToken["aTags"] = this.dTokenPos.get(dToken["nStart"])["aTags"];
+            }
         }
         this.lToken = lNewToken;
         this.dTokenPos.clear();
         for (let dToken of this.lToken) {
             if (dToken["sType"] != "INFO") {
                 this.dTokenPos.set(dToken["nStart"], dToken);

@@ -483,16 +483,16 @@
                             }
                         }
                     }
                 }
             }
         }
         // token tags
-        if (dToken.hasOwnProperty("tags") && dNode.hasOwnProperty("<tags>")) {
-            for (let sTag in dToken["tags"]) {
+        if (dToken.hasOwnProperty("aTags") && dNode.hasOwnProperty("<tags>")) {
+            for (let sTag in dToken["aTags"]) {
                 if (dNode["<tags>"].hasOwnProperty(sTag)) {
                     if (bDebug) {
                         console.log("  MATCH: /" + sTag);
                     }
                     yield { "iNode1": iNode1, "dNode": dGraph[dNode["<tags>"][sTag]] };
                     bTokenFound = true;
                 }
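
(A side note on the new block above, not changed in behaviour by this check-in: `aTags` is built as a JavaScript `Set`, and `for (let sTag in ...)` enumerates object properties rather than `Set` elements, so this loop iterates nothing; `for...of` would be needed. The Python version of this hunk, below, iterates its set directly.)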

@@ -635,18 +635,18 @@
                     }
                 }
                 else if (cActionType == "/") {
                     // Tag
                     let nTokenStart = (eAct[0] > 0) ? nTokenOffset + eAct[0] : nLastToken + eAct[0];
                     let nTokenEnd = (eAct[1] > 0) ? nTokenOffset + eAct[1] : nLastToken + eAct[1];
                     for (let i = nTokenStart; i <= nTokenEnd; i++) {
-                        if (this.lToken[i].hasOwnProperty("tags")) {
-                            this.lToken[i]["tags"].add(...sWhat.split("|"))
-                        } else {
-                            this.lToken[i]["tags"] = new Set(sWhat.split("|"));
-                        }
+                        if (this.lToken[i].hasOwnProperty("aTags")) {
+                            this.lToken[i]["aTags"].add(...sWhat.split("|"))
+                        } else {
+                            this.lToken[i]["aTags"] = new Set(sWhat.split("|"));
+                        }
                     }
                     if (bDebug) {
                         console.log(`    TAG: ${sWhat} > [${this.lToken[nTokenStart]["sValue"]}:${this.lToken[nTokenEnd]["sValue"]}]`);
                     }
                     if (!this.dTags.has(sWhat)) {
                         this.dTags.set(sWhat, [nTokenStart, nTokenStart]);
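
(Another side note: `Set.prototype.add` takes a single value, so `aTags.add(...sWhat.split("|"))` adds only the first tag when `sWhat` carries several `|`-separated tags; the Python counterpart of this hunk uses `set.update`, which adds them all.)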

@@ -1246,15 +1246,15 @@
     if (dToken["i"] < dTags.get(sTag)[1]) {
         return true;
     }
     return false;
 }

 function g_tag (dToken, sTag) {
-    return dToken.hasOwnProperty("tags") && dToken["tags"].has(sTag);
+    return dToken.hasOwnProperty("aTags") && dToken["aTags"].has(sTag);
 }

 function g_space_between_tokens (dToken1, dToken2, nMin, nMax=null) {
     let nSpace = dToken2["nStart"] - dToken1["nEnd"]
     if (nSpace < nMin) {
         return false;
     }
Modified gc_core/py/lang_core/gc_engine.py from [a6af5aa7ed] to [b63b69316e].
@@ -229,16 +229,16 @@
         s = "===== TEXT =====\n"
         s += "sentence: " + self.sSentence0 + "\n"
         s += "now: " + self.sSentence + "\n"
         for dToken in self.lToken:
             s += '#{i}\t{nStart}:{nEnd}\t{sValue}\t{sType}'.format(**dToken)
             if "lMorph" in dToken:
                 s += "\t" + str(dToken["lMorph"])
-            if "tags" in dToken:
-                s += "\t" + str(dToken["tags"])
+            if "aTags" in dToken:
+                s += "\t" + str(dToken["aTags"])
             s += "\n"
         #for nPos, dToken in self.dTokenPos.items():
         #    s += "{}\t{}\n".format(nPos, dToken)
         return s

     def parse (self, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
         "analyses the paragraph sText and returns list of errors"
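
A minimal sketch of the debug line built by the hunk above, with a hypothetical token dict (the field names match the code; the values and the tag name are invented):

    # Hypothetical token; "aTags" holds the tags attached by tagging rules.
    dToken = { "i": 3, "nStart": 10, "nEnd": 14, "sValue": "avec", "sType": "WORD", "aTags": {"_mytag_"} }
    s = '#{i}\t{nStart}:{nEnd}\t{sValue}\t{sType}'.format(**dToken)  # extra keys are ignored by format()
    if "aTags" in dToken:
        s += "\t" + str(dToken["aTags"])
    print(s)  # -> "#3\t10:14\tavec\tWORD\t{'_mytag_'}"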

@@ -339,16 +339,16 @@
     def update (self, sSentence, bDebug=False):
         "update <sSentence> and retokenize"
         self.sSentence = sSentence
         lNewToken = list(_oTokenizer.genTokens(sSentence, True))
         for dToken in lNewToken:
             if "lMorph" in self.dTokenPos.get(dToken["nStart"], {}):
                 dToken["lMorph"] = self.dTokenPos[dToken["nStart"]]["lMorph"]
-            if "tags" in self.dTokenPos.get(dToken["nStart"], {}):
-                dToken["tags"] = self.dTokenPos[dToken["nStart"]]["tags"]
+            if "aTags" in self.dTokenPos.get(dToken["nStart"], {}):
+                dToken["aTags"] = self.dTokenPos[dToken["nStart"]]["aTags"]
         self.lToken = lNewToken
         self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken if dToken["sType"] != "INFO" }
         if bDebug:
             echo("UPDATE:")
             echo(self)

     def _getNextPointers (self, dToken, dGraph, dPointer, bDebug=False):
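
The carry-over logic above can be read in isolation: token annotations are keyed by the token's start offset, so after retokenization a new token inherits `lMorph` and `aTags` whenever it starts where an old annotated token started. A standalone sketch with hypothetical data:

    dTokenPos = { 10: {"nStart": 10, "sValue": "avec", "aTags": {"_mytag_"}} }  # old tokens by offset
    lNewToken = [ {"nStart": 10, "sValue": "avec"}, {"nStart": 15, "sValue": "lui"} ]
    for dToken in lNewToken:
        if "aTags" in dTokenPos.get(dToken["nStart"], {}):
            dToken["aTags"] = dTokenPos[dToken["nStart"]]["aTags"]
    # lNewToken[0] now carries {"_mytag_"}; lNewToken[1] is untouched.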

@@ -440,16 +440,16 @@
                                 continue
                         if not sPattern or any(re.search(sPattern, sMorph) for sMorph in lMorph):
                             if bDebug:
                                 echo("  MATCH: @" + sRegex)
                             yield { "iNode1": iNode1, "dNode": dGraph[dNode["<re_morph>"][sRegex]] }
                             bTokenFound = True
         # token tags
-        if "tags" in dToken and "<tags>" in dNode:
-            for sTag in dToken["tags"]:
+        if "aTags" in dToken and "<tags>" in dNode:
+            for sTag in dToken["aTags"]:
                 if sTag in dNode["<tags>"]:
                     if bDebug:
                         echo("  MATCH: /" + sTag)
                     yield { "iNode1": iNode1, "dNode": dGraph[dNode["<tags>"][sTag]] }
                     bTokenFound = True
         # meta arc (for token type)
         if "<meta>" in dNode:
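
In the graph walk above, a node's `"<tags>"` entry maps a tag name to the index of the follow-up node, and a token takes that arc if it carries the tag. A rough standalone sketch (hypothetical two-node graph; `iNode1` is simplified here to the token index):

    dGraph = { 0: {"<tags>": {"_mytag_": 1}}, 1: {} }  # hypothetical graph
    dNode = dGraph[0]
    dToken = { "i": 3, "aTags": {"_mytag_"} }
    lPointers = []
    if "aTags" in dToken and "<tags>" in dNode:
        for sTag in dToken["aTags"]:
            if sTag in dNode["<tags>"]:
                lPointers.append({ "iNode1": dToken["i"], "dNode": dGraph[dNode["<tags>"][sTag]] })
    # lPointers -> [{'iNode1': 3, 'dNode': {}}]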

@@ -553,18 +553,18 @@
                             echo("  COND_OK")
                     pass
                 elif cActionType == "/":
                     # Tag
                     nTokenStart = nTokenOffset + eAct[0] if eAct[0] > 0 else nLastToken + eAct[0]
                     nTokenEnd = nTokenOffset + eAct[1] if eAct[1] > 0 else nLastToken + eAct[1]
                     for i in range(nTokenStart, nTokenEnd+1):
-                        if "tags" in self.lToken[i]:
-                            self.lToken[i]["tags"].update(sWhat.split("|"))
-                        else:
-                            self.lToken[i]["tags"] = set(sWhat.split("|"))
+                        if "aTags" in self.lToken[i]:
+                            self.lToken[i]["aTags"].update(sWhat.split("|"))
+                        else:
+                            self.lToken[i]["aTags"] = set(sWhat.split("|"))
                     if bDebug:
                         echo("    TAG: {} > [{}:{}]".format(sWhat, self.lToken[nTokenStart]["sValue"], self.lToken[nTokenEnd]["sValue"]))
                     if sWhat not in self.dTags:
                         self.dTags[sWhat] = [nTokenStart, nTokenStart]
                     else:
                         self.dTags[sWhat][0] = min(nTokenStart, self.dTags[sWhat][0])
                         self.dTags[sWhat][1] = max(nTokenEnd, self.dTags[sWhat][1])
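
The `/` action above stores the `|`-separated tags as a set on each token in the range and merges on repeat, so tagging the same token twice is harmless. A minimal sketch of just that set logic (hypothetical tag names):

    dToken = {}
    for sWhat in ("tag1|tag2", "tag2|tag3"):
        if "aTags" in dToken:
            dToken["aTags"].update(sWhat.split("|"))
        else:
            dToken["aTags"] = set(sWhat.split("|"))
    print(dToken["aTags"])  # {'tag1', 'tag2', 'tag3'} (set order may vary)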

@@ -1035,15 +1035,15 @@
         return False
     if dToken["i"] < dTags[sTag][1]:
         return True
     return False


 def g_tag (dToken, sTag):
-    return "tags" in dToken and sTag in dToken["tags"]
+    return "aTags" in dToken and sTag in dToken["aTags"]


 def g_space_between_tokens (dToken1, dToken2, nMin, nMax=None):
     nSpace = dToken2["nStart"] - dToken1["nEnd"]
     if nSpace < nMin:
         return False
     if nMax is not None and nSpace > nMax:
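
`g_tag` is the helper that rule conditions can call to test a token's tags after the rename; a short usage sketch (hypothetical tokens):

    def g_tag (dToken, sTag):
        return "aTags" in dToken and sTag in dToken["aTags"]

    print(g_tag({ "aTags": {"_mytag_"} }, "_mytag_"))  # True
    print(g_tag({}, "_mytag_"))                        # False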