Overview
Comment: | [core] code clarification |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | core |
Files: | files | file ages | folders |
SHA3-256: |
f3ee414fa4205e6d1feadac080867af3 |
User & Date: | olr on 2019-07-01 10:24:42 |
Other Links: | manifest | tags |
Context
2019-07-01
| ||
10:36 | [build][fr] fix build_data check-in: fa6c5bd7d6 user: olr tags: trunk, fr, build | |
10:24 | [core] code clarification check-in: f3ee414fa4 user: olr tags: trunk, core | |
08:52 | [fr] ajustements check-in: 8689b3cd9b user: olr tags: trunk, fr | |
Changes
Modified gc_core/js/lang_core/gc_engine.js from [8b3f65b931] to [418e2031a5].
︙ | ︙ | |||
396 397 398 399 400 401 402 | console.log(this.asString()); } } * _getNextPointers (dToken, dGraph, dPointer, bDebug=false) { // generator: return nodes where <dToken> “values” match <dNode> arcs try { | | | | | | | | | | > | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | > | | | | | 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 | console.log(this.asString()); } } * _getNextPointers (dToken, dGraph, dPointer, bDebug=false) { // generator: return nodes where <dToken> “values” match <dNode> arcs try { let dNode = dGraph[dPointer["iNode"]]; let iToken1 = dPointer["iToken1"]; let bTokenFound = false; // token value if (dNode.hasOwnProperty(dToken["sValue"])) { if (bDebug) { console.log(" MATCH: " + dToken["sValue"]); } yield { "iToken1": iToken1, "iNode": dNode[dToken["sValue"]] }; bTokenFound = true; } if (dToken["sValue"].slice(0,2).gl_isTitle()) { // we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout". 
let sValue = dToken["sValue"].toLowerCase(); if (dNode.hasOwnProperty(sValue)) { if (bDebug) { console.log(" MATCH: " + sValue); } yield { "iToken1": iToken1, "iNode": dNode[sValue] }; bTokenFound = true; } } else if (dToken["sValue"].gl_isUpperCase()) { let sValue = dToken["sValue"].toLowerCase(); if (dNode.hasOwnProperty(sValue)) { if (bDebug) { console.log(" MATCH: " + sValue); } yield { "iToken1": iToken1, "iNode": dNode[sValue] }; bTokenFound = true; } sValue = dToken["sValue"].gl_toCapitalize(); if (dNode.hasOwnProperty(sValue)) { if (bDebug) { console.log(" MATCH: " + sValue); } yield { "iToken1": iToken1, "iNode": dNode[sValue] }; bTokenFound = true; } } // regex value arcs if (dToken["sType"] != "INFO" && dToken["sType"] != "PUNC" && dToken["sType"] != "SIGN") { if (dNode.hasOwnProperty("<re_value>")) { for (let sRegex in dNode["<re_value>"]) { if (!sRegex.includes("¬")) { // no anti-pattern if (dToken["sValue"].search(sRegex) !== -1) { if (bDebug) { console.log(" MATCH: ~" + sRegex); } yield { "iToken1": iToken1, "iNode": dNode["<re_value>"][sRegex] }; bTokenFound = true; } } else { // there is an anti-pattern let [sPattern, sNegPattern] = sRegex.split("¬", 2); if (sNegPattern && dToken["sValue"].search(sNegPattern) !== -1) { continue; } if (!sPattern || dToken["sValue"].search(sPattern) !== -1) { if (bDebug) { console.log(" MATCH: ~" + sRegex); } yield { "iToken1": iToken1, "iNode": dNode["<re_value>"][sRegex] }; bTokenFound = true; } } } } } // analysable tokens if (dToken["sType"].slice(0,4) == "WORD") { // token lemmas if (dNode.hasOwnProperty("<lemmas>")) { for (let sLemma of _oSpellChecker.getLemma(dToken["sValue"])) { if (dNode["<lemmas>"].hasOwnProperty(sLemma)) { if (bDebug) { console.log(" MATCH: >" + sLemma); } yield { "iToken1": iToken1, "iNode": dNode["<lemmas>"][sLemma] }; bTokenFound = true; } } } // regex morph arcs if (dNode.hasOwnProperty("<re_morph>")) { let lMorph = (dToken.hasOwnProperty("lMorph")) ? 
dToken["lMorph"] : _oSpellChecker.getMorph(dToken["sValue"]); if (lMorph.length > 0) { for (let sRegex in dNode["<re_morph>"]) { if (!sRegex.includes("¬")) { // no anti-pattern if (lMorph.some(sMorph => (sMorph.search(sRegex) !== -1))) { if (bDebug) { console.log(" MATCH: @" + sRegex); } yield { "iToken1": iToken1, "iNode": dNode["<re_morph>"][sRegex] }; bTokenFound = true; } } else { // there is an anti-pattern let [sPattern, sNegPattern] = sRegex.split("¬", 2); if (sNegPattern == "*") { // all morphologies must match with <sPattern> if (sPattern) { if (lMorph.every(sMorph => (sMorph.search(sPattern) !== -1))) { if (bDebug) { console.log(" MATCH: @" + sRegex); } yield { "iToken1": iToken1, "iNode": dNode["<re_morph>"][sRegex] }; bTokenFound = true; } } } else { if (sNegPattern && lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { continue; } if (!sPattern || lMorph.some(sMorph => (sMorph.search(sPattern) !== -1))) { if (bDebug) { console.log(" MATCH: @" + sRegex); } yield { "iToken1": iToken1, "iNode": dNode["<re_morph>"][sRegex] }; bTokenFound = true; } } } } } } } // token tags if (dToken.hasOwnProperty("aTags") && dNode.hasOwnProperty("<tags>")) { for (let sTag of dToken["aTags"]) { if (dNode["<tags>"].hasOwnProperty(sTag)) { if (bDebug) { console.log(" MATCH: /" + sTag); } yield { "iToken1": iToken1, "iNode": dNode["<tags>"][sTag] }; bTokenFound = true; } } } // meta arc (for token type) if (dNode.hasOwnProperty("<meta>")) { for (let sMeta in dNode["<meta>"]) { // no regex here, we just search if <dNode["sType"]> exists within <sMeta> if (sMeta == "*" || dToken["sType"] == sMeta) { if (bDebug) { console.log(" MATCH: *" + sMeta); } yield { "iToken1": iToken1, "iNode": dNode["<meta>"][sMeta] }; bTokenFound = true; } else if (sMeta.includes("¬")) { if (!sMeta.includes(dToken["sType"])) { if (bDebug) { console.log(" MATCH: *" + sMeta); } yield { "iToken1": iToken1, "iNode": dNode["<meta>"][sMeta] }; bTokenFound = true; } } } } if (!bTokenFound && 
dPointer.hasOwnProperty("bKeep")) { yield dPointer; } // JUMP // Warning! Recurssion! if (dNode.hasOwnProperty("<>")) { let dPointer2 = { "iToken1": iToken1, "iNode": dNode["<>"], "bKeep": true }; yield* this._getNextPointers(dToken, dGraph, dPointer2, bDebug); } } catch (e) { console.error(e); } } |
︙ | ︙ | |||
587 588 589 590 591 592 593 | // check arcs for each existing pointer let lNextPointer = []; for (let dPointer of lPointer) { lNextPointer.push(...this._getNextPointers(dToken, dGraph, dPointer, bDebug)); } lPointer = lNextPointer; // check arcs of first nodes | | | | | 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 | // check arcs for each existing pointer let lNextPointer = []; for (let dPointer of lPointer) { lNextPointer.push(...this._getNextPointers(dToken, dGraph, dPointer, bDebug)); } lPointer = lNextPointer; // check arcs of first nodes lPointer.push(...this._getNextPointers(dToken, dGraph, { "iToken1": iToken, "iNode": 0 }, bDebug)); // check if there is rules to check for each pointer for (let dPointer of lPointer) { if (dGraph[dPointer["iNode"]].hasOwnProperty("<rules>")) { let bChange = this._executeActions(dGraph, dGraph[dPointer["iNode"]]["<rules>"], dPointer["iToken1"]-1, iToken, dOptions, sCountry, bShowRuleId, bDebug, bContext); if (bChange) { bTagAndRewrite = true; } } } } } catch (e) { |
︙ | ︙ |
Modified gc_core/py/lang_core/gc_engine.py from [2821b2052e] to [d5503bf3bb].
︙ | ︙ | |||
def _getNextPointers (self, dToken, dGraph, dPointer, bDebug=False):
    """generator: yield a pointer for every arc of the current node that <dToken> matches

    <dPointer> is a dict with "iToken1" and "iNode"; <dGraph>[<dPointer>["iNode"]] is the node
    whose arcs are tested. Each yielded pointer is { "iToken1": ..., "iNode": <target of the arc> }.
    Arc families are tested in this order: literal value (plus case variants), <re_value>,
    <lemmas>, <re_morph>, <tags>, <meta>, then the "<>" jump arc.
    With <bDebug>, every match is reported through echo().
    """
    dNode = dGraph[dPointer["iNode"]]
    iToken1 = dPointer["iToken1"]
    bTokenFound = False
    # token value
    if dToken["sValue"] in dNode:
        if bDebug:
            echo(" MATCH: " + dToken["sValue"])
        yield { "iToken1": iToken1, "iNode": dNode[dToken["sValue"]] }
        bTokenFound = True
    if dToken["sValue"][0:2].istitle(): # we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout".
        sValue = dToken["sValue"].lower()
        if sValue in dNode:
            if bDebug:
                echo(" MATCH: " + sValue)
            yield { "iToken1": iToken1, "iNode": dNode[sValue] }
            bTokenFound = True
    elif dToken["sValue"].isupper():
        sValue = dToken["sValue"].lower()
        if sValue in dNode:
            if bDebug:
                echo(" MATCH: " + sValue)
            yield { "iToken1": iToken1, "iNode": dNode[sValue] }
            bTokenFound = True
        sValue = dToken["sValue"].capitalize()
        if sValue in dNode:
            if bDebug:
                echo(" MATCH: " + sValue)
            yield { "iToken1": iToken1, "iNode": dNode[sValue] }
            bTokenFound = True
    # regex value arcs (not tested for INFO, PUNC and SIGN tokens)
    if dToken["sType"] not in ("INFO", "PUNC", "SIGN"):
        if "<re_value>" in dNode:
            for sRegex in dNode["<re_value>"]:
                if "¬" not in sRegex:
                    # no anti-pattern
                    if re.search(sRegex, dToken["sValue"]):
                        if bDebug:
                            echo(" MATCH: ~" + sRegex)
                        yield { "iToken1": iToken1, "iNode": dNode["<re_value>"][sRegex] }
                        bTokenFound = True
                else:
                    # there is an anti-pattern: it vetoes the arc; an empty pattern accepts anything
                    sPattern, sNegPattern = sRegex.split("¬", 1)
                    if sNegPattern and re.search(sNegPattern, dToken["sValue"]):
                        continue
                    if not sPattern or re.search(sPattern, dToken["sValue"]):
                        if bDebug:
                            echo(" MATCH: ~" + sRegex)
                        yield { "iToken1": iToken1, "iNode": dNode["<re_value>"][sRegex] }
                        bTokenFound = True
    # analysable tokens
    if dToken["sType"][0:4] == "WORD":
        # token lemmas
        if "<lemmas>" in dNode:
            for sLemma in _oSpellChecker.getLemma(dToken["sValue"]):
                if sLemma in dNode["<lemmas>"]:
                    if bDebug:
                        echo(" MATCH: >" + sLemma)
                    yield { "iToken1": iToken1, "iNode": dNode["<lemmas>"][sLemma] }
                    bTokenFound = True
        # regex morph arcs
        if "<re_morph>" in dNode:
            # morphologies attached to the token take precedence; the spell checker is queried
            # only when the token has none (a dict.get() default would be evaluated every time)
            lMorph = dToken["lMorph"]  if "lMorph" in dToken  else _oSpellChecker.getMorph(dToken["sValue"])
            if lMorph:
                for sRegex in dNode["<re_morph>"]:
                    if "¬" not in sRegex:
                        # no anti-pattern
                        if any(re.search(sRegex, sMorph)  for sMorph in lMorph):
                            if bDebug:
                                echo(" MATCH: @" + sRegex)
                            yield { "iToken1": iToken1, "iNode": dNode["<re_morph>"][sRegex] }
                            bTokenFound = True
                    else:
                        # there is an anti-pattern
                        sPattern, sNegPattern = sRegex.split("¬", 1)
                        if sNegPattern == "*":
                            # all morphologies must match with <sPattern>
                            if sPattern:
                                if all(re.search(sPattern, sMorph)  for sMorph in lMorph):
                                    if bDebug:
                                        echo(" MATCH: @" + sRegex)
                                    yield { "iToken1": iToken1, "iNode": dNode["<re_morph>"][sRegex] }
                                    bTokenFound = True
                        else:
                            if sNegPattern and any(re.search(sNegPattern, sMorph)  for sMorph in lMorph):
                                continue
                            if not sPattern or any(re.search(sPattern, sMorph)  for sMorph in lMorph):
                                if bDebug:
                                    echo(" MATCH: @" + sRegex)
                                yield { "iToken1": iToken1, "iNode": dNode["<re_morph>"][sRegex] }
                                bTokenFound = True
    # token tags
    if "aTags" in dToken and "<tags>" in dNode:
        for sTag in dToken["aTags"]:
            if sTag in dNode["<tags>"]:
                if bDebug:
                    echo(" MATCH: /" + sTag)
                yield { "iToken1": iToken1, "iNode": dNode["<tags>"][sTag] }
                bTokenFound = True
    # meta arc (for token type)
    if "<meta>" in dNode:
        for sMeta in dNode["<meta>"]:
            # no regex here, we just search if <dToken["sType"]> exists within <sMeta>
            if sMeta == "*" or dToken["sType"] == sMeta:
                if bDebug:
                    echo(" MATCH: *" + sMeta)
                yield { "iToken1": iToken1, "iNode": dNode["<meta>"][sMeta] }
                bTokenFound = True
            elif "¬" in sMeta:
                if dToken["sType"] not in sMeta:
                    if bDebug:
                        echo(" MATCH: *" + sMeta)
                    yield { "iToken1": iToken1, "iNode": dNode["<meta>"][sMeta] }
                    bTokenFound = True
    # a pointer flagged "bKeep" is yielded again, unchanged, when nothing matched
    if not bTokenFound and "bKeep" in dPointer:
        yield dPointer
    # JUMP
    # Warning! Recursion: the same token is tested against the node targeted by the "<>" arc.
    if "<>" in dNode:
        dPointer2 = { "iToken1": iToken1, "iNode": dNode["<>"], "bKeep": True }
        yield from self._getNextPointers(dToken, dGraph, dPointer2, bDebug)

def parseGraph (self, dGraph, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False):
    """parse graph with tokens from the text and execute actions encountered

    Walks <self.lToken> once. For each token, every live pointer is advanced with
    _getNextPointers(), a new pointer is started from node 0, and the "<rules>" of each
    node reached are passed to _executeActions(). When at least one call reports a change,
    rewriteFromTags() is called after the walk. Returns <self.sSentence>.
    """
    lPointer = []
    bTagAndRewrite = False
    for iToken, dToken in enumerate(self.lToken):
        if bDebug:
            echo("TOKEN: " + dToken["sValue"])
        # check arcs for each existing pointer
        lNextPointer = []
        for dPointer in lPointer:
            lNextPointer.extend(self._getNextPointers(dToken, dGraph, dPointer, bDebug))
        lPointer = lNextPointer
        # check arcs of first nodes
        lPointer.extend(self._getNextPointers(dToken, dGraph, { "iToken1": iToken, "iNode": 0 }, bDebug))
        # check if there are rules to execute for each pointer
        for dPointer in lPointer:
            if "<rules>" in dGraph[dPointer["iNode"]]:
                bChange = self._executeActions(dGraph, dGraph[dPointer["iNode"]]["<rules>"], dPointer["iToken1"]-1, iToken, dOptions, sCountry, bShowRuleId, bDebug, bContext)
                if bChange:
                    bTagAndRewrite = True
    if bTagAndRewrite:
        self.rewriteFromTags(bDebug)
    if bDebug:
        echo(self)
    return self.sSentence
︙ | ︙ |