Overview
Comment: | [core] gc engine: code simplification |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | core |
Files: | files | file ages | folders |
SHA3-256: |
4a607614545207738066620d2da8a1d6 |
User & Date: | olr on 2020-11-29 12:43:36 |
Other Links: | manifest | tags |
Context
2020-11-30
| ||
10:30 | [fr] ajustements: nouveau graphe check-in: 0bd6c2f979 user: olr tags: trunk, fr | |
2020-11-29
| ||
12:43 | [core] gc engine: code simplification check-in: 4a60761454 user: olr tags: trunk, core | |
12:33 | [fr] ajustements check-in: ffcde2f18e user: olr tags: trunk, fr | |
Changes
Modified gc_core/js/lang_core/gc_engine.js from [2d8b7e8dc3] to [a6cc51f719].
︙ | ︙ | |||
389 390 391 392 393 394 395 | } if (bDebug) { console.log("UPDATE:"); console.log(this.asString()); } } | | | < < < < < | < < < | < < < | < < < | < < < | < < < | < < < | | 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 | } if (bDebug) { console.log("UPDATE:"); console.log(this.asString()); } } * _getMatches (oGraph, oToken, oNode, bKeep=false) { // generator: return matches where <oToken> “values” match <oNode> arcs try { let bTokenFound = false; // token value if (oNode.hasOwnProperty(oToken["sValue"])) { yield [" ", oToken["sValue"], oNode[oToken["sValue"]]]; bTokenFound = true; } if (oToken["sValue"].slice(0,2).gl_isTitle()) { // we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout". let sValue = oToken["sValue"].toLowerCase(); if (oNode.hasOwnProperty(sValue)) { yield [" ", sValue, oNode[sValue]]; bTokenFound = true; } } else if (oToken["sValue"].gl_isUpperCase()) { let sValue = oToken["sValue"].toLowerCase(); if (oNode.hasOwnProperty(sValue)) { yield [" ", sValue, oNode[sValue]]; bTokenFound = true; } sValue = oToken["sValue"].gl_toCapitalize(); if (oNode.hasOwnProperty(sValue)) { yield [" ", sValue, oNode[sValue]]; bTokenFound = true; } } // regex value arcs if (oToken["sType"] != "INFO" && oToken["sType"] != "PUNC" && oToken["sType"] != "SIGN") { if (oNode.hasOwnProperty("<re_value>")) { for (let sRegex in oNode["<re_value>"]) { if (!sRegex.includes("¬")) { // no anti-pattern if (oToken["sValue"].search(sRegex) !== -1) { yield ["~", sRegex, oNode["<re_value>"][sRegex]]; bTokenFound = true; } } else { // there is an anti-pattern let [sPattern, sNegPattern] = sRegex.split("¬", 2); if (sNegPattern && oToken["sValue"].search(sNegPattern) !== -1) { continue; } if (!sPattern || oToken["sValue"].search(sPattern) !== -1) { yield ["~", sRegex, oNode["<re_value>"][sRegex]]; bTokenFound = true; } } } } } // analysable tokens if (oToken["sType"].slice(0,4) == "WORD") { // token lemmas if (oNode.hasOwnProperty("<lemmas>")) { for (let sLemma of gc_engine.oSpellChecker.getLemma(oToken["sValue"])) { if (oNode["<lemmas>"].hasOwnProperty(sLemma)) { yield [">", sLemma, oNode["<lemmas>"][sLemma]]; bTokenFound = true; } } } // phonetic similarity if (oNode.hasOwnProperty("<phonet>")) { for (let sPhonet in oNode["<phonet>"]) { |
︙ | ︙ | |||
493 494 495 496 497 498 499 | } if (oToken["sValue"].gl_isUpperCase() && oToken["sValue"].gl_toCapitalize() == sPhon) { continue; } } } if (phonet.isSimilAs(oToken["sValue"], sPhonet.gl_trimRight("!"))) { | < < < | < < < | < < < | < < < | < < < | < < < | < < < | < < < | < < < | < < < | | | < | > > | > > > > > > > > > > > > > > | > | 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 | } if (oToken["sValue"].gl_isUpperCase() && oToken["sValue"].gl_toCapitalize() == sPhon) { continue; } } } if (phonet.isSimilAs(oToken["sValue"], sPhonet.gl_trimRight("!"))) { yield ["#", sPhonet, oNode["<phonet>"][sPhonet]]; bTokenFound = true; } } } // morph arcs if (oNode.hasOwnProperty("<morph>")) { let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : gc_engine.oSpellChecker.getMorph(oToken["sValue"]); if (lMorph.length > 0) { for (let sSearch in oNode["<morph>"]) { if (!sSearch.includes("¬")) { // no anti-pattern if (lMorph.some(sMorph => (sMorph.includes(sSearch)))) { yield ["$", sSearch, oNode["<morph>"][sSearch]]; bTokenFound = true; } } else { // there is an anti-pattern let [sPattern, sNegPattern] = sSearch.split("¬", 2); if (sNegPattern == "*") { // all morphologies must match with <sPattern> if (sPattern) { if (lMorph.every(sMorph => (sMorph.includes(sPattern)))) { yield ["$", sSearch, oNode["<morph>"][sSearch]]; bTokenFound = true; } } } else { if (sNegPattern && lMorph.some(sMorph => (sMorph.includes(sNegPattern)))) { continue; } if (!sPattern || lMorph.some(sMorph => (sMorph.includes(sPattern)))) { yield ["$", sSearch, oNode["<morph>"][sSearch]]; bTokenFound = true; } } } } } } // regex morph arcs if (oNode.hasOwnProperty("<re_morph>")) { let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : gc_engine.oSpellChecker.getMorph(oToken["sValue"]); if (lMorph.length > 0) { for (let sRegex in oNode["<re_morph>"]) { if (!sRegex.includes("¬")) { // no anti-pattern if (lMorph.some(sMorph => (sMorph.search(sRegex) !== -1))) { yield ["@", sRegex, oNode["<re_morph>"][sRegex]]; bTokenFound = true; } } else { // there is an anti-pattern let [sPattern, sNegPattern] = sRegex.split("¬", 2); if (sNegPattern == "*") { // all morphologies must match with <sPattern> if (sPattern) { if (lMorph.every(sMorph => (sMorph.search(sPattern) !== -1))) { yield ["@", sRegex, oNode["<re_morph>"][sRegex]]; bTokenFound = true; } } } else { if (sNegPattern && lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { continue; } if (!sPattern || lMorph.some(sMorph => (sMorph.search(sPattern) !== -1))) { yield ["@", sRegex, oNode["<re_morph>"][sRegex]]; bTokenFound = true; } } } } } } } // token tags if (oToken.hasOwnProperty("aTags") && oNode.hasOwnProperty("<tags>")) { for (let sTag of oToken["aTags"]) { if (oNode["<tags>"].hasOwnProperty(sTag)) { yield ["/", sTag, oNode["<tags>"][sTag]]; bTokenFound = true; } } } // meta arc (for token type) if (oNode.hasOwnProperty("<meta>")) { for (let sMeta in oNode["<meta>"]) { // no regex here, we just search if <oNode["sType"]> exists within <sMeta> if (sMeta == "*" || oToken["sType"] == sMeta) { yield ["*", sMeta, oNode["<meta>"][sMeta]]; bTokenFound = true; } else if (sMeta.includes("¬")) { if (!sMeta.includes(oToken["sType"])) { yield ["*", sMeta, oNode["<meta>"][sMeta]]; bTokenFound = true; } } } } if (!bTokenFound && bKeep) { yield [null, "", -1]; } // JUMP // Warning! Recurssion! if (oNode.hasOwnProperty("<>")) { yield* this._getMatches(oGraph, oToken, oGraph[oNode["<>"]], bKeep=true); } } catch (e) { console.error(e); } } parseGraph (oGraph, sCountry="${country_default}", dOptions=null, bShowRuleId=false, bDebug=false, bContext=false) { // parse graph with tokens from the text and execute actions encountered let lPointer = []; let bTagAndRewrite = false; try { for (let [iToken, oToken] of this.lTokens.entries()) { if (bDebug) { console.log("TOKEN: " + oToken["sValue"]); } // check arcs for each existing pointer let lNextPointer = []; for (let oPointer of lPointer) { for (let [cActionType, sMatch, iNode] of this._getMatches(oGraph, oToken, oGraph[oPointer["iNode"]])) { if (cActionType === null) { lNextPointer.push(oPointer); continue; } if (bDebug) { console.log(" MATCH: " + cActionType + sMatch); } lNextPointer.push({ "iToken1": oPointer["iToken1"], "iNode": iNode }); } } lPointer = lNextPointer; // check arcs of first nodes for (let [cActionType, sMatch, iNode] of this._getMatches(oGraph, oToken, oGraph[0])) { if (cActionType === null) { continue; } if (bDebug) { console.log(" MATCH: " + cActionType + sMatch); } lPointer.push({ "iToken1": iToken, "iNode": iNode }); } // check if there is rules to check for each pointer for (let oPointer of lPointer) { if (oGraph[oPointer["iNode"]].hasOwnProperty("<rules>")) { let bChange = this._executeActions(oGraph, oGraph[oPointer["iNode"]]["<rules>"], oPointer["iToken1"]-1, iToken, dOptions, sCountry, bShowRuleId, bDebug, bContext); if (bChange) { bTagAndRewrite = true; } |
︙ | ︙ |
Modified gc_core/py/lang_core/gc_engine.py from [f9382d41bb] to [da8987b870].
︙ | ︙ | |||
396 397 398 399 400 401 402 | dToken["aTags"] = self.dTokenPos[dToken["nStart"]]["aTags"] self.lTokens = lNewTokens self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lTokens if dToken["sType"] != "INFO" } if bDebug: echo("UPDATE:") echo(self) | | | < < < < | < < | < < | < < | < < | < < | < < | < < | < < | < < | < < | < < | < < | < < | < < | < < | < < | | | | < | > > | > > > > > > > > > | | 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 | dToken["aTags"] = self.dTokenPos[dToken["nStart"]]["aTags"] self.lTokens = lNewTokens self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lTokens if dToken["sType"] != "INFO" } if bDebug: echo("UPDATE:") echo(self) def _getMatches (self, dGraph, dToken, dNode, bKeep=False): "generator: return matches where <dToken> “values” match <dNode> arcs" bTokenFound = False # token value if dToken["sValue"] in dNode: yield (" ", dToken["sValue"], dNode[dToken["sValue"]]) bTokenFound = True if dToken["sValue"][0:2].istitle(): # we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout". sValue = dToken["sValue"].lower() if sValue in dNode: yield (" ", sValue, dNode[sValue]) bTokenFound = True elif dToken["sValue"].isupper(): sValue = dToken["sValue"].lower() if sValue in dNode: yield (" ", sValue, dNode[sValue]) bTokenFound = True sValue = dToken["sValue"].capitalize() if sValue in dNode: yield (" ", sValue, dNode[sValue]) bTokenFound = True # regex value arcs if dToken["sType"] not in frozenset(["INFO", "PUNC", "SIGN"]): if "<re_value>" in dNode: for sRegex in dNode["<re_value>"]: if "¬" not in sRegex: # no anti-pattern if re.search(sRegex, dToken["sValue"]): yield ("~", sRegex, dNode["<re_value>"][sRegex]) bTokenFound = True else: # there is an anti-pattern sPattern, sNegPattern = sRegex.split("¬", 1) if sNegPattern and re.search(sNegPattern, dToken["sValue"]): continue if not sPattern or re.search(sPattern, dToken["sValue"]): yield ("~", sRegex, dNode["<re_value>"][sRegex]) bTokenFound = True # analysable tokens if dToken["sType"][0:4] == "WORD": # token lemmas if "<lemmas>" in dNode: for sLemma in _oSpellChecker.getLemma(dToken["sValue"]): if sLemma in dNode["<lemmas>"]: yield (">", sLemma, dNode["<lemmas>"][sLemma]) bTokenFound = True # phonetic similarity if "<phonet>" in dNode: for sPhonet in dNode["<phonet>"]: if sPhonet.endswith("!"): sPhon = sPhonet[0:-1] if dToken["sValue"] == sPhon: continue if dToken["sValue"][0:1].isupper(): if dToken["sValue"].lower() == sPhon: continue if dToken["sValue"].isupper() and dToken["sValue"].capitalize() == sPhon: continue if phonet.isSimilAs(dToken["sValue"], sPhonet.rstrip("!")): yield ("#", sPhonet, dNode["<phonet>"][sPhonet]) bTokenFound = True # morph arcs if "<morph>" in dNode: lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"])) if lMorph: for sSearch in dNode["<morph>"]: if "¬" not in sSearch: # no anti-pattern if any(sSearch in sMorph for sMorph in lMorph): yield ("$", sSearch, dNode["<morph>"][sSearch]) bTokenFound = True else: # there is an anti-pattern sPattern, sNegPattern = sSearch.split("¬", 1) if sNegPattern == "*": # all morphologies must match with <sPattern> if sPattern: if all(sPattern in sMorph for sMorph in lMorph): yield ("$", sSearch, dNode["<morph>"][sSearch]) bTokenFound = True else: if sNegPattern and any(sNegPattern in sMorph for sMorph in lMorph): continue if not sPattern or any(sPattern in sMorph for sMorph in lMorph): yield ("$", sSearch, dNode["<morph>"][sSearch]) bTokenFound = True # regex morph arcs if "<re_morph>" in dNode: lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"])) if lMorph: for sRegex in dNode["<re_morph>"]: if "¬" not in sRegex: # no anti-pattern if any(re.search(sRegex, sMorph) for sMorph in lMorph): yield ("@", sRegex, dNode["<re_morph>"][sRegex]) bTokenFound = True else: # there is an anti-pattern sPattern, sNegPattern = sRegex.split("¬", 1) if sNegPattern == "*": # all morphologies must match with <sPattern> if sPattern: if all(re.search(sPattern, sMorph) for sMorph in lMorph): yield ("@", sRegex, dNode["<re_morph>"][sRegex]) bTokenFound = True else: if sNegPattern and any(re.search(sNegPattern, sMorph) for sMorph in lMorph): continue if not sPattern or any(re.search(sPattern, sMorph) for sMorph in lMorph): yield ("@", sRegex, dNode["<re_morph>"][sRegex]) bTokenFound = True # token tags if "aTags" in dToken and "<tags>" in dNode: for sTag in dToken["aTags"]: if sTag in dNode["<tags>"]: yield ("/", sTag, dNode["<tags>"][sTag]) bTokenFound = True # meta arc (for token type) if "<meta>" in dNode: for sMeta in dNode["<meta>"]: # no regex here, we just search if <dNode["sType"]> exists within <sMeta> if sMeta == "*" or dToken["sType"] == sMeta: yield ("*", sMeta, dNode["<meta>"][sMeta]) bTokenFound = True elif "¬" in sMeta: if dToken["sType"] not in sMeta: yield ("*", sMeta, dNode["<meta>"][sMeta]) bTokenFound = True if not bTokenFound and bKeep: yield (None, "", -1) # JUMP # Warning! Recursion! if "<>" in dNode: yield from self._getMatches(dGraph, dToken, dGraph[dNode["<>"]], bKeep=True) def parseGraph (self, dGraph, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False): "parse graph with tokens from the text and execute actions encountered" lPointer = [] bTagAndRewrite = False for iToken, dToken in enumerate(self.lTokens): if bDebug: echo("TOKEN: " + dToken["sValue"]) # check arcs for each existing pointer lNextPointer = [] for dPointer in lPointer: for cActionType, sMatch, iNode in self._getMatches(dGraph, dToken, dGraph[dPointer["iNode"]]): if cActionType is None: lNextPointer.append(dPointer) continue if bDebug: echo(" MATCH: " + cActionType + sMatch) lNextPointer.append({ "iToken1": dPointer["iToken1"], "iNode": iNode }) lPointer = lNextPointer # check arcs of first nodes for cActionType, sMatch, iNode in self._getMatches(dGraph, dToken, dGraph[0]): if cActionType is None: continue if bDebug: echo(" MATCH: " + cActionType + sMatch) lPointer.append({ "iToken1": iToken, "iNode": iNode }) # check if there is rules to check for each pointer for dPointer in lPointer: #if bDebug: # echo("+", dPointer) if "<rules>" in dGraph[dPointer["iNode"]]: bChange = self._executeActions(dGraph, dGraph[dPointer["iNode"]]["<rules>"], dPointer["iToken1"]-1, iToken, dOptions, sCountry, bShowRuleId, bDebug, bContext) if bChange: |
︙ | ︙ |
Modified gc_lang/fr/perf_memo.txt from [5cb40c8830] to [d10e676dcc].
︙ | ︙ | |||
31 32 33 34 35 36 37 | 1.9.0 2020.04.20 19:57 1.51183 0.369546 0.25681 0.0734314 0.0764396 0.0785668 0.183922 0.103674 0.0185812 0.002099 (NFC normalization) 1.9.2 2020.05.12 08:43 1.62465 0.398831 0.273012 0.0810811 0.080937 0.0845885 0.204133 0.114146 0.0212864 0.0029547 1.12.2 2020.09.09 13:34 1.50568 0.374504 0.233108 0.0798712 0.0804466 0.0769674 0.171519 0.0945132 0.0165344 0.0019474 1.12.2 2020.09.09 13:35 1.41094 0.359093 0.236443 0.06968 0.0734418 0.0738087 0.169371 0.0946279 0.0167106 0.0019773 1.12.2 2020.09.11 19:16 1.35297 0.330545 0.221731 0.0666998 0.0692539 0.0701707 0.160564 0.0891676 0.015807 0.0045998 1.12.2 2020.09.30 14:50 1.37531 0.330381 0.226012 0.0668063 0.0690574 0.0694727 0.160282 0.0929373 0.0176629 0.0019713 1.12.2 2020.09.30 17:01 1.37168 0.329009 0.248127 0.0670758 0.0701238 0.0910568 0.170556 0.093876 0.0168925 0.0020051 | | > | 31 32 33 34 35 36 37 38 39 | 1.9.0 2020.04.20 19:57 1.51183 0.369546 0.25681 0.0734314 0.0764396 0.0785668 0.183922 0.103674 0.0185812 0.002099 (NFC normalization) 1.9.2 2020.05.12 08:43 1.62465 0.398831 0.273012 0.0810811 0.080937 0.0845885 0.204133 0.114146 0.0212864 0.0029547 1.12.2 2020.09.09 13:34 1.50568 0.374504 0.233108 0.0798712 0.0804466 0.0769674 0.171519 0.0945132 0.0165344 0.0019474 1.12.2 2020.09.09 13:35 1.41094 0.359093 0.236443 0.06968 0.0734418 0.0738087 0.169371 0.0946279 0.0167106 0.0019773 1.12.2 2020.09.11 19:16 1.35297 0.330545 0.221731 0.0666998 0.0692539 0.0701707 0.160564 0.0891676 0.015807 0.0045998 1.12.2 2020.09.30 14:50 1.37531 0.330381 0.226012 0.0668063 0.0690574 0.0694727 0.160282 0.0929373 0.0176629 0.0019713 1.12.2 2020.09.30 17:01 1.37168 0.329009 0.248127 0.0670758 0.0701238 0.0910568 0.170556 0.093876 0.0168925 0.0020051 1.12.2 2020.10.01 11:18 1.36493 0.34176 0.24473 0.0691607 0.0720002 0.0903613 0.170067 0.0934571 0.0174357 0.0019585 2.0.0 2020.11.29 00:00 1.27748 0.320919 0.227774 0.0649503 0.0688481 0.0672859 0.163426 0.0878984 0.016784 0.0018913 |