Overview
Comment: | [core] gc engine: small debugging readability improvement |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | core | rg |
Files: | files | file ages | folders |
SHA3-256: | acd785f13d603384d4e05e9f89acbaff |
User & Date: | olr on 2018-06-21 11:37:01 |
Other Links: | branch diff | manifest | tags |
Context
2018-06-22
| ||
06:39 | [fr] conversion: regex rules -> graph rules check-in: 37884ecdd4 user: olr tags: fr, rg | |
2018-06-21
| ||
11:37 | [core] gc engine: small debugging readability improvement check-in: acd785f13d user: olr tags: core, rg | |
09:11 | [core][py] quick hack to avoid stupidity of .istitle() check-in: d283df68f1 user: olr tags: core, rg | |
Changes
Modified gc_core/py/lang_core/gc_engine.py from [889885e93a] to [d166a26ff3].
︙ | ︙ | |||
597 598 599 600 601 602 603 | self.lToken = list(_oTokenizer.genTokens(sSentence, True)) def _getNextMatchingNodes (self, dToken, dGraph, dNode, bDebug=False): "generator: return nodes where <dToken> “values” match <dNode> arcs" # token value if dToken["sValue"] in dNode: if bDebug: | | | | | | | | | | | | 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 | self.lToken = list(_oTokenizer.genTokens(sSentence, True)) def _getNextMatchingNodes (self, dToken, dGraph, dNode, bDebug=False): "generator: return nodes where <dToken> “values” match <dNode> arcs" # token value if dToken["sValue"] in dNode: if bDebug: print(" MATCH:", dToken["sValue"]) yield dGraph[dNode[dToken["sValue"]]] if dToken["sValue"][0:2].istitle(): # we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout". 
sValue = dToken["sValue"].lower() if sValue in dNode: if bDebug: print(" MATCH:", sValue) yield dGraph[dNode[sValue]] elif dToken["sValue"].isupper(): sValue = dToken["sValue"].lower() if sValue in dNode: if bDebug: print(" MATCH:", sValue) yield dGraph[dNode[sValue]] sValue = dToken["sValue"].capitalize() if sValue in dNode: if bDebug: print(" MATCH:", sValue) yield dGraph[dNode[sValue]] # token lemmas if "<lemmas>" in dNode: for sLemma in _oSpellChecker.getLemma(dToken["sValue"]): if sLemma in dNode["<lemmas>"]: if bDebug: print(" MATCH: >" + sLemma) yield dGraph[dNode["<lemmas>"][sLemma]] # universal arc if "*" in dNode: if bDebug: print(" MATCH: *") yield dGraph[dNode["*"]] # regex value arcs if "<re_value>" in dNode: for sRegex in dNode["<re_value>"]: if re.search(sRegex, dToken["sValue"]): if bDebug: print(" MATCH: ~" + sRegex) yield dGraph[dNode["<re_value>"][sRegex]] # regex morph arcs if "<re_morph>" in dNode: for sRegex in dNode["<re_morph>"]: if "¬" not in sRegex: # no anti-pattern if any(re.search(sRegex, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])): if bDebug: print(" MATCH: @" + sRegex) yield dGraph[dNode["<re_morph>"][sRegex]] else: # there is an anti-pattern sPattern, sNegPattern = sRegex.split("¬", 1) if sNegPattern == "*": # all morphologies must match with <sPattern> if all(re.search(sPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])): if bDebug: print(" MATCH: @" + sRegex) yield dGraph[dNode["<re_morph>"][sRegex]] else: if sNegPattern and any(re.search(sNegPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])): continue if any(re.search(sPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])): if bDebug: print(" MATCH: @" + sRegex) yield dGraph[dNode["<re_morph>"][sRegex]] def parse (self, dGraph, dPriority, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False): dErr = {} dPriority = {} # Key = position; value = priority dOpt 
= _dOptions if not dOptions else dOptions lPointer = [] |
︙ | ︙ |