Overview
Comment: | [core] gc engine: don’t analyse lemmas and morphologies of tokens who don’t have such things |
SHA3-256: | 032aff09bb3825e33cc9a6241097fa86 |
User & Date: | olr on 2018-06-28 08:54:22 |
Context
2018-06-29
09:27 | [fr] remove test graph check-in: f3fb6556ae user: olr tags: fr, rg
2018-06-28
08:54 | [core] gc engine: don’t analyse lemmas and morphologies of tokens who don’t have such things check-in: 032aff09bb user: olr tags: core, rg
08:26 | [graphspell][core] tokenizer: rename ACRONYM tokens to WORD_ACRONYM check-in: ccbbecbd1b user: olr tags: core, graphspell, rg
Changes
Modified gc_core/py/lang_core/gc_engine.py from [2db7047948] to [eca6b550ae].
︙
            print(" MATCH:", sValue)
        yield dGraph[dNode[sValue]]
    sValue = dToken["sValue"].capitalize()
    if sValue in dNode:
        if bDebug:
            print(" MATCH:", sValue)
        yield dGraph[dNode[sValue]]
# regex value arcs
if "<re_value>" in dNode:
    for sRegex in dNode["<re_value>"]:
        if "¬" not in sRegex:
            # no anti-pattern
            if re.search(sRegex, dToken["sValue"]):
                if bDebug:
                    print(" MATCH: ~" + sRegex)
                yield dGraph[dNode["<re_value>"][sRegex]]
        else:
            # there is an anti-pattern
            sPattern, sNegPattern = sRegex.split("¬", 1)
            if sNegPattern and re.search(sNegPattern, dToken["sValue"]):
                continue
            if not sPattern or re.search(sPattern, dToken["sValue"]):
                if bDebug:
                    print(" MATCH: ~" + sRegex)
                yield dGraph[dNode["<re_value>"][sRegex]]
# analysable tokens
if dToken["sType"][0:4] == "WORD":
    # token lemmas
    if "<lemmas>" in dNode:
        for sLemma in _oSpellChecker.getLemma(dToken["sValue"]):
            if sLemma in dNode["<lemmas>"]:
                if bDebug:
                    print(" MATCH: >" + sLemma)
                yield dGraph[dNode["<lemmas>"][sLemma]]
    # regex morph arcs
    if "<re_morph>" in dNode:
        for sRegex in dNode["<re_morph>"]:
            if "¬" not in sRegex:
                # no anti-pattern
                if any(re.search(sRegex, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                    if bDebug:
                        print(" MATCH: @" + sRegex)
                    yield dGraph[dNode["<re_morph>"][sRegex]]
            else:
                # there is an anti-pattern
                sPattern, sNegPattern = sRegex.split("¬", 1)
                if sNegPattern == "*":
                    # all morphologies must match with <sPattern>
                    if sPattern and all(re.search(sPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                        if bDebug:
                            print(" MATCH: @" + sRegex)
                        yield dGraph[dNode["<re_morph>"][sRegex]]
                else:
                    if sNegPattern and any(re.search(sNegPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                        continue
                    if not sPattern or any(re.search(sPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                        if bDebug:
                            print(" MATCH: @" + sRegex)
                        yield dGraph[dNode["<re_morph>"][sRegex]]
# meta arc (for token type)
if "<meta>" in dNode:
    for sMeta in dNode["<meta>"]:
        # not regex here, we just search if <dNode["sType"]> exists within <sMeta>
        if sMeta == "*":
            if bDebug:
                print(" MATCH: *" + sMeta)
︙
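
To make the effect of this check-in concrete, here is a minimal, self-contained sketch (not the project's code) of the guard that was added: lemma and morphology lookups are now attempted only for tokens whose type starts with "WORD", which also covers the WORD_ACRONYM type introduced in the previous check-in. The token dicts below only mimic the "sType"/"sValue" keys visible in the diff; the other type names ("NUM", "PUNCT") are illustrative assumptions, not necessarily the tokenizer's actual labels.

# Minimal sketch, assuming only what the diff shows: tokens are dicts with
# "sType" and "sValue"; types other than WORD/WORD_ACRONYM are hypothetical here.

def bIsAnalysableToken (dToken):
    "True if the token may have lemmas/morphologies worth querying"
    return dToken["sType"][0:4] == "WORD"   # matches "WORD" and "WORD_ACRONYM"

if __name__ == "__main__":
    lTokens = [
        {"sType": "WORD", "sValue": "chats"},
        {"sType": "WORD_ACRONYM", "sValue": "ONU"},
        {"sType": "NUM", "sValue": "42"},       # hypothetical type name
        {"sType": "PUNCT", "sValue": ","},      # hypothetical type name
    ]
    for dToken in lTokens:
        if bIsAnalysableToken(dToken):
            print(dToken["sValue"], "-> analyse lemmas and morphologies")
        else:
            print(dToken["sValue"], "-> skip: nothing to look up")

The prefix test on the first four characters, rather than an exact comparison, presumably keeps acronyms matching after their type was renamed from ACRONYM to WORD_ACRONYM; non-word tokens are now skipped before any call to the spell checker's getLemma/getMorph.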