Grammalecte  Check-in [102180fb1d]

Overview
Comment:[core] gc engine update
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | rg
Files: files | file ages | folders
SHA3-256: 102180fb1d85ea5fc89ea6df5e70e5434b34060c092051ab053d51ef130a9e01
User & Date: olr on 2018-06-01 10:51:41
Other Links: branch diff | manifest | tags
Context
2018-06-02
13:47
[graphspell] tokenizer: add option for <start> and <end> tokens check-in: 3339da6424 user: olr tags: graphspell, rg
2018-06-01
10:51
[core] gc engine update check-in: 102180fb1d user: olr tags: core, rg
2018-05-29
16:17
[core] gc engine update check-in: c06b45b671 user: olr tags: core, rg
Changes

Modified gc_core/py/lang_core/gc_engine.py from [03b818b7a2] to [c9f32df1df].

76
77
78
79
80
81
82
83


84
85
86
87
88
89
90
76
77
78
79
80
81
82

83
84
85
86
87
88
89
90
91







-
+
+







            dDA.clear()
            try:
                # regex parser
                _, errs = _proofread(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dDA, dPriority, sCountry, dOpt, bDebug, bContext)
                aErrors.update(errs)
                # token parser
                oSentence = TokenSentence(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart)
                oSentence.parse(dPriority, sCountry, dOpt, bDebug, bContext)
                _, errs = oSentence.parse(dPriority, sCountry, dOpt, bDebug, bContext)
                aErrors.update(errs)
            except:
                raise
    return aErrors.values() # this is a view (iterable)


def _getSentenceBoundaries (sText):
    iStart = _zBeginOfParagraph.match(sText).end()
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712

713
714
715
716
717
718
719
672
673
674
675
676
677
678


































679
680
681
682
683
684
685
686
687







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-

+








    def __init__ (self, sSentence, sSentence0, iStart):
        "keep the two sentence strings and the start offset, and store the tokens of <sSentence> as a list"
        self.sSentence, self.sSentence0, self.iStart = sSentence, sSentence0, iStart
        # the tokens produced by the module-level tokenizer are kept in a list
        self.lToken = [ *_oTokenizer.genTokens(sSentence) ]

    def parse (self, dPriority, sCountry="${country_default}", dOptions=None, bDebug=False, bContext=False):
        """walk the rule graph with the tokens of the sentence and return the errors found

        A pointer is a dict {"nOffset": ..., "dNode": ...} marking a node reached in <dGraph>.
        For each token:
          - every existing pointer is moved along the arcs matching the token
            (a pointer with several matching arcs is forked, a pointer with none is dropped);
          - new pointers are created from the arcs of the root node <dGraph[0]>;
          - the rules of every pointer standing on a node with a "<rules>" key are run
            with <_executeActions>.

        <sCountry> and <bDebug> are accepted but not used in this method.
        Returns the dictionary of errors (it is also printed when it is not empty).
        """
        dErr = {}
        # NOTE(review): this rebinding discards the <dPriority> received from the caller -- confirm it is intended
        dPriority = {}  # Key = position; value = priority
        dOpt = _dOptions  if not dOptions  else dOptions
        lPointer = []
        bIdRule = option('idrule')
        for dToken in self.lToken:
            # check arcs for each existing pointer
            # The surviving pointers are collected in a new list: deleting from <lPointer>
            # while enumerating it would make the loop skip the pointer following each deletion.
            lKeptPointer = []
            lNewPointer = []
            for dPointer in lPointer:
                lNode = list(self._getNextMatchingNodes(dToken, dPointer["dNode"]))
                if not lNode:
                    # no arc matches this token: the pointer is dropped
                    continue
                # first matching node: the pointer itself moves forward
                dPointer["nOffset"] = dToken["i"]
                dPointer["dNode"] = lNode[0]
                lKeptPointer.append(dPointer)
                # other matching nodes: fork the pointer
                for dNode in lNode[1:]:
                    lNewPointer.append({"nOffset": dPointer["nOffset"], "dNode": dNode})
            lPointer = lKeptPointer + lNewPointer
            # check arcs of first nodes
            for dNode in self._getNextMatchingNodes(dToken, dGraph[0]):
                lPointer.append({"nOffset": 0, "dNode": dNode})
            # check if there is rules to check for each pointer
            for dPointer in lPointer:
                if "<rules>" in dPointer["dNode"]:
                    # accumulate: a plain assignment would lose the errors found by previous rules
                    # (assumes <_executeActions> returns a dictionary of errors)
                    dErr.update(self._executeActions(dPointer["dNode"]["<rules>"], dPointer["nOffset"], dPriority, dOpt, bIdRule, bContext))
        if dErr:
            print(dErr)
        return dErr

    def _getNextMatchingNodes (self, dToken, dNode):
        "generator: return nodes where <dToken> “values” match <dNode> arcs"
        # token value
        if dToken["sValue"] in dNode:
            #print("value found: ", dToken["sValue"])
            yield dGraph[dNode[dToken["sValue"]]]
        # token lemmas
        if "<lemmas>" in dNode:
            for sLemma in _oSpellChecker.getLemma(dToken["sValue"]):
733
734
735
736
737
738
739
740





































741

742

743
744
745
746
747
748
749
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756








+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

+

+







        # regex morph arcs
        if "<re_morph>" in dNode:
            for sRegex in dNode["<re_morph>"]:
                for sMorph in _oSpellChecker.getMorph(dToken["sValue"]):
                    if re.search(sRegex, sMorph):
                        #print("morph regex matching: ", sRegex)
                        yield dGraph[dNode["<re_morph>"][sRegex]]

    def parse (self, dPriority, sCountry="${country_default}", dOptions=None, bDebug=False, bContext=False):
        """walk the rule graph with the tokens of the sentence and return (bChange, dErr)

        A pointer is a dict {"nOffset": ..., "dNode": ...} marking a node reached in <dGraph>.
        For each token:
          - every existing pointer is moved along the arcs matching the token
            (a pointer with several matching arcs is forked, a pointer with none is dropped);
          - new pointers are created from the arcs of the root node <dGraph[0]>;
          - the rules of every pointer standing on a node with a "<rules>" key are run
            with <_executeActions>, which gives back a (has changed, errors) pair.

        <sCountry> and <bDebug> are accepted but not used in this method.
        Returns a tuple:
          - bChange: True if at least one call to <_executeActions> reported a change;
          - dErr: dictionary of all errors collected (it is also printed when it is not empty).
        """
        dErr = {}
        # NOTE(review): this rebinding discards the <dPriority> received from the caller -- confirm it is intended
        dPriority = {}  # Key = position; value = priority
        dOpt = _dOptions  if not dOptions  else dOptions
        lPointer = []
        bIdRule = option('idrule')
        bChange = False
        for dToken in self.lToken:
            # check arcs for each existing pointer
            # The surviving pointers are collected in a new list: deleting from <lPointer>
            # while enumerating it would make the loop skip the pointer following each deletion.
            lKeptPointer = []
            lNewPointer = []
            for dPointer in lPointer:
                lNode = list(self._getNextMatchingNodes(dToken, dPointer["dNode"]))
                if not lNode:
                    # no arc matches this token: the pointer is dropped
                    continue
                # first matching node: the pointer itself moves forward
                dPointer["dNode"] = lNode[0]
                lKeptPointer.append(dPointer)
                # other matching nodes: fork the pointer
                for dNode in lNode[1:]:
                    lNewPointer.append({"nOffset": dPointer["nOffset"], "dNode": dNode})
            lPointer = lKeptPointer + lNewPointer
            # check arcs of first nodes
            for dNode in self._getNextMatchingNodes(dToken, dGraph[0]):
                lPointer.append({"nOffset": 0, "dNode": dNode})
            # check if there is rules to check for each pointer
            for dPointer in lPointer:
                if "<rules>" in dPointer["dNode"]:
                    bHasChanged, errs = self._executeActions(dPointer["dNode"]["<rules>"], dPointer["nOffset"], dPriority, dOpt, bIdRule, bContext)
                    dErr.update(errs)
                    if bHasChanged:
                        bChange = True
        if dErr:
            print(dErr)
        return (bChange, dErr)

    def _executeActions (self, dNode, nTokenOffset, dPriority, dOpt, bIdRule, bContext):
        print(locals())
        dErrs = {}
        bChange = False
        for sLineId, nextNodeKey in dNode.items():
            for sRuleId in dGraph[nextNodeKey]:
                print(sRuleId)
                bCondMemo = None
                sFuncCond, cActionType, sWhat, *eAct = dRule[sRuleId]
                # action in lActions: [ condition, action type, replacement/suggestion/action[, iTokenStart, iTokenEnd[, nPriority, message, URL]] ]
                try:
775
776
777
778
779
780
781
782

783
784
785
786
787
788
789
782
783
784
785
786
787
788

789
790
791
792
793
794
795
796







-
+







                            pass
                        else:
                            print("# error: unknown action at " + sLineId)
                    elif cActionType == ">":
                        break
                except Exception as e:
                    raise Exception(str(e), sLineId)
        return dErrs
        return bChange, dErrs

    def _rewrite (self, sWhat, nErrorStart, nErrorEnd):
        "text processor: rewrite tokens between <nErrorStart> and <nErrorEnd> position"
        lTokenValue = sWhat.split("|")
        if len(lTokenValue) != (nErrorEnd - nErrorStart + 1):
            print("Error. Text processor: number of replacements != number of tokens.")
            return