Grammalecte  Check-in [fea041c0de]

Overview
Comment:[build][core] regex for token value and token morphologies
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | build | rg
Files: files | file ages | folders
SHA3-256: fea041c0de3be9452eb414a624da8f5e87ee818429b12bf1d13785b4c351cac2
User & Date: olr on 2018-05-24 09:25:29
Other Links: branch diff | manifest | tags
Context
2018-05-24
11:13
[build][bug] darg: wrong statement check-in: c4ce706a6b user: olr tags: build, rg
09:25
[build][core] regex for token value and token morphologies check-in: fea041c0de user: olr tags: core, build, rg
2018-05-23
12:19
[build][core] rules graph: merge lemmas in key <lemmas> check-in: a5edfddfa8 user: olr tags: core, build, rg
Changes

Modified darg.py from [7e9f6a7653] to [e83ec424d3].

153
154
155
156
157
158
159
160

161
162
163


164
165
166
167
168
169
170
171
172
173


174
175
176
177
178
179
180
        # Used as a key in a python dictionary.
        # Nodes are equivalent if they have identical arcs, and each identical arc leads to identical states.
        return self.__str__() == other.__str__()        

    def getNodeAsDict (self):
        "returns the node as a dictionary structure"
        dNode = {}
        dRegex = {}

        dRules = {}
        dLemmas = {}
        for sArc, oNode in self.dArcs.items():


            if sArc.startswith("~") and len(sArc) > 1:
                dRegex[sArc[1:]] = oNode.__hash__()
            elif sArc.startswith(">") and len(sArc) > 1:
                dLemmas[sArc[1:]] = oNode.__hash__()
            elif sArc.startswith("##"):
                dRules[sArc[1:]] = oNode.__hash__()
            else:
                dNode[sArc] = oNode.__hash__()
        if dRegex:
            dNode["<regex>"] = dRegex


        if dLemmas:
            dNode["<lemmas>"] = dLemmas
        if dRules:
            dNode["<rules>"] = dRules
        #if self.bFinal:
        #    dNode["<final>"] = 1
        return dNode







|
>



>
>
|
|






|
|
>
>







153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
        # Used as a key in a python dictionary.
        # Nodes are equivalent if they have identical arcs, and each identical arc leads to identical states.
        return self.__str__() == other.__str__()        

    def getNodeAsDict (self):
        "returns the node as a dictionary structure"
        dNode = {}
        dReValue = {}
        dReMorph = {}
        dRules = {}
        dLemmas = {}
        for sArc, oNode in self.dArcs.items():
            if sArc.startswith("~~") and len(sArc) > 2:
                dReMorph[sArc[1:]] = oNode.__hash__()
            elif sArc.startswith("~") and len(sArc) > 1:
                dReValue[sArc[1:]] = oNode.__hash__()
            elif sArc.startswith(">") and len(sArc) > 1:
                dLemmas[sArc[1:]] = oNode.__hash__()
            elif sArc.startswith("##"):
                dRules[sArc[1:]] = oNode.__hash__()
            else:
                dNode[sArc] = oNode.__hash__()
        if dReValue:
            dNode["<re_value>"] = dReValue
        il dReMorph:
            dNode["<re_morph>"] = dReMorph
        if dLemmas:
            dNode["<lemmas>"] = dLemmas
        if dRules:
            dNode["<rules>"] = dRules
        #if self.bFinal:
        #    dNode["<final>"] = 1
        return dNode

Modified gc_core/py/lang_core/gc_engine.py from [de8dd5167f] to [db19c73d82].

612
613
614
615
616
617
618
619
620
621





622
623
624
625
626
627
628
629
630
631
        if "<lemmas>" in dNode:
            for sLemma in _oSpellChecker.getLemma(dToken["sValue"]):
                if sLemma in dNode["<lemmas>"]:
                    yield dGraph[dNode["<lemmas>"][sLemma]]
        # universal arc
        if "*" in dNode:
            yield dGraph[dNode["*"]]
        # regex arcs
        if "<regex>" in dNode:
            for sRegex in dNode["~"]:





                for sMorph in _oSpellChecker.getMorph(dToken["sValue"]):
                    if re.search(sRegex, sMorph):
                        yield dGraph[dNode["regex"][sRegex]]

    def _executeActions (self, dNode, nOffset):
        for sLineId, nextNodeKey in dNode.items():
            for sArc in dGraph[nextNodeKey]:
                print(sArc)
                bCondMemo = None
                sFuncCond, cActionType, sWhat, *eAct = dRule[sArc]







|
|
|
>
>
>
>
>


|







612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
        if "<lemmas>" in dNode:
            for sLemma in _oSpellChecker.getLemma(dToken["sValue"]):
                if sLemma in dNode["<lemmas>"]:
                    yield dGraph[dNode["<lemmas>"][sLemma]]
        # universal arc
        if "*" in dNode:
            yield dGraph[dNode["*"]]
        # regex value arcs
        if "<re_value>" in dNode:
            for sRegex in dNode["<re_value>"]:
                if re.search(sRegex, dToken["sValue"]):
                    yield dGraph[dNode["<re_value>"][sRegex]]
        # regex morph arcs
        if "<re_morph>" in dNode:
            for sRegex in dNode["<re_morph>"]:
                for sMorph in _oSpellChecker.getMorph(dToken["sValue"]):
                    if re.search(sRegex, sMorph):
                        yield dGraph[dNode["<re_morph>"][sRegex]]

    def _executeActions (self, dNode, nOffset):
        for sLineId, nextNodeKey in dNode.items():
            for sArc in dGraph[nextNodeKey]:
                print(sArc)
                bCondMemo = None
                sFuncCond, cActionType, sWhat, *eAct = dRule[sArc]