Grammalecte  Check-in [5a15898d11]

Overview
Comment:[core] rename parameter tags -> aTags
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | rg
Files: files | file ages | folders
SHA3-256: 5a15898d113cdcbf3ade04bd4ea0e563ad34b463fb19dfd68272ba386229449e
User & Date: olr on 2018-09-12 10:38:22
Other Links: branch diff | manifest | tags
Context
2018-09-12
17:21
[core][js] gc engine: fix several issues check-in: 900ff7dc44 user: olr tags: core, rg
10:38
[core] rename parameter tags -> aTags check-in: 5a15898d11 user: olr tags: core, rg
09:49
[core][js] small code cleaning check-in: 05090f91ec user: olr tags: core, rg
Changes

Modified gc_core/js/lang_core/gc_engine.js from [225ec72077] to [eabdda0297].

183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
        s += "sentence: " + this.sSentence0 + "\n";
        s += "now:      " + this.sSentence  + "\n";
        for (let dToken of this.lToken) {
            s += `#${dToken["i"]}\t${dToken["nStart"]}:${dToken["nEnd"]}\t${dToken["sValue"]}\t${dToken["sType"]}`;
            if (dToken.hasOwnProperty("lMorph")) {
                s += "\t" + dToken["lMorph"].toString();
            }
            if (dToken.hasOwnProperty("tags")) {
                s += "\t" + dToken["tags"].toString();
            }
            s += "\n";
        }
        return s;
    }

    parse (sCountry="${country_default}", bDebug=false, dOptions=null, bContext=false) {







|
|







183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
        s += "sentence: " + this.sSentence0 + "\n";
        s += "now:      " + this.sSentence  + "\n";
        for (let dToken of this.lToken) {
            s += `#${dToken["i"]}\t${dToken["nStart"]}:${dToken["nEnd"]}\t${dToken["sValue"]}\t${dToken["sType"]}`;
            if (dToken.hasOwnProperty("lMorph")) {
                s += "\t" + dToken["lMorph"].toString();
            }
            if (dToken.hasOwnProperty("aTags")) {
                s += "\t" + dToken["aTags"].toString();
            }
            s += "\n";
        }
        return s;
    }

    parse (sCountry="${country_default}", bDebug=false, dOptions=null, bContext=false) {
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
        // update <sSentence> and retokenize
        this.sSentence = sSentence;
        let lNewToken = Array.from(_oTokenizer.genTokens(sSentence, true));
        for (let dToken of lNewToken) {
            if (this.dTokenPos.gl_get(dToken["nStart"], {}).hasOwnProperty("lMorph")) {
                dToken["lMorph"] = this.dTokenPos.get(dToken["nStart"])["lMorph"];
            }
            if (this.dTokenPos.gl_get(dToken["nStart"], {}).hasOwnProperty("tags")) {
                dToken["tags"] = this.dTokenPos.get(dToken["nStart"])["tags"];
            }
        }
        this.lToken = lNewToken;
        this.dTokenPos.clear();
        for (let dToken of this.lToken) {
            if (dToken["sType"] != "INFO") {
                this.dTokenPos.set(dToken["nStart"], dToken);







|
|







336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
        // update <sSentence> and retokenize
        this.sSentence = sSentence;
        let lNewToken = Array.from(_oTokenizer.genTokens(sSentence, true));
        for (let dToken of lNewToken) {
            if (this.dTokenPos.gl_get(dToken["nStart"], {}).hasOwnProperty("lMorph")) {
                dToken["lMorph"] = this.dTokenPos.get(dToken["nStart"])["lMorph"];
            }
            if (this.dTokenPos.gl_get(dToken["nStart"], {}).hasOwnProperty("aTags")) {
                dToken["aTags"] = this.dTokenPos.get(dToken["nStart"])["aTags"];
            }
        }
        this.lToken = lNewToken;
        this.dTokenPos.clear();
        for (let dToken of this.lToken) {
            if (dToken["sType"] != "INFO") {
                this.dTokenPos.set(dToken["nStart"], dToken);
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
                                }
                            }
                        }
                    }
                }
            }
            // token tags
            if (dToken.hasOwnProperty("tags") && dNode.hasOwnProperty("<tags>")) {
                for (let sTag in dToken["tags"]) {
                    if (dNode["<tags>"].hasOwnProperty(sTag)) {
                        if (bDebug) {
                            console.log("  MATCH: /" + sTag);
                        }
                        yield { "iNode1": iNode1, "dNode": dGraph[dNode["<tags>"][sTag]] };
                        bTokenFound = true;
                    }







|
|







483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
                                }
                            }
                        }
                    }
                }
            }
            // token tags
            if (dToken.hasOwnProperty("aTags") && dNode.hasOwnProperty("<tags>")) {
                for (let sTag in dToken["aTags"]) {
                    if (dNode["<tags>"].hasOwnProperty(sTag)) {
                        if (bDebug) {
                            console.log("  MATCH: /" + sTag);
                        }
                        yield { "iNode1": iNode1, "dNode": dGraph[dNode["<tags>"][sTag]] };
                        bTokenFound = true;
                    }
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
                                }
                            }
                            else if (cActionType == "/") {
                                // Tag
                                let nTokenStart = (eAct[0] > 0) ? nTokenOffset + eAct[0] : nLastToken + eAct[0];
                                let nTokenEnd = (eAct[1] > 0) ? nTokenOffset + eAct[1] : nLastToken + eAct[1];
                                for (let i = nTokenStart; i <= nTokenEnd; i++) {
                                    if (this.lToken[i].hasOwnProperty("tags")) {
                                        this.lToken[i]["tags"].add(...sWhat.split("|"))
                                    } else {
                                        this.lToken[i]["tags"] = new Set(sWhat.split("|"));
                                    }
                                }
                                if (bDebug) {
                                    console.log(`    TAG:  ${sWhat} > [${this.lToken[nTokenStart]["sValue"]}:${this.lToken[nTokenEnd]["sValue"]}]`);
                                }
                                if (!this.dTags.has(sWhat)) {
                                    this.dTags.set(sWhat, [nTokenStart, nTokenStart]);







|
|

|







635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
                                }
                            }
                            else if (cActionType == "/") {
                                // Tag
                                let nTokenStart = (eAct[0] > 0) ? nTokenOffset + eAct[0] : nLastToken + eAct[0];
                                let nTokenEnd = (eAct[1] > 0) ? nTokenOffset + eAct[1] : nLastToken + eAct[1];
                                for (let i = nTokenStart; i <= nTokenEnd; i++) {
                                    if (this.lToken[i].hasOwnProperty("aTags")) {
                                        this.lToken[i]["aTags"].add(...sWhat.split("|"))
                                    } else {
                                        this.lToken[i]["aTags"] = new Set(sWhat.split("|"));
                                    }
                                }
                                if (bDebug) {
                                    console.log(`    TAG:  ${sWhat} > [${this.lToken[nTokenStart]["sValue"]}:${this.lToken[nTokenEnd]["sValue"]}]`);
                                }
                                if (!this.dTags.has(sWhat)) {
                                    this.dTags.set(sWhat, [nTokenStart, nTokenStart]);
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
    if (dToken["i"] < dTags.get(sTag)[1]) {
        return true;
    }
    return false;
}

function g_tag (dToken, sTag) {
    return dToken.hasOwnProperty("tags") && dToken["tags"].has(sTag);
}

function g_space_between_tokens (dToken1, dToken2, nMin, nMax=null) {
    let nSpace = dToken2["nStart"] - dToken1["nEnd"]
    if (nSpace < nMin) {
        return false;
    }







|







1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
    if (dToken["i"] < dTags.get(sTag)[1]) {
        return true;
    }
    return false;
}

function g_tag (dToken, sTag) {
    return dToken.hasOwnProperty("aTags") && dToken["aTags"].has(sTag);
}

function g_space_between_tokens (dToken1, dToken2, nMin, nMax=null) {
    let nSpace = dToken2["nStart"] - dToken1["nEnd"]
    if (nSpace < nMin) {
        return false;
    }

Modified gc_core/py/lang_core/gc_engine.py from [a6af5aa7ed] to [b63b69316e].

229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
        s = "===== TEXT =====\n"
        s += "sentence: " + self.sSentence0 + "\n"
        s += "now:      " + self.sSentence  + "\n"
        for dToken in self.lToken:
            s += '#{i}\t{nStart}:{nEnd}\t{sValue}\t{sType}'.format(**dToken)
            if "lMorph" in dToken:
                s += "\t" + str(dToken["lMorph"])
            if "tags" in dToken:
                s += "\t" + str(dToken["tags"])
            s += "\n"
        #for nPos, dToken in self.dTokenPos.items():
        #    s += "{}\t{}\n".format(nPos, dToken)
        return s

    def parse (self, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
        "analyses the paragraph sText and returns list of errors"







|
|







229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
        s = "===== TEXT =====\n"
        s += "sentence: " + self.sSentence0 + "\n"
        s += "now:      " + self.sSentence  + "\n"
        for dToken in self.lToken:
            s += '#{i}\t{nStart}:{nEnd}\t{sValue}\t{sType}'.format(**dToken)
            if "lMorph" in dToken:
                s += "\t" + str(dToken["lMorph"])
            if "aTags" in dToken:
                s += "\t" + str(dToken["aTags"])
            s += "\n"
        #for nPos, dToken in self.dTokenPos.items():
        #    s += "{}\t{}\n".format(nPos, dToken)
        return s

    def parse (self, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
        "analyses the paragraph sText and returns list of errors"
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
    def update (self, sSentence, bDebug=False):
        "update <sSentence> and retokenize"
        self.sSentence = sSentence
        lNewToken = list(_oTokenizer.genTokens(sSentence, True))
        for dToken in lNewToken:
            if "lMorph" in self.dTokenPos.get(dToken["nStart"], {}):
                dToken["lMorph"] = self.dTokenPos[dToken["nStart"]]["lMorph"]
            if "tags" in self.dTokenPos.get(dToken["nStart"], {}):
                dToken["tags"] = self.dTokenPos[dToken["nStart"]]["tags"]
        self.lToken = lNewToken
        self.dTokenPos = { dToken["nStart"]: dToken  for dToken in self.lToken  if dToken["sType"] != "INFO" }
        if bDebug:
            echo("UPDATE:")
            echo(self)

    def _getNextPointers (self, dToken, dGraph, dPointer, bDebug=False):







|
|







339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
    def update (self, sSentence, bDebug=False):
        "update <sSentence> and retokenize"
        self.sSentence = sSentence
        lNewToken = list(_oTokenizer.genTokens(sSentence, True))
        for dToken in lNewToken:
            if "lMorph" in self.dTokenPos.get(dToken["nStart"], {}):
                dToken["lMorph"] = self.dTokenPos[dToken["nStart"]]["lMorph"]
            if "aTags" in self.dTokenPos.get(dToken["nStart"], {}):
                dToken["aTags"] = self.dTokenPos[dToken["nStart"]]["aTags"]
        self.lToken = lNewToken
        self.dTokenPos = { dToken["nStart"]: dToken  for dToken in self.lToken  if dToken["sType"] != "INFO" }
        if bDebug:
            echo("UPDATE:")
            echo(self)

    def _getNextPointers (self, dToken, dGraph, dPointer, bDebug=False):
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
                                continue
                            if not sPattern or any(re.search(sPattern, sMorph)  for sMorph in lMorph):
                                if bDebug:
                                    echo("  MATCH: @" + sRegex)
                                yield { "iNode1": iNode1, "dNode": dGraph[dNode["<re_morph>"][sRegex]] }
                                bTokenFound = True
        # token tags
        if "tags" in dToken and "<tags>" in dNode:
            for sTag in dToken["tags"]:
                if sTag in dNode["<tags>"]:
                    if bDebug:
                        echo("  MATCH: /" + sTag)
                    yield { "iNode1": iNode1, "dNode": dGraph[dNode["<tags>"][sTag]] }
                    bTokenFound = True
        # meta arc (for token type)
        if "<meta>" in dNode:







|
|







440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
                                continue
                            if not sPattern or any(re.search(sPattern, sMorph)  for sMorph in lMorph):
                                if bDebug:
                                    echo("  MATCH: @" + sRegex)
                                yield { "iNode1": iNode1, "dNode": dGraph[dNode["<re_morph>"][sRegex]] }
                                bTokenFound = True
        # token tags
        if "aTags" in dToken and "<tags>" in dNode:
            for sTag in dToken["aTags"]:
                if sTag in dNode["<tags>"]:
                    if bDebug:
                        echo("  MATCH: /" + sTag)
                    yield { "iNode1": iNode1, "dNode": dGraph[dNode["<tags>"][sTag]] }
                    bTokenFound = True
        # meta arc (for token type)
        if "<meta>" in dNode:
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
                                    echo("    COND_OK")
                                pass
                            elif cActionType == "/":
                                # Tag
                                nTokenStart = nTokenOffset + eAct[0]  if eAct[0] > 0  else nLastToken + eAct[0]
                                nTokenEnd = nTokenOffset + eAct[1]  if eAct[1] > 0  else nLastToken + eAct[1]
                                for i in range(nTokenStart, nTokenEnd+1):
                                    if "tags" in self.lToken[i]:
                                        self.lToken[i]["tags"].update(sWhat.split("|"))
                                    else:
                                        self.lToken[i]["tags"] = set(sWhat.split("|"))
                                if bDebug:
                                    echo("    TAG: {} >  [{}:{}]".format(sWhat, self.lToken[nTokenStart]["sValue"], self.lToken[nTokenEnd]["sValue"]))
                                if sWhat not in self.dTags:
                                    self.dTags[sWhat] = [nTokenStart, nTokenStart]
                                else:
                                    self.dTags[sWhat][0] = min(nTokenStart, self.dTags[sWhat][0])
                                    self.dTags[sWhat][1] = max(nTokenEnd, self.dTags[sWhat][1])







|
|

|







553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
                                    echo("    COND_OK")
                                pass
                            elif cActionType == "/":
                                # Tag
                                nTokenStart = nTokenOffset + eAct[0]  if eAct[0] > 0  else nLastToken + eAct[0]
                                nTokenEnd = nTokenOffset + eAct[1]  if eAct[1] > 0  else nLastToken + eAct[1]
                                for i in range(nTokenStart, nTokenEnd+1):
                                    if "aTags" in self.lToken[i]:
                                        self.lToken[i]["aTags"].update(sWhat.split("|"))
                                    else:
                                        self.lToken[i]["aTags"] = set(sWhat.split("|"))
                                if bDebug:
                                    echo("    TAG: {} >  [{}:{}]".format(sWhat, self.lToken[nTokenStart]["sValue"], self.lToken[nTokenEnd]["sValue"]))
                                if sWhat not in self.dTags:
                                    self.dTags[sWhat] = [nTokenStart, nTokenStart]
                                else:
                                    self.dTags[sWhat][0] = min(nTokenStart, self.dTags[sWhat][0])
                                    self.dTags[sWhat][1] = max(nTokenEnd, self.dTags[sWhat][1])
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
        return False
    if dToken["i"] < dTags[sTag][1]:
        return True
    return False


def g_tag (dToken, sTag):
    return "tags" in dToken and sTag in dToken["tags"]


def g_space_between_tokens (dToken1, dToken2, nMin, nMax=None):
    nSpace = dToken2["nStart"] - dToken1["nEnd"]
    if nSpace < nMin:
        return False
    if nMax is not None and nSpace > nMax:







|







1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
        return False
    if dToken["i"] < dTags[sTag][1]:
        return True
    return False


def g_tag (dToken, sTag):
    return "aTags" in dToken and sTag in dToken["aTags"]


def g_space_between_tokens (dToken1, dToken2, nMin, nMax=None):
    nSpace = dToken2["nStart"] - dToken1["nEnd"]
    if nSpace < nMin:
        return False
    if nMax is not None and nSpace > nMax: