Grammalecte  Check-in [c8734856cf]

Overview
Comment: [core] gc engine: fix bug for multi-token detection
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | mtok
Files: files | file ages | folders
SHA3-256: c8734856cf5ae6bf119ab6cba37c84f1698e68c913b1c0a9776769f60639ba49
User & Date: olr on 2021-03-16 18:12:35
Other Links: branch diff | manifest | tags
Context
2021-03-16
18:13
[fr] ajustements: locutions adverbiales check-in: 66c9bda313 user: olr tags: fr, mtok
18:12
[core] gc engine: fix bug for multi-token detection check-in: c8734856cf user: olr tags: core, mtok
2021-03-13
11:34
[fr] ajustements check-in: b62d7a4807 user: olr tags: fr, mtok
Changes

Modified gc_core/js/lang_core/gc_engine.js from [95c9d7785e] to [28d62d5f91].

177
178
179
180
181
182
183



184
185
186
187
188
189
190
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193







+
+
+







            s += `#${dToken["i"]}\t${dToken["nStart"]}:${dToken["nEnd"]}\t${dToken["sValue"]}\t${dToken["sType"]}`;
            if (dToken.hasOwnProperty("lMorph")) {
                s += "\t" + dToken["lMorph"].toString();
            }
            if (dToken.hasOwnProperty("aTags")) {
                s += "\t" + dToken["aTags"].toString();
            }
            if (dToken.hasOwnProperty("nMultiStartTo")) {
                s += "\t>>" + dToken["nMultiStartTo"].toString();
            }
            s += "\n";
        }
        return s;
    }

    parse (sCountry="${country_default}", bDebug=false, dOptions=null, bContext=false, bFullInfo=false) {
        // analyses <sText> and returns an iterable of errors or (with option <bFullInfo>) a list of sentences with tokens and errors
633
634
635
636
637
638
639
640

641
642
643
644
645
646
647
636
637
638
639
640
641
642

643
644
645
646
647
648
649
650







-
+







                }
                // check if there is rules to check for each pointer
                for (let oPointer of lPointers) {
                    if (oPointer["nMultiEnd"] != -1) {
                        if (oToken["i"] < oPointer["nMultiEnd"]) {
                            continue;
                        }
                        if (oToken["i"] == oPointer["nMultiEnd"]) {
                        if (oToken["i"] >= oPointer["nMultiEnd"]) {
                            oPointer["nMultiEnd"] = -1;
                        }
                    }
                    if (oGraph[oPointer["iNode"]].hasOwnProperty("<rules>")) {
                        let bChange = this._executeActions(oGraph, oGraph[oPointer["iNode"]]["<rules>"], oPointer["iToken1"]-1, iToken, dOptions, sCountry, bShowRuleId, bDebug, bContext);
                        if (bChange) {
                            bTagAndRewrite = true;
766
767
768
769
770
771
772
773

774
775
776
777
778
779
780
781
782
783
784


785
786



787
788
789
790
791
792
793
769
770
771
772
773
774
775

776
777
778
779
780
781
782
783
784
785


786
787
788
789
790
791
792
793
794
795
796
797
798
799







-
+









-
-
+
+


+
+
+







                                        let nErrorStart = this.nOffsetWithinParagraph + this.lTokens[i]["nStart"];
                                        if (this.dError.has(nErrorStart)) {
                                            this.dError.delete(nErrorStart);
                                        }
                                    }
                                }
                            }
                            else if (cActionType == "#") {
                            else if (cActionType == "&") {
                                // multi-tokens
                                let nTokenStart = (eAct[0] > 0) ? nTokenOffset + eAct[0] : nLastToken + eAct[0];
                                let nTokenEnd = (eAct[1] > 0) ? nTokenOffset + eAct[1] : nLastToken + eAct[1];
                                let oMultiToken = {
                                    "nTokenStart": nTokenStart,
                                    "nTokenEnd": nTokenEnd,
                                    "lTokens": this.lTokens.slice(nTokenStart, nTokenEnd+1),
                                    "lMorph": (sAction) ? sAction.split("|") : [":HM"]
                                }
                                this.lTokens[nTokenStart]["nMultiStartTo"] = nTokenEnd;
                                this.lTokens[nTokenEnd]["nMultiEndFrom"] = nTokenStart;
                                this.lTokens[nTokenStart]["nMultiStartTo"] = this.lTokens[nTokenEnd]["i"];
                                this.lTokens[nTokenEnd]["nMultiEndFrom"] = this.lTokens[nTokenStart]["i"];
                                this.lTokens[nTokenStart]["oMultiToken"] = oMultiToken;
                                this.lTokens[nTokenEnd]["oMultiToken"] = oMultiToken;
                                if (bDebug) {
                                    console.log(`"    MULTI-TOKEN: ${sAction}  [${this.lTokens[nTokenStart]["sValue"]}:${this.lTokens[nTokenEnd]["sValue"]}]`);
                                }
                            }
                            else {
                                console.log("# error: unknown action at " + sLineId);
                            }
                        }
                        else if (cActionType == ">") {
                            if (bDebug) {

Modified gc_core/py/lang_core/gc_engine.py from [caa8d936e0] to [db2ab5bd90].

250
251
252
253
254
255
256


257
258
259
260
261
262
263
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265







+
+







        s += "now:      " + self.sSentence  + "\n"
        for dToken in self.lTokens:
            s += '#{i}\t{nStart}:{nEnd}\t{sValue}\t{sType}'.format(**dToken)
            if "lMorph" in dToken:
                s += "\t" + str(dToken["lMorph"])
            if "aTags" in dToken:
                s += "\t" + str(dToken["aTags"])
            if "nMultiStartTo" in dToken:
                s += "\t>>" + str(dToken["nMultiStartTo"])
            s += "\n"
        #for nPos, dToken in self.dTokenPos.items():
        #    s += "{}\t{}\n".format(nPos, dToken)
        return s

    def parse (self, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False, bFullInfo=False):
        "analyses <sText> and returns an iterable of errors or (with option <bFullInfo>) paragraphs errors and sentences with tokens and errors"
591
592
593
594
595
596
597
598

599
600
601
602
603
604
605
593
594
595
596
597
598
599

600
601
602
603
604
605
606
607







-
+







                nMultiEnd = -1  if cNodeType != "&"  else dToken["nMultiStartTo"]
                lPointers.append({ "iToken1": iToken, "iNode": iNode, "nMultiEnd": nMultiEnd })
            # check if there is rules to check for each pointer
            for dPointer in lPointers:
                if dPointer["nMultiEnd"] != -1:
                    if dToken["i"] < dPointer["nMultiEnd"]:
                        continue
                    if dToken["i"] == dPointer["nMultiEnd"]:
                    if dToken["i"] >= dPointer["nMultiEnd"]:
                        dPointer["nMultiEnd"] = -1
                if "<rules>" in dGraph[dPointer["iNode"]]:
                    bChange = self._executeActions(dGraph, dGraph[dPointer["iNode"]]["<rules>"], dPointer["iToken1"]-1, iToken, dOptions, sCountry, bShowRuleId, bDebug, bContext)
                    if bChange:
                        bTagAndRewrite = True
        if bTagAndRewrite:
            self.rewriteFromTags(bDebug)
700
701
702
703
704
705
706
707
708


709
710
711
712

713
714
715
716
717
718
719
702
703
704
705
706
707
708


709
710
711
712
713

714
715
716
717
718
719
720
721







-
-
+
+



-
+







                                nTokenEnd = nTokenOffset + eAct[1]  if eAct[1] > 0  else nLastToken + eAct[1]
                                dMultiToken = {
                                    "nTokenStart": nTokenStart,
                                    "nTokenEnd": nTokenEnd,
                                    "lTokens": self.lTokens[nTokenStart:nTokenEnd+1],
                                    "lMorph": sAction.split("|")  if sAction else  [":HM"]
                                }
                                self.lTokens[nTokenStart]["nMultiStartTo"] = nTokenEnd
                                self.lTokens[nTokenEnd]["nMultiEndFrom"] = nTokenStart
                                self.lTokens[nTokenStart]["nMultiStartTo"] = self.lTokens[nTokenEnd]["i"]
                                self.lTokens[nTokenEnd]["nMultiEndFrom"] = self.lTokens[nTokenStart]["i"]
                                self.lTokens[nTokenStart]["dMultiToken"] = dMultiToken
                                self.lTokens[nTokenEnd]["dMultiToken"] = dMultiToken
                                if bDebug:
                                    echo("    MULTI-TOKEN: ({})  [{}:{}]".format(sAction, self.lTokens[nTokenOffset+1]["sValue"], self.lTokens[nLastToken]["sValue"]))
                                    echo("    MULTI-TOKEN: ({})  [{}:{}]".format(sAction, self.lTokens[nTokenStart]["sValue"], self.lTokens[nTokenEnd]["sValue"]))
                                #print(dMultiToken)
                            else:
                                echo("# error: unknown action at " + sLineId)
                        elif cActionType == ">":
                            if bDebug:
                                echo("    COND_BREAK")
                            break