Grammalecte  Check-in [516c08196b]

Overview
Comment:[core][js] gc engine: fix syntax errors
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | rg
Files: files | file ages | folders
SHA3-256: 516c08196bca4753563f37a9735f3708bcd024ecd8d2565a6d66b5ee6c82dba0
User & Date: olr on 2018-09-10 13:00:40
Other Links: branch diff | manifest | tags
Context
2018-09-10
14:49
[core][js] gc engine: fix syntax errors check-in: 36ac4eb1e0 user: olr tags: core, rg
13:00
[core][js] gc engine: fix syntax errors check-in: 516c08196b user: olr tags: core, rg
11:00
[build][js] fix graph rules builder check-in: 2ae38c10dc user: olr tags: build, rg
Changes

Modified gc_core/js/lang_core/gc_engine.js from [bd3e580039] to [47e7686c16].

351
352
353
354
355
356
357
358

359
360
361
362
363
364
365
351
352
353
354
355
356
357

358
359
360
361
362
363
364
365







-
+







        }
        if (bDebug) {
            console.log("UPDATE:");
            console.log(this.asString());
        }
    }

    _getNextPointers (self, dToken, dGraph, dPointer, bDebug=false) {
    * _getNextPointers (self, dToken, dGraph, dPointer, bDebug=false) {
        // generator: return nodes where <dToken> “values” match <dNode> arcs
        let dNode = dPointer["dNode"];
        let iNode1 = dPointer["iNode1"];
        let bTokenFound = false;
        // token value
        if (dNode.hasOwnProperty(dToken["sValue"])) {
            if (bDebug) {
398
399
400
401
402
403
404
405

406
407
408
409
410

411
412
413
414
415
416
417
398
399
400
401
402
403
404

405
406
407
408
409
410
411
412
413
414
415
416
417
418







-
+





+







        }
        // regex value arcs
        if (dToken["sType"] != "INFO"  &&  dToken["sType"] != "PUNC"  &&  dToken["sType"] != "SIGN") {
            if (dNode.hasOwnProperty("<re_value>")) {
                for (let sRegex in dNode["<re_value>"]) {
                    if (!sRegex.includes("¬")) {
                        // no anti-pattern
                        if (dToken["sValue"].search(sRegex) !== -1):
                        if (dToken["sValue"].search(sRegex) !== -1) {
                            if (bDebug) {
                                console.log("  MATCH: ~" + sRegex);
                            }
                            yield { "iNode1": iNode1, "dNode": dGraph[dNode["<re_value>"][sRegex]] };
                            bTokenFound = true;
                        }
                    } else {
                        // there is an anti-pattern
                        let [sPattern, sNegPattern] = sRegex.split("¬", 1);
                        if (sNegPattern && dToken["sValue"].search(sNegPattern) !== -1) {
                            continue;
                        }
                        if (!sPattern || dToken["sValue"].search(sPattern) !== -1) {
482
483
484
485
486
487
488
489

490
491
492
493
494
495
496
497
498
499
500
501
502
503

504
505
506
507
508
509
510
483
484
485
486
487
488
489

490
491
492
493
494
495
496
497
498
499
500
501
502
503

504
505
506
507
508
509
510
511







-
+













-
+







                        }
                    }
                }
            }
        }
        // token tags
        if (dToken.hasOwnProperty("tags") && dNode.hasOwnProperty("<tags>")) {
            for (dToken["tags"].has(sTag)) {
            for (let sTag in dToken["tags"]) {
                if (dNode["<tags>"].hasOwnProperty(sTag)) {
                    if (bDebug) {
                        console.log("  MATCH: /" + sTag);
                    }
                    yield { "iNode1": iNode1, "dNode": dGraph[dNode["<tags>"][sTag]] };
                    bTokenFound = true;
                }
            }
        }
        // meta arc (for token type)
        if (dNode.hasOwnProperty("<meta>")) {
            for (let sMeta in dNode["<meta>"]) {
                // no regex here, we just search if <dNode["sType"]> exists within <sMeta>
                if (sMeta == "*" or dToken["sType"] == sMeta) {
                if (sMeta == "*" || dToken["sType"] == sMeta) {
                    if (bDebug) {
                        console.log("  MATCH: *" + sMeta);
                    }
                    yield { "iNode1": iNode1, "dNode": dGraph[dNode["<meta>"][sMeta]] };
                    bTokenFound = true;
                }
                else if (sMeta.includes("¬")) {
519
520
521
522
523
524
525
526

527
528
529
530
531
532
533
534
535
536

537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552

553
554
555
556
557
558

559
560
561
562
563
564
565
566
567
568
569
570
571
572
573

574
575
576
577
578
579
580
520
521
522
523
524
525
526

527
528
529
530
531
532
533
534
535
536

537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552

553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574

575
576
577
578
579
580
581
582







-
+









-
+















-
+






+














-
+







            }
        }
        if (!bTokenFound  &&  dPointer.hasOwnProperty("bKeep")) {
            yield dPointer;
        }
        // JUMP
        // Warning! Recursion!
        if (dNode.has("<>")) {
        if (dNode.hasOwnPropertys("<>")) {
            let dPointer2 = { "iNode1": iNode1, "dNode": dGraph[dNode["<>"]], "bKeep": True };
            yield* this._getNextPointers(dToken, dGraph, dPointer2, bDebug);
        }
    }

    parseGraph (dGraph, sCountry="${country_default}", dOptions=null, bShowRuleId=false, bDebug=false, bContext=false) {
        // parse graph with tokens from the text and execute actions encountered
        let lPointer = [];
        let bTagAndRewrite = false;
        for let [iToken, dToken] in this.lToken.entries():
        for (let [iToken, dToken] in this.lToken.entries()) {
            if (bDebug) {
                console.log("TOKEN: " + dToken["sValue"]);
            }
            // check arcs for each existing pointer
            let lNextPointer = [];
            for (let dPointer of lPointer) {
                lNextPointer.push(...this._getNextPointers(dToken, dGraph, dPointer, bDebug));
            }
            lPointer = lNextPointer;
            // check arcs of first nodes
            lPointer.push(...this._getNextPointers(dToken, dGraph, { "iNode1": iToken, "dNode": dGraph[0] }, bDebug));
            // check if there is rules to check for each pointer
            for (let dPointer of lPointer) {
                //if bDebug:
                //    console.log("+", dPointer);
                if ("<rules>" in dPointer["dNode"]) {
                if (dPointer["dNode"].hasOwnProperty("<rules>")) {
                    let bChange = this._executeActions(dGraph, dPointer["dNode"]["<rules>"], dPointer["iNode1"]-1, iToken, dOpt, sCountry, bShowRuleId, bDebug, bContext);
                    if (bChange) {
                        bTagAndRewrite = true;
                    }
                }
            }
        }
        if (bTagAndRewrite) {
            this.rewriteFromTags(bDebug);
        }
        if (bDebug) {
            console.log(this);
        }
        return this.sSentence;
    }

    _executeActions (dGraph, dNode, nTokenOffset, nLastToken, dOptions, sCountry, bShowRuleId, bDebug, bContext) {
        // execute actions found in the DARG
        let bChange = false;
        for (let [sLineId, nextNodeKey] of Object.entries(dNode)) {
            let bCondMemo = null;
            for sRuleId in dGraph[nextNodeKey]:
            for (let sRuleId in dGraph[nextNodeKey]) {
                try {
                    if (bDebug) {
                        console.log("   >TRY: " + sRuleId);
                    }
                    let [sOption, sFuncCond, cActionType, sWhat, ...eAct] = gc_rules_graph.dRule[sRuleId];
                    // Suggestion    [ option, condition, "-", replacement/suggestion/action, iTokenStart, iTokenEnd, cStartLimit, cEndLimit, bCaseSvty, nPriority, sMessage, sURL ]
                    // TextProcessor [ option, condition, "~", replacement/suggestion/action, iTokenStart, iTokenEnd, bCaseSvty ]
640
641
642
643
644
645
646
647

648
649
650
651
652
653
654
655
656


657
658
659
660
661
662
663
664
665
666
667

668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687

688
689
690
691
692
693
694
642
643
644
645
646
647
648

649
650
651
652
653
654
655
656


657
658
659
660
661
662
663
664
665
666
667
668

669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697







-
+







-
-
+
+










-
+




















+







                                if (bDebug) {
                                    console.log("    TAG:  " + sRuleId + " " + sLineId);
                                    console.log("       " + sWhat + " > " + this.lToken[nTokenStart]["sValue"] + " : " + this.lToken[nTokenEnd]["sValue"]);
                                }
                                if (!this.dTags.has(sWhat)) {
                                    this.dTags.set(sWhat, [nTokenStart, nTokenStart]);
                                } else {
                                    this.dTags.set(sWhat, [min(nTokenStart, this.dTags.get(sWhat)[0], max(nTokenEnd, this.dTags.get(sWhat)[1]]);
                                    this.dTags.set(sWhat, [Math.min(nTokenStart, this.dTags.get(sWhat)[0]), Math.max(nTokenEnd, this.dTags.get(sWhat)[1])]);
                                }
                            }
                            else if (cActionType == "%") {
                                // immunity
                                if (bDebug) {
                                    console.log("    IMMUNITY:\n      " + _rules_graph.dRule[sRuleId]);;
                                }
                                nTokenStart = (eAct[0] > 0) ? nTokenOffset + eAct[0] : else nLastToken + eAct[0];
                                nTokenEnd = (eAct[1] > 0) ? nTokenOffset + eAct[1] : else nLastToken + eAct[1];
                                nTokenStart = (eAct[0] > 0) ? nTokenOffset + eAct[0] : nLastToken + eAct[0];
                                nTokenEnd = (eAct[1] > 0) ? nTokenOffset + eAct[1] : nLastToken + eAct[1];
                                if (nTokenEnd - nTokenStart == 0) {
                                    this.lToken[nTokenStart]["bImmune"] = True
                                    let nErrorStart = this.nOffsetWithinParagraph + this.lToken[nTokenStart]["nStart"];
                                    if (nErrorStart in this.dError) {
                                        this.dError.delete(nErrorStart);
                                    }
                                } else {
                                    for (let i = nTokenStart;  i <= nTokenEnd;  i++) {
                                        this.lToken[i]["bImmune"] = true;
                                        let nErrorStart = this.nOffsetWithinParagraph + this.lToken[i]["nStart"];
                                        if (nErrorStart in this.dError) {
                                        if (this.dError.has(nErrorStart)) {
                                            this.dError.delete(nErrorStart);
                                        }
                                    }
                                }
                            } else {
                                console.log("# error: unknown action at " + sLineId);
                            }
                        }
                        else if (cActionType == ">") {
                            if (bDebug) {
                                console.log("    COND_BREAK:  " + sRuleId + " " + sLineId);
                            }
                            break;
                        }
                    }
                }
                catch (e) {
                    console.log("Error: ", sLineId, sRuleId, this.sSentence);
                    console.error(e);
                }
            }
        }
        return bChange;
    }

    _createErrorFromRegex (sText, sText0, sSugg, nOffset, m, iGroup, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext) {
        let nStart = nOffset + m.start(iGroup);
        let nEnd = nOffset + m.end(iGroup);
739
740
741
742
743
744
745
746

747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762

763
764
765
766
767
768
769
770
771
772
773
774
775
776

777

778

779

780

781

782
783
784
785
786
787
788
742
743
744
745
746
747
748

749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764

765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780

781
782
783

784
785
786

787
788
789
790
791
792
793
794







-
+















-
+














+
-
+

+
-
+

+
-
+








    _createError (nStart, nEnd, sLineId, sRuleId, sOption, sMessage, lSugg, sURL, bContext) {
        oErr = {
            "nStart": nStart,
            "nEnd": nEnd,
            "sLineId": sLineId,
            "sRuleId": sRuleId,
            "sType": sOption  if sOption  else "notype",
            "sType": sOption || "notype",
            "sMessage": sMessage,
            "aSuggestions": lSugg,
            "URL": sURL
        }
        if (bContext) {
            oErr['sUnderlined'] = self.sText0.slice(nStart, nEnd);
            oErr['sBefore'] = self.sText0.slice(Math.max(0,nStart-80), nStart);
            oErr['sAfter'] = self.sText0.slice(nEnd, nEnd+80);
        }
        return oErr;
    }

    _expand (sText, nTokenOffset, nLastToken) {
        let m;
        while ((m = /\\(-?[0-9]+)/.exec(sText)) !== null) {
            if (m[1][0:1] == "-") {
            if (m[1].slice(0,1) == "-") {
                sText = sText.replace(m[0], self.lToken[nLastToken+parseInt(m[1],10)+1]["sValue"]);
            } else {
                sText = sText.replace(m[0], self.lToken[nTokenOffset+parseInt(m[1],10)]["sValue"]);
            }
        }
        return sText;
    }

    rewriteText (sText, sRepl, iGroup, m, bUppercase) {
        // text processor: write sRepl in sText at iGroup position"
        let ln = m.end[iGroup] - m.start[iGroup];
        let sNew = "";
        if (sRepl === "*") {
            sNew = " ".repeat(ln);
        }
        } else if (sRepl === ">" || sRepl === "_" || sRepl === "~") {
        else if (sRepl === ">" || sRepl === "_" || sRepl === "~") {
            sNew = sRepl + " ".repeat(ln-1);
        }
        } else if (sRepl === "@") {
        else if (sRepl === "@") {
            sNew = "@".repeat(ln);
        }
        } else if (sRepl.slice(0,1) === "=") {
        else if (sRepl.slice(0,1) === "=") {
            sNew = oEvalFunc[sRepl.slice(1)](sText, m);
            sNew = sNew + " ".repeat(ln-sNew.length);
            if (bUppercase && m[iGroup].slice(0,1).gl_isUpperCase()) {
                sNew = sNew.gl_toCapitalize();
            }
        } else {
            sNew = sRepl.gl_expand(m);
901
902
903
904
905
906
907
908
909
910



911
912
913
914
915
916
917
907
908
909
910
911
912
913



914
915
916
917
918
919
920
921
922
923







-
-
-
+
+
+







                if (dToken.hasOwnProperty("sNewValue")) {
                    // rewrite token and sentence
                    if (bDebug) {
                        console.log(dToken["sValue"] + " -> " + dToken["sNewValue"]);
                    }
                    dToken["sRealValue"] = dToken["sValue"];
                    dToken["sValue"] = dToken["sNewValue"];
                    nDiffLen = len(dToken["sRealValue"]) - len(dToken["sNewValue"]);
                    sNewRepl = (nDiffLen >= 0) ? (dToken["sNewValue"] + " ".repeat(nDiffLen) : dToken["sNewValue"].slice(0, len(dToken["sRealValue"]));
                    this.sSentence = this.sSentence[:dToken["nStart"]] + sNewRepl + this.sSentence[dToken["nEnd"]:];
                    nDiffLen = dToken["sRealValue"].length - dToken["sNewValue"].length;
                    sNewRepl = (nDiffLen >= 0) ? dToken["sNewValue"] + " ".repeat(nDiffLen) : dToken["sNewValue"].slice(0, dToken["sRealValue"].length);
                    this.sSentence = this.sSentence.slice(0,dToken["nStart"]) + sNewRepl + this.sSentence.slice(dToken["nEnd"]);
                    delete dToken["sNewValue"];
                }
            }
            else {
                try {
                    this.dTokenPos.delete(dToken["nStart"]);
                }
1049
1050
1051
1052
1053
1054
1055

1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075

1076
1077
1078
1079
1080
1081
1082
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090







+




















+







        return false;
    }
    if (sNegPattern) {
        // check negative condition
        if (sNegPattern === "*") {
            // all morph must match sPattern
            return lMorph.every(sMorph  =>  (sMorph.search(sPattern) !== -1));
        }
        else {
            if (lMorph.some(sMorph  =>  (sMorph.search(sNegPattern) !== -1))) {
                return false;
        }
    }
    // search sPattern
    return lMorph.some(sMorph  =>  (sMorph.search(sPattern) !== -1));
}

function analyse (sWord, sPattern, sNegPattern) {
    // analyse a word, returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation off)
    let lMorph = _oSpellChecker.getMorph(sWord);
    if (lMorph.length === 0) {
        return false;
    }
    if (sNegPattern) {
        // check negative condition
        if (sNegPattern === "*") {
            // all morph must match sPattern
            return lMorph.every(sMorph  =>  (sMorph.search(sPattern) !== -1));
        }
        else {
            if (lMorph.some(sMorph  =>  (sMorph.search(sNegPattern) !== -1))) {
                return false;
        }
    }
    // search sPattern
    return lMorph.some(sMorph  =>  (sMorph.search(sPattern) !== -1));
1095
1096
1097
1098
1099
1100
1101
1102

1103
1104
1105
1106
1107
1108
1109
1103
1104
1105
1106
1107
1108
1109

1110
1111
1112
1113
1114
1115
1116
1117







-
+







        if (sValues.includes(sValue.toLowerCase())) {
            return true;
        }
    }
    else if (dToken["sValue"].gl_isUpperCase()) {
        //if sValue.lower() in sValues:
        //    return true;
        sValue = "|"+sValue[1:].gl_toCapitalize();
        sValue = "|"+sValue.slice(1).gl_toCapitalize();
        if (sValues.includes(sValue)) {
            return true;
        }
    }
    return false;
}