Grammalecte  jsex_regex.js at [001f5897d2]

File gc_core/js/jsex_regex.js artifact bb46156b4b part of check-in 001f5897d2



// regex

if (RegExp.prototype.__grammalecte__ === undefined) {
    /**
     * Run the regex on sText and return the first match that is not rejected by
     * a negative look-before assertion, augmented with the start/end offsets
     * (relative to sText) of the whole match and of every capturing group.
     *
     * The returned object is the array produced by RegExp.prototype.exec with two
     * extra properties:
     *   - start: [start of whole match, start of group 1, start of group 2, …]
     *   - end:   [end of whole match,   end of group 1,   end of group 2, …]
     * A group that did not participate in the match (value undefined), or whose
     * positioning code is unknown, gets -1 in both arrays so that indices stay
     * aligned with group numbers.
     *
     * Positioning codes accepted in aGroupsPos (one per capturing group):
     *   - a number: offset of the group from the beginning of the match
     *   - "$":      the group ends where the match ends
     *   - "w":      the group is a word surrounded by one of the listed delimiters
     *   - "*":      first occurrence of the group text anywhere in the match
     *   - "**":     first occurrence at or after the end of the previous group
     *   - ">…":     currently handled like "*"
     *
     * @param {string} sText - text to search
     * @param {?Array<number|string>} aGroupsPos - positioning codes, or null/undefined
     *        to locate every group with the "*" strategy
     * @param {?Array<string|RegExp>} [aNegLookBefore=null] - patterns tested against the
     *        text preceding the match; if any of them matches, the match is rejected
     *        and the search continues further in sText
     * @returns {?Array} the augmented match, or null if there is no acceptable match
     */
    RegExp.prototype._exec2 = function (sText, aGroupsPos, aNegLookBefore=null) {
        // Only global or sticky regexes resume from lastIndex; any other regex
        // would return the very same match on every call to exec().
        const bCanIterate = this.global || this.sticky;
        let m;
        while ((m = this.exec(sText)) !== null) {
            // We have to iterate over sText here too: the first match is not necessarily
            // valid according to the negative look-before assertions, and a rejected
            // match does not imply that the following matches are invalid as well.
            if (aNegLookBefore === null) {
                break;
            }
            const sLeftContext = m.input.slice(0, m.index);
            if (!aNegLookBefore.some(sRegEx => sLeftContext.search(sRegEx) >= 0)) {
                break;
            }
            if (!bCanIterate) {
                // the only match this regex can produce has been rejected
                m = null;
                break;
            }
            if (m[0].length === 0) {
                // a rejected empty match does not move lastIndex: step forward to avoid looping forever
                this.lastIndex++;
            }
        }
        if (m === null) {
            return null;
        }

        // Computed from the match itself so that it is also right for non-global regexes,
        // whose lastIndex is not updated by exec().
        const iMatchEnd = m.index + m[0].length;
        m.start = [m.index];
        m.end = [iMatchEnd];
        for (let i = 1; i < m.length; i++) {
            const sGroup = m[i];
            if (sGroup === undefined) {
                // group did not participate in the match
                m.start.push(-1);
                m.end.push(-1);
                continue;
            }
            // without aGroupsPos, every group is located as with "*"
            const codePos = (aGroupsPos == null) ? "*" : aGroupsPos[i-1];
            let iPos;
            if (typeof codePos === "number") {
                // position as a number
                iPos = m.index + codePos;
            } else if (codePos === "$") {
                // at the end of the pattern
                iPos = iMatchEnd - sGroup.length;
            } else if (codePos === "w") {
                // word in the middle of the pattern
                // the group text is literal text: neutralize regex metacharacters before searching for it
                const sEscaped = sGroup.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
                iPos = m[0].search("[ ’,()«»“”]"+sEscaped+"[ ,’()«»“”]") + 1 + m.index;
            } else if (codePos === "*") {
                // anywhere
                iPos = m[0].indexOf(sGroup) + m.index;
            } else if (codePos === "**") {
                // anywhere after previous group
                iPos = m[0].indexOf(sGroup, m.end[i-1]-m.index) + m.index;
            } else if (typeof codePos === "string" && codePos.startsWith(">")) {
                // >x:_
                // todo: look in substring x
                iPos = m[0].indexOf(sGroup) + m.index;
            } else {
                console.error("# Error: unknown positioning code in regex [" + this.source + "], for group[" + i.toString() +"], code: [" + codePos + "]");
                // keep start/end aligned with group numbers
                m.start.push(-1);
                m.end.push(-1);
                continue;
            }
            m.start.push(iPos);
            m.end.push(iPos + sGroup.length);
        }
        return m;
    };

    RegExp.prototype.__grammalecte__ = true;
}