Index: gc_core/js/lang_core/gc_engine.js ================================================================== --- gc_core/js/lang_core/gc_engine.js +++ gc_core/js/lang_core/gc_engine.js @@ -398,46 +398,46 @@ } * _getNextPointers (dToken, dGraph, dPointer, bDebug=false) { // generator: return nodes where “values” match arcs try { - let dNode = dPointer["dNode"]; - let iNode1 = dPointer["iNode1"]; + let dNode = dGraph[dPointer["iNode"]]; + let iToken1 = dPointer["iToken1"]; let bTokenFound = false; // token value if (dNode.hasOwnProperty(dToken["sValue"])) { if (bDebug) { console.log(" MATCH: " + dToken["sValue"]); } - yield { "iNode1": iNode1, "dNode": dGraph[dNode[dToken["sValue"]]] }; + yield { "iToken1": iToken1, "iNode": dNode[dToken["sValue"]] }; bTokenFound = true; } if (dToken["sValue"].slice(0,2).gl_isTitle()) { // we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout". let sValue = dToken["sValue"].toLowerCase(); if (dNode.hasOwnProperty(sValue)) { if (bDebug) { console.log(" MATCH: " + sValue); } - yield { "iNode1": iNode1, "dNode": dGraph[dNode[sValue]] }; + yield { "iToken1": iToken1, "iNode": dNode[sValue] }; bTokenFound = true; } } else if (dToken["sValue"].gl_isUpperCase()) { let sValue = dToken["sValue"].toLowerCase(); if (dNode.hasOwnProperty(sValue)) { if (bDebug) { console.log(" MATCH: " + sValue); } - yield { "iNode1": iNode1, "dNode": dGraph[dNode[sValue]] }; + yield { "iToken1": iToken1, "iNode": dNode[sValue] }; bTokenFound = true; } sValue = dToken["sValue"].gl_toCapitalize(); if (dNode.hasOwnProperty(sValue)) { if (bDebug) { console.log(" MATCH: " + sValue); } - yield { "iNode1": iNode1, "dNode": dGraph[dNode[sValue]] }; + yield { "iToken1": iToken1, "iNode": dNode[sValue] }; bTokenFound = true; } } // regex value arcs if (dToken["sType"] != "INFO" && dToken["sType"] != "PUNC" && dToken["sType"] != "SIGN") { @@ -447,11 +447,11 @@ // no anti-pattern if (dToken["sValue"].search(sRegex) !== -1) { if (bDebug) { console.log(" MATCH: ~" + sRegex); } - yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sRegex]] }; + yield { "iToken1": iToken1, "iNode": dNode[""][sRegex] }; bTokenFound = true; } } else { // there is an anti-pattern let [sPattern, sNegPattern] = sRegex.split("¬", 2); @@ -460,11 +460,11 @@ } if (!sPattern || dToken["sValue"].search(sPattern) !== -1) { if (bDebug) { console.log(" MATCH: ~" + sRegex); } - yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sRegex]] }; + yield { "iToken1": iToken1, "iNode": dNode[""][sRegex] }; bTokenFound = true; } } } } @@ -476,52 +476,54 @@ for (let sLemma of _oSpellChecker.getLemma(dToken["sValue"])) { if (dNode[""].hasOwnProperty(sLemma)) { if (bDebug) { console.log(" MATCH: >" + sLemma); } - yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sLemma]] }; + yield { "iToken1": iToken1, "iNode": dNode[""][sLemma] }; bTokenFound = true; } } } // regex morph arcs if (dNode.hasOwnProperty("")) { let lMorph = (dToken.hasOwnProperty("lMorph")) ? dToken["lMorph"] : _oSpellChecker.getMorph(dToken["sValue"]); - for (let sRegex in dNode[""]) { - if (!sRegex.includes("¬")) { - // no anti-pattern - if (lMorph.some(sMorph => (sMorph.search(sRegex) !== -1))) { - if (bDebug) { - console.log(" MATCH: @" + sRegex); - } - yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sRegex]] }; - bTokenFound = true; - } - } else { - // there is an anti-pattern - let [sPattern, sNegPattern] = sRegex.split("¬", 2); - if (sNegPattern == "*") { - // all morphologies must match with - if (sPattern) { - if (lMorph.length > 0 && lMorph.every(sMorph => (sMorph.search(sPattern) !== -1))) { - if (bDebug) { - console.log(" MATCH: @" + sRegex); - } - yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sRegex]] }; - bTokenFound = true; - } - } - } else { - if (sNegPattern && lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { - continue; - } - if (!sPattern || lMorph.some(sMorph => (sMorph.search(sPattern) !== -1))) { - if (bDebug) { - console.log(" MATCH: @" + sRegex); - } - yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sRegex]] }; - bTokenFound = true; + if (lMorph.length > 0) { + for (let sRegex in dNode[""]) { + if (!sRegex.includes("¬")) { + // no anti-pattern + if (lMorph.some(sMorph => (sMorph.search(sRegex) !== -1))) { + if (bDebug) { + console.log(" MATCH: @" + sRegex); + } + yield { "iToken1": iToken1, "iNode": dNode[""][sRegex] }; + bTokenFound = true; + } + } else { + // there is an anti-pattern + let [sPattern, sNegPattern] = sRegex.split("¬", 2); + if (sNegPattern == "*") { + // all morphologies must match with + if (sPattern) { + if (lMorph.every(sMorph => (sMorph.search(sPattern) !== -1))) { + if (bDebug) { + console.log(" MATCH: @" + sRegex); + } + yield { "iToken1": iToken1, "iNode": dNode[""][sRegex] }; + bTokenFound = true; + } + } + } else { + if (sNegPattern && lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { + continue; + } + if (!sPattern || lMorph.some(sMorph => (sMorph.search(sPattern) !== -1))) { + if (bDebug) { + console.log(" MATCH: @" + sRegex); + } + yield { "iToken1": iToken1, "iNode": dNode[""][sRegex] }; + bTokenFound = true; + } } } } } } @@ -531,11 +533,11 @@ for (let sTag of dToken["aTags"]) { if (dNode[""].hasOwnProperty(sTag)) { if (bDebug) { console.log(" MATCH: /" + sTag); } - yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sTag]] }; + yield { "iToken1": iToken1, "iNode": dNode[""][sTag] }; bTokenFound = true; } } } // meta arc (for token type) @@ -544,19 +546,19 @@ // no regex here, we just search if exists within if (sMeta == "*" || dToken["sType"] == sMeta) { if (bDebug) { console.log(" MATCH: *" + sMeta); } - yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sMeta]] }; + yield { "iToken1": iToken1, "iNode": dNode[""][sMeta] }; bTokenFound = true; } else if (sMeta.includes("¬")) { if (!sMeta.includes(dToken["sType"])) { if (bDebug) { console.log(" MATCH: *" + sMeta); } - yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sMeta]] }; + yield { "iToken1": iToken1, "iNode": dNode[""][sMeta] }; bTokenFound = true; } } } } @@ -564,11 +566,11 @@ yield dPointer; } // JUMP // Warning! Recurssion! if (dNode.hasOwnProperty("<>")) { - let dPointer2 = { "iNode1": iNode1, "dNode": dGraph[dNode["<>"]], "bKeep": true }; + let dPointer2 = { "iToken1": iToken1, "iNode": dNode["<>"], "bKeep": true }; yield* this._getNextPointers(dToken, dGraph, dPointer2, bDebug); } } catch (e) { console.error(e); @@ -589,15 +591,15 @@ for (let dPointer of lPointer) { lNextPointer.push(...this._getNextPointers(dToken, dGraph, dPointer, bDebug)); } lPointer = lNextPointer; // check arcs of first nodes - lPointer.push(...this._getNextPointers(dToken, dGraph, { "iNode1": iToken, "dNode": dGraph[0] }, bDebug)); + lPointer.push(...this._getNextPointers(dToken, dGraph, { "iToken1": iToken, "iNode": 0 }, bDebug)); // check if there is rules to check for each pointer for (let dPointer of lPointer) { - if (dPointer["dNode"].hasOwnProperty("")) { - let bChange = this._executeActions(dGraph, dPointer["dNode"][""], dPointer["iNode1"]-1, iToken, dOptions, sCountry, bShowRuleId, bDebug, bContext); + if (dGraph[dPointer["iNode"]].hasOwnProperty("")) { + let bChange = this._executeActions(dGraph, dGraph[dPointer["iNode"]][""], dPointer["iToken1"]-1, iToken, dOptions, sCountry, bShowRuleId, bDebug, bContext); if (bChange) { bTagAndRewrite = true; } } } Index: gc_core/py/lang_core/gc_engine.py ================================================================== --- gc_core/py/lang_core/gc_engine.py +++ gc_core/py/lang_core/gc_engine.py @@ -389,38 +389,38 @@ echo("UPDATE:") echo(self) def _getNextPointers (self, dToken, dGraph, dPointer, bDebug=False): "generator: return nodes where “values” match arcs" - dNode = dPointer["dNode"] - iNode1 = dPointer["iNode1"] + dNode = dGraph[dPointer["iNode"]] + iToken1 = dPointer["iToken1"] bTokenFound = False # token value if dToken["sValue"] in dNode: if bDebug: echo(" MATCH: " + dToken["sValue"]) - yield { "iNode1": iNode1, "dNode": dGraph[dNode[dToken["sValue"]]] } + yield { "iToken1": iToken1, "iNode": dNode[dToken["sValue"]] } bTokenFound = True if dToken["sValue"][0:2].istitle(): # we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout". sValue = dToken["sValue"].lower() if sValue in dNode: if bDebug: echo(" MATCH: " + sValue) - yield { "iNode1": iNode1, "dNode": dGraph[dNode[sValue]] } + yield { "iToken1": iToken1, "iNode": dNode[sValue] } bTokenFound = True elif dToken["sValue"].isupper(): sValue = dToken["sValue"].lower() if sValue in dNode: if bDebug: echo(" MATCH: " + sValue) - yield { "iNode1": iNode1, "dNode": dGraph[dNode[sValue]] } + yield { "iToken1": iToken1, "iNode": dNode[sValue] } bTokenFound = True sValue = dToken["sValue"].capitalize() if sValue in dNode: if bDebug: echo(" MATCH: " + sValue) - yield { "iNode1": iNode1, "dNode": dGraph[dNode[sValue]] } + yield { "iToken1": iToken1, "iNode": dNode[sValue] } bTokenFound = True # regex value arcs if dToken["sType"] not in frozenset(["INFO", "PUNC", "SIGN"]): if "" in dNode: for sRegex in dNode[""]: @@ -427,91 +427,92 @@ if "¬" not in sRegex: # no anti-pattern if re.search(sRegex, dToken["sValue"]): if bDebug: echo(" MATCH: ~" + sRegex) - yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sRegex]] } + yield { "iToken1": iToken1, "iNode": dNode[""][sRegex] } bTokenFound = True else: # there is an anti-pattern sPattern, sNegPattern = sRegex.split("¬", 1) if sNegPattern and re.search(sNegPattern, dToken["sValue"]): continue if not sPattern or re.search(sPattern, dToken["sValue"]): if bDebug: echo(" MATCH: ~" + sRegex) - yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sRegex]] } + yield { "iToken1": iToken1, "iNode": dNode[""][sRegex] } bTokenFound = True # analysable tokens if dToken["sType"][0:4] == "WORD": # token lemmas if "" in dNode: for sLemma in _oSpellChecker.getLemma(dToken["sValue"]): if sLemma in dNode[""]: if bDebug: echo(" MATCH: >" + sLemma) - yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sLemma]] } + yield { "iToken1": iToken1, "iNode": dNode[""][sLemma] } bTokenFound = True # regex morph arcs if "" in dNode: lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"])) - for sRegex in dNode[""]: - if "¬" not in sRegex: - # no anti-pattern - if any(re.search(sRegex, sMorph) for sMorph in lMorph): - if bDebug: - echo(" MATCH: @" + sRegex) - yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sRegex]] } - bTokenFound = True - else: - # there is an anti-pattern - sPattern, sNegPattern = sRegex.split("¬", 1) - if sNegPattern == "*": - # all morphologies must match with - if sPattern: - if lMorph and all(re.search(sPattern, sMorph) for sMorph in lMorph): - if bDebug: - echo(" MATCH: @" + sRegex) - yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sRegex]] } - bTokenFound = True - else: - if sNegPattern and any(re.search(sNegPattern, sMorph) for sMorph in lMorph): - continue - if not sPattern or any(re.search(sPattern, sMorph) for sMorph in lMorph): - if bDebug: - echo(" MATCH: @" + sRegex) - yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sRegex]] } - bTokenFound = True + if lMorph: + for sRegex in dNode[""]: + if "¬" not in sRegex: + # no anti-pattern + if any(re.search(sRegex, sMorph) for sMorph in lMorph): + if bDebug: + echo(" MATCH: @" + sRegex) + yield { "iToken1": iToken1, "iNode": dNode[""][sRegex] } + bTokenFound = True + else: + # there is an anti-pattern + sPattern, sNegPattern = sRegex.split("¬", 1) + if sNegPattern == "*": + # all morphologies must match with + if sPattern: + if all(re.search(sPattern, sMorph) for sMorph in lMorph): + if bDebug: + echo(" MATCH: @" + sRegex) + yield { "iToken1": iToken1, "iNode": dNode[""][sRegex] } + bTokenFound = True + else: + if sNegPattern and any(re.search(sNegPattern, sMorph) for sMorph in lMorph): + continue + if not sPattern or any(re.search(sPattern, sMorph) for sMorph in lMorph): + if bDebug: + echo(" MATCH: @" + sRegex) + yield { "iToken1": iToken1, "iNode": dNode[""][sRegex] } + bTokenFound = True # token tags if "aTags" in dToken and "" in dNode: for sTag in dToken["aTags"]: if sTag in dNode[""]: if bDebug: echo(" MATCH: /" + sTag) - yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sTag]] } + yield { "iToken1": iToken1, "iNode": dNode[""][sTag] } bTokenFound = True # meta arc (for token type) if "" in dNode: for sMeta in dNode[""]: # no regex here, we just search if exists within if sMeta == "*" or dToken["sType"] == sMeta: if bDebug: echo(" MATCH: *" + sMeta) - yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sMeta]] } + yield { "iToken1": iToken1, "iNode": dNode[""][sMeta] } bTokenFound = True elif "¬" in sMeta: if dToken["sType"] not in sMeta: if bDebug: echo(" MATCH: *" + sMeta) - yield { "iNode1": iNode1, "dNode": dGraph[dNode[""][sMeta]] } + yield { "iToken1": iToken1, "iNode": dNode[""][sMeta] } bTokenFound = True if not bTokenFound and "bKeep" in dPointer: yield dPointer # JUMP # Warning! Recurssion! if "<>" in dNode: - dPointer2 = { "iNode1": iNode1, "dNode": dGraph[dNode["<>"]], "bKeep": True } + dPointer2 = { "iToken1": iToken1, "iNode": dNode["<>"], "bKeep": True } yield from self._getNextPointers(dToken, dGraph, dPointer2, bDebug) def parseGraph (self, dGraph, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False): "parse graph with tokens from the text and execute actions encountered" lPointer = [] @@ -523,17 +524,17 @@ lNextPointer = [] for dPointer in lPointer: lNextPointer.extend(self._getNextPointers(dToken, dGraph, dPointer, bDebug)) lPointer = lNextPointer # check arcs of first nodes - lPointer.extend(self._getNextPointers(dToken, dGraph, { "iNode1": iToken, "dNode": dGraph[0] }, bDebug)) + lPointer.extend(self._getNextPointers(dToken, dGraph, { "iToken1": iToken, "iNode": 0 }, bDebug)) # check if there is rules to check for each pointer for dPointer in lPointer: #if bDebug: # echo("+", dPointer) - if "" in dPointer["dNode"]: - bChange = self._executeActions(dGraph, dPointer["dNode"][""], dPointer["iNode1"]-1, iToken, dOptions, sCountry, bShowRuleId, bDebug, bContext) + if "" in dGraph[dPointer["iNode"]]: + bChange = self._executeActions(dGraph, dGraph[dPointer["iNode"]][""], dPointer["iToken1"]-1, iToken, dOptions, sCountry, bShowRuleId, bDebug, bContext) if bChange: bTagAndRewrite = True if bTagAndRewrite: self.rewriteFromTags(bDebug) if bDebug: