Index: gc_core/js/lang_core/gc_engine.js ================================================================== --- gc_core/js/lang_core/gc_engine.js +++ gc_core/js/lang_core/gc_engine.js @@ -391,49 +391,35 @@ console.log("UPDATE:"); console.log(this.asString()); } } - * _getNextPointers (oToken, oGraph, oPointer, bDebug=false) { - // generator: return nodes where “values” match arcs + * _getMatches (oGraph, oToken, oNode, bKeep=false) { + // generator: return matches where “values” match arcs try { - let oNode = oGraph[oPointer["iNode"]]; - let iToken1 = oPointer["iToken1"]; let bTokenFound = false; // token value if (oNode.hasOwnProperty(oToken["sValue"])) { - if (bDebug) { - console.log(" MATCH: " + oToken["sValue"]); - } - yield { "iToken1": iToken1, "iNode": oNode[oToken["sValue"]] }; + yield [" ", oToken["sValue"], oNode[oToken["sValue"]]]; bTokenFound = true; } if (oToken["sValue"].slice(0,2).gl_isTitle()) { // we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout". 
let sValue = oToken["sValue"].toLowerCase(); if (oNode.hasOwnProperty(sValue)) { - if (bDebug) { - console.log(" MATCH: " + sValue); - } - yield { "iToken1": iToken1, "iNode": oNode[sValue] }; + yield [" ", sValue, oNode[sValue]]; bTokenFound = true; } } else if (oToken["sValue"].gl_isUpperCase()) { let sValue = oToken["sValue"].toLowerCase(); if (oNode.hasOwnProperty(sValue)) { - if (bDebug) { - console.log(" MATCH: " + sValue); - } - yield { "iToken1": iToken1, "iNode": oNode[sValue] }; + yield [" ", sValue, oNode[sValue]]; bTokenFound = true; } sValue = oToken["sValue"].gl_toCapitalize(); if (oNode.hasOwnProperty(sValue)) { - if (bDebug) { - console.log(" MATCH: " + sValue); - } - yield { "iToken1": iToken1, "iNode": oNode[sValue] }; + yield [" ", sValue, oNode[sValue]]; bTokenFound = true; } } // regex value arcs if (oToken["sType"] != "INFO" && oToken["sType"] != "PUNC" && oToken["sType"] != "SIGN") { @@ -440,27 +426,21 @@ if (oNode.hasOwnProperty("")) { for (let sRegex in oNode[""]) { if (!sRegex.includes("¬")) { // no anti-pattern if (oToken["sValue"].search(sRegex) !== -1) { - if (bDebug) { - console.log(" MATCH: ~" + sRegex); - } - yield { "iToken1": iToken1, "iNode": oNode[""][sRegex] }; + yield ["~", sRegex, oNode[""][sRegex]]; bTokenFound = true; } } else { // there is an anti-pattern let [sPattern, sNegPattern] = sRegex.split("¬", 2); if (sNegPattern && oToken["sValue"].search(sNegPattern) !== -1) { continue; } if (!sPattern || oToken["sValue"].search(sPattern) !== -1) { - if (bDebug) { - console.log(" MATCH: ~" + sRegex); - } - yield { "iToken1": iToken1, "iNode": oNode[""][sRegex] }; + yield ["~", sRegex, oNode[""][sRegex]]; bTokenFound = true; } } } } @@ -469,14 +449,11 @@ if (oToken["sType"].slice(0,4) == "WORD") { // token lemmas if (oNode.hasOwnProperty("")) { for (let sLemma of gc_engine.oSpellChecker.getLemma(oToken["sValue"])) { if (oNode[""].hasOwnProperty(sLemma)) { - if (bDebug) { - console.log(" MATCH: >" + sLemma); - } - yield { 
"iToken1": iToken1, "iNode": oNode[""][sLemma] }; + yield [">", sLemma, oNode[""][sLemma]]; bTokenFound = true; } } } // phonetic similarity @@ -495,14 +472,11 @@ continue; } } } if (phonet.isSimilAs(oToken["sValue"], sPhonet.gl_trimRight("!"))) { - if (bDebug) { - console.log(" MATCH: %" + sPhonet); - } - yield { "iToken1": iToken1, "iNode": oNode[""][sPhonet] }; + yield ["#", sPhonet, oNode[""][sPhonet]]; bTokenFound = true; } } } // morph arcs @@ -511,39 +485,30 @@ if (lMorph.length > 0) { for (let sSearch in oNode[""]) { if (!sSearch.includes("¬")) { // no anti-pattern if (lMorph.some(sMorph => (sMorph.includes(sSearch)))) { - if (bDebug) { - console.log(" MATCH: $" + sSearch); - } - yield { "iToken1": iToken1, "iNode": oNode[""][sSearch] }; + yield ["$", sSearch, oNode[""][sSearch]]; bTokenFound = true; } } else { // there is an anti-pattern let [sPattern, sNegPattern] = sSearch.split("¬", 2); if (sNegPattern == "*") { // all morphologies must match with if (sPattern) { if (lMorph.every(sMorph => (sMorph.includes(sPattern)))) { - if (bDebug) { - console.log(" MATCH: $" + sSearch); - } - yield { "iToken1": iToken1, "iNode": oNode[""][sSearch] }; + yield ["$", sSearch, oNode[""][sSearch]]; bTokenFound = true; } } } else { if (sNegPattern && lMorph.some(sMorph => (sMorph.includes(sNegPattern)))) { continue; } if (!sPattern || lMorph.some(sMorph => (sMorph.includes(sPattern)))) { - if (bDebug) { - console.log(" MATCH: $" + sSearch); - } - yield { "iToken1": iToken1, "iNode": oNode[""][sSearch] }; + yield ["$", sSearch, oNode[""][sSearch]]; bTokenFound = true; } } } } @@ -555,39 +520,30 @@ if (lMorph.length > 0) { for (let sRegex in oNode[""]) { if (!sRegex.includes("¬")) { // no anti-pattern if (lMorph.some(sMorph => (sMorph.search(sRegex) !== -1))) { - if (bDebug) { - console.log(" MATCH: @" + sRegex); - } - yield { "iToken1": iToken1, "iNode": oNode[""][sRegex] }; + yield ["@", sRegex, oNode[""][sRegex]]; bTokenFound = true; } } else { // there is an 
anti-pattern let [sPattern, sNegPattern] = sRegex.split("¬", 2); if (sNegPattern == "*") { // all morphologies must match with if (sPattern) { if (lMorph.every(sMorph => (sMorph.search(sPattern) !== -1))) { - if (bDebug) { - console.log(" MATCH: @" + sRegex); - } - yield { "iToken1": iToken1, "iNode": oNode[""][sRegex] }; + yield ["@", sRegex, oNode[""][sRegex]]; bTokenFound = true; } } } else { if (sNegPattern && lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) { continue; } if (!sPattern || lMorph.some(sMorph => (sMorph.search(sPattern) !== -1))) { - if (bDebug) { - console.log(" MATCH: @" + sRegex); - } - yield { "iToken1": iToken1, "iNode": oNode[""][sRegex] }; + yield ["@", sRegex, oNode[""][sRegex]]; bTokenFound = true; } } } } @@ -596,48 +552,38 @@ } // token tags if (oToken.hasOwnProperty("aTags") && oNode.hasOwnProperty("")) { for (let sTag of oToken["aTags"]) { if (oNode[""].hasOwnProperty(sTag)) { - if (bDebug) { - console.log(" MATCH: /" + sTag); - } - yield { "iToken1": iToken1, "iNode": oNode[""][sTag] }; + yield ["/", sTag, oNode[""][sTag]]; bTokenFound = true; } } } // meta arc (for token type) if (oNode.hasOwnProperty("")) { for (let sMeta in oNode[""]) { // no regex here, we just search if exists within if (sMeta == "*" || oToken["sType"] == sMeta) { - if (bDebug) { - console.log(" MATCH: *" + sMeta); - } - yield { "iToken1": iToken1, "iNode": oNode[""][sMeta] }; + yield ["*", sMeta, oNode[""][sMeta]]; bTokenFound = true; } else if (sMeta.includes("¬")) { if (!sMeta.includes(oToken["sType"])) { - if (bDebug) { - console.log(" MATCH: *" + sMeta); - } - yield { "iToken1": iToken1, "iNode": oNode[""][sMeta] }; + yield ["*", sMeta, oNode[""][sMeta]]; bTokenFound = true; } } } } - if (!bTokenFound && oPointer.hasOwnProperty("bKeep")) { - yield oPointer; + if (!bTokenFound && bKeep) { + yield [null, "", -1]; } // JUMP // Warning! Recurssion! 
if (oNode.hasOwnProperty("<>")) { - let oPointer2 = { "iToken1": iToken1, "iNode": oNode["<>"], "bKeep": true }; - yield* this._getNextPointers(oToken, oGraph, oPointer2, bDebug); + yield* this._getMatches(oGraph, oToken, oGraph[oNode["<>"]], true); } catch (e) { console.error(e); } @@ -653,15 +599,32 @@ console.log("TOKEN: " + oToken["sValue"]); } // check arcs for each existing pointer let lNextPointer = []; for (let oPointer of lPointer) { - lNextPointer.push(...this._getNextPointers(oToken, oGraph, oPointer, bDebug)); + for (let [cActionType, sMatch, iNode] of this._getMatches(oGraph, oToken, oGraph[oPointer["iNode"]])) { + if (cActionType === null) { + lNextPointer.push(oPointer); + continue; + } + if (bDebug) { + console.log(" MATCH: " + cActionType + sMatch); + } + lNextPointer.push({ "iToken1": oPointer["iToken1"], "iNode": iNode }); + } } lPointer = lNextPointer; // check arcs of first nodes - lPointer.push(...this._getNextPointers(oToken, oGraph, { "iToken1": iToken, "iNode": 0 }, bDebug)); + for (let [cActionType, sMatch, iNode] of this._getMatches(oGraph, oToken, oGraph[0])) { + if (cActionType === null) { + continue; + } + if (bDebug) { + console.log(" MATCH: " + cActionType + sMatch); + } + lPointer.push({ "iToken1": iToken, "iNode": iNode }); + } // check if there is rules to check for each pointer for (let oPointer of lPointer) { if (oGraph[oPointer["iNode"]].hasOwnProperty("")) { let bChange = this._executeActions(oGraph, oGraph[oPointer["iNode"]][""], oPointer["iToken1"]-1, iToken, dOptions, sCountry, bShowRuleId, bDebug, bContext); if (bChange) { Index: gc_core/py/lang_core/gc_engine.py ================================================================== --- gc_core/py/lang_core/gc_engine.py +++ gc_core/py/lang_core/gc_engine.py @@ -398,71 +398,55 @@ self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lTokens if dToken["sType"] != "INFO" } if bDebug: echo("UPDATE:") echo(self) - def _getNextPointers (self, dToken, dGraph, 
dPointer, bDebug=False): - "generator: return nodes where “values” match arcs" - dNode = dGraph[dPointer["iNode"]] - iToken1 = dPointer["iToken1"] + def _getMatches (self, dGraph, dToken, dNode, bKeep=False): + "generator: return matches where “values” match arcs" bTokenFound = False # token value if dToken["sValue"] in dNode: - if bDebug: - echo(" MATCH: " + dToken["sValue"]) - yield { "iToken1": iToken1, "iNode": dNode[dToken["sValue"]] } + yield (" ", dToken["sValue"], dNode[dToken["sValue"]]) bTokenFound = True if dToken["sValue"][0:2].istitle(): # we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout". sValue = dToken["sValue"].lower() if sValue in dNode: - if bDebug: - echo(" MATCH: " + sValue) - yield { "iToken1": iToken1, "iNode": dNode[sValue] } + yield (" ", sValue, dNode[sValue]) bTokenFound = True elif dToken["sValue"].isupper(): sValue = dToken["sValue"].lower() if sValue in dNode: - if bDebug: - echo(" MATCH: " + sValue) - yield { "iToken1": iToken1, "iNode": dNode[sValue] } + yield (" ", sValue, dNode[sValue]) bTokenFound = True sValue = dToken["sValue"].capitalize() if sValue in dNode: - if bDebug: - echo(" MATCH: " + sValue) - yield { "iToken1": iToken1, "iNode": dNode[sValue] } + yield (" ", sValue, dNode[sValue]) bTokenFound = True # regex value arcs if dToken["sType"] not in frozenset(["INFO", "PUNC", "SIGN"]): if "" in dNode: for sRegex in dNode[""]: if "¬" not in sRegex: # no anti-pattern if re.search(sRegex, dToken["sValue"]): - if bDebug: - echo(" MATCH: ~" + sRegex) - yield { "iToken1": iToken1, "iNode": dNode[""][sRegex] } + yield ("~", sRegex, dNode[""][sRegex]) bTokenFound = True else: # there is an anti-pattern sPattern, sNegPattern = sRegex.split("¬", 1) if sNegPattern and re.search(sNegPattern, dToken["sValue"]): continue if not sPattern or re.search(sPattern, dToken["sValue"]): - if bDebug: - echo(" MATCH: ~" + sRegex) - yield { "iToken1": iToken1, "iNode": dNode[""][sRegex] } + yield ("~", sRegex, 
dNode[""][sRegex]) bTokenFound = True # analysable tokens if dToken["sType"][0:4] == "WORD": # token lemmas if "" in dNode: for sLemma in _oSpellChecker.getLemma(dToken["sValue"]): if sLemma in dNode[""]: - if bDebug: - echo(" MATCH: >" + sLemma) - yield { "iToken1": iToken1, "iNode": dNode[""][sLemma] } + yield (">", sLemma, dNode[""][sLemma]) bTokenFound = True # phonetic similarity if "" in dNode: for sPhonet in dNode[""]: if sPhonet.endswith("!"): @@ -473,106 +457,85 @@ if dToken["sValue"].lower() == sPhon: continue if dToken["sValue"].isupper() and dToken["sValue"].capitalize() == sPhon: continue if phonet.isSimilAs(dToken["sValue"], sPhonet.rstrip("!")): - if bDebug: - echo(" MATCH: %" + sPhonet) - yield { "iToken1": iToken1, "iNode": dNode[""][sPhonet] } + yield ("#", sPhonet, dNode[""][sPhonet]) bTokenFound = True # morph arcs if "" in dNode: lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"])) if lMorph: for sSearch in dNode[""]: if "¬" not in sSearch: # no anti-pattern if any(sSearch in sMorph for sMorph in lMorph): - if bDebug: - echo(" MATCH: $" + sSearch) - yield { "iToken1": iToken1, "iNode": dNode[""][sSearch] } + yield ("$", sSearch, dNode[""][sSearch]) bTokenFound = True else: # there is an anti-pattern sPattern, sNegPattern = sSearch.split("¬", 1) if sNegPattern == "*": # all morphologies must match with if sPattern: if all(sPattern in sMorph for sMorph in lMorph): - if bDebug: - echo(" MATCH: $" + sSearch) - yield { "iToken1": iToken1, "iNode": dNode[""][sSearch] } + yield ("$", sSearch, dNode[""][sSearch]) bTokenFound = True else: if sNegPattern and any(sNegPattern in sMorph for sMorph in lMorph): continue if not sPattern or any(sPattern in sMorph for sMorph in lMorph): - if bDebug: - echo(" MATCH: $" + sSearch) - yield { "iToken1": iToken1, "iNode": dNode[""][sSearch] } + yield ("$", sSearch, dNode[""][sSearch]) bTokenFound = True # regex morph arcs if "" in dNode: lMorph = dToken.get("lMorph", 
_oSpellChecker.getMorph(dToken["sValue"])) if lMorph: for sRegex in dNode[""]: if "¬" not in sRegex: # no anti-pattern if any(re.search(sRegex, sMorph) for sMorph in lMorph): - if bDebug: - echo(" MATCH: @" + sRegex) - yield { "iToken1": iToken1, "iNode": dNode[""][sRegex] } + yield ("@", sRegex, dNode[""][sRegex]) bTokenFound = True else: # there is an anti-pattern sPattern, sNegPattern = sRegex.split("¬", 1) if sNegPattern == "*": # all morphologies must match with if sPattern: if all(re.search(sPattern, sMorph) for sMorph in lMorph): - if bDebug: - echo(" MATCH: @" + sRegex) - yield { "iToken1": iToken1, "iNode": dNode[""][sRegex] } + yield ("@", sRegex, dNode[""][sRegex]) bTokenFound = True else: if sNegPattern and any(re.search(sNegPattern, sMorph) for sMorph in lMorph): continue if not sPattern or any(re.search(sPattern, sMorph) for sMorph in lMorph): - if bDebug: - echo(" MATCH: @" + sRegex) - yield { "iToken1": iToken1, "iNode": dNode[""][sRegex] } + yield ("@", sRegex, dNode[""][sRegex]) bTokenFound = True # token tags if "aTags" in dToken and "" in dNode: for sTag in dToken["aTags"]: if sTag in dNode[""]: - if bDebug: - echo(" MATCH: /" + sTag) - yield { "iToken1": iToken1, "iNode": dNode[""][sTag] } + yield ("/", sTag, dNode[""][sTag]) bTokenFound = True # meta arc (for token type) if "" in dNode: for sMeta in dNode[""]: # no regex here, we just search if exists within if sMeta == "*" or dToken["sType"] == sMeta: - if bDebug: - echo(" MATCH: *" + sMeta) - yield { "iToken1": iToken1, "iNode": dNode[""][sMeta] } + yield ("*", sMeta, dNode[""][sMeta]) bTokenFound = True elif "¬" in sMeta: if dToken["sType"] not in sMeta: - if bDebug: - echo(" MATCH: *" + sMeta) - yield { "iToken1": iToken1, "iNode": dNode[""][sMeta] } + yield ("*", sMeta, dNode[""][sMeta]) bTokenFound = True - if not bTokenFound and "bKeep" in dPointer: - yield dPointer + if not bTokenFound and bKeep: + yield (None, "", -1) # JUMP - # Warning! Recurssion! + # Warning! Recursion! 
if "<>" in dNode: - dPointer2 = { "iToken1": iToken1, "iNode": dNode["<>"], "bKeep": True } - yield from self._getNextPointers(dToken, dGraph, dPointer2, bDebug) + yield from self._getMatches(dGraph, dToken, dGraph[dNode["<>"]], bKeep=True) def parseGraph (self, dGraph, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False): "parse graph with tokens from the text and execute actions encountered" lPointer = [] bTagAndRewrite = False @@ -580,14 +543,25 @@ if bDebug: echo("TOKEN: " + dToken["sValue"]) # check arcs for each existing pointer lNextPointer = [] for dPointer in lPointer: - lNextPointer.extend(self._getNextPointers(dToken, dGraph, dPointer, bDebug)) + for cActionType, sMatch, iNode in self._getMatches(dGraph, dToken, dGraph[dPointer["iNode"]]): + if cActionType is None: + lNextPointer.append(dPointer) + continue + if bDebug: + echo(" MATCH: " + cActionType + sMatch) + lNextPointer.append({ "iToken1": dPointer["iToken1"], "iNode": iNode }) lPointer = lNextPointer # check arcs of first nodes - lPointer.extend(self._getNextPointers(dToken, dGraph, { "iToken1": iToken, "iNode": 0 }, bDebug)) + for cActionType, sMatch, iNode in self._getMatches(dGraph, dToken, dGraph[0]): + if cActionType is None: + continue + if bDebug: + echo(" MATCH: " + cActionType + sMatch) + lPointer.append({ "iToken1": iToken, "iNode": iNode }) # check if there is rules to check for each pointer for dPointer in lPointer: #if bDebug: # echo("+", dPointer) if "" in dGraph[dPointer["iNode"]]: Index: gc_lang/fr/perf_memo.txt ================================================================== --- gc_lang/fr/perf_memo.txt +++ gc_lang/fr/perf_memo.txt @@ -33,6 +33,7 @@ 1.12.2 2020.09.09 13:34 1.50568 0.374504 0.233108 0.0798712 0.0804466 0.0769674 0.171519 0.0945132 0.0165344 0.0019474 1.12.2 2020.09.09 13:35 1.41094 0.359093 0.236443 0.06968 0.0734418 0.0738087 0.169371 0.0946279 0.0167106 0.0019773 1.12.2 2020.09.11 19:16 1.35297 0.330545 0.221731 
0.0666998 0.0692539 0.0701707 0.160564 0.0891676 0.015807 0.0045998 1.12.2 2020.09.30 14:50 1.37531 0.330381 0.226012 0.0668063 0.0690574 0.0694727 0.160282 0.0929373 0.0176629 0.0019713 1.12.2 2020.09.30 17:01 1.37168 0.329009 0.248127 0.0670758 0.0701238 0.0910568 0.170556 0.093876 0.0168925 0.0020051 -1.12.2 2020.10.01 11:18 1.36493 0.34176 0.24473 0.0691607 0.0720002 0.0903613 0.170067 0.0934571 0.0174357 0.0019585 +1.12.2 2020.10.01 11:18 1.36493 0.34176 0.24473 0.0691607 0.0720002 0.0903613 0.170067 0.0934571 0.0174357 0.0019585 +2.0.0 2020.11.29 00:00 1.27748 0.320919 0.227774 0.0649503 0.0688481 0.0672859 0.163426 0.0878984 0.016784 0.0018913