Overview
Comment: | [core] gc engine: fix bug for multi-token detection |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | core | mtok |
Files: | files | file ages | folders |
SHA3-256: | c8734856cf5ae6bf119ab6cba37c84f1 |
User & Date: | olr on 2021-03-16 18:12:35 |
Other Links: | branch diff | manifest | tags |
Context
2021-03-16
| ||
18:13 | [fr] ajustements: locutions adverbiales check-in: 66c9bda313 user: olr tags: fr, mtok | |
18:12 | [core] gc engine: fix bug for multi-token detection check-in: c8734856cf user: olr tags: core, mtok | |
2021-03-13
| ||
11:34 | [fr] ajustements check-in: b62d7a4807 user: olr tags: fr, mtok | |
Changes
Modified gc_core/js/lang_core/gc_engine.js from [95c9d7785e] to [28d62d5f91].
︙ | ︙ | |||
177 178 179 180 181 182 183 184 185 186 187 188 189 190 | s += `#${dToken["i"]}\t${dToken["nStart"]}:${dToken["nEnd"]}\t${dToken["sValue"]}\t${dToken["sType"]}`; if (dToken.hasOwnProperty("lMorph")) { s += "\t" + dToken["lMorph"].toString(); } if (dToken.hasOwnProperty("aTags")) { s += "\t" + dToken["aTags"].toString(); } s += "\n"; } return s; } parse (sCountry="${country_default}", bDebug=false, dOptions=null, bContext=false, bFullInfo=false) { // analyses <sText> and returns an iterable of errors or (with option <bFullInfo>) a list of sentences with tokens and errors | > > > | 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 | s += `#${dToken["i"]}\t${dToken["nStart"]}:${dToken["nEnd"]}\t${dToken["sValue"]}\t${dToken["sType"]}`; if (dToken.hasOwnProperty("lMorph")) { s += "\t" + dToken["lMorph"].toString(); } if (dToken.hasOwnProperty("aTags")) { s += "\t" + dToken["aTags"].toString(); } if (dToken.hasOwnProperty("nMultiStartTo")) { s += "\t>>" + dToken["nMultiStartTo"].toString(); } s += "\n"; } return s; } parse (sCountry="${country_default}", bDebug=false, dOptions=null, bContext=false, bFullInfo=false) { // analyses <sText> and returns an iterable of errors or (with option <bFullInfo>) a list of sentences with tokens and errors |
︙ | ︙ | |||
633 634 635 636 637 638 639 | } // check if there is rules to check for each pointer for (let oPointer of lPointers) { if (oPointer["nMultiEnd"] != -1) { if (oToken["i"] < oPointer["nMultiEnd"]) { continue; } | | | 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 | } // check if there is rules to check for each pointer for (let oPointer of lPointers) { if (oPointer["nMultiEnd"] != -1) { if (oToken["i"] < oPointer["nMultiEnd"]) { continue; } if (oToken["i"] >= oPointer["nMultiEnd"]) { oPointer["nMultiEnd"] = -1; } } if (oGraph[oPointer["iNode"]].hasOwnProperty("<rules>")) { let bChange = this._executeActions(oGraph, oGraph[oPointer["iNode"]]["<rules>"], oPointer["iToken1"]-1, iToken, dOptions, sCountry, bShowRuleId, bDebug, bContext); if (bChange) { bTagAndRewrite = true; |
︙ | ︙ | |||
766 767 768 769 770 771 772 | let nErrorStart = this.nOffsetWithinParagraph + this.lTokens[i]["nStart"]; if (this.dError.has(nErrorStart)) { this.dError.delete(nErrorStart); } } } } | | | | > > > | 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 | let nErrorStart = this.nOffsetWithinParagraph + this.lTokens[i]["nStart"]; if (this.dError.has(nErrorStart)) { this.dError.delete(nErrorStart); } } } } else if (cActionType == "&") { // multi-tokens let nTokenStart = (eAct[0] > 0) ? nTokenOffset + eAct[0] : nLastToken + eAct[0]; let nTokenEnd = (eAct[1] > 0) ? nTokenOffset + eAct[1] : nLastToken + eAct[1]; let oMultiToken = { "nTokenStart": nTokenStart, "nTokenEnd": nTokenEnd, "lTokens": this.lTokens.slice(nTokenStart, nTokenEnd+1), "lMorph": (sAction) ? sAction.split("|") : [":HM"] } this.lTokens[nTokenStart]["nMultiStartTo"] = this.lTokens[nTokenEnd]["i"]; this.lTokens[nTokenEnd]["nMultiEndFrom"] = this.lTokens[nTokenStart]["i"]; this.lTokens[nTokenStart]["oMultiToken"] = oMultiToken; this.lTokens[nTokenEnd]["oMultiToken"] = oMultiToken; if (bDebug) { console.log(`" MULTI-TOKEN: ${sAction} [${this.lTokens[nTokenStart]["sValue"]}:${this.lTokens[nTokenEnd]["sValue"]}]`); } } else { console.log("# error: unknown action at " + sLineId); } } else if (cActionType == ">") { if (bDebug) { |
︙ | ︙ |
Modified gc_core/py/lang_core/gc_engine.py from [caa8d936e0] to [db2ab5bd90].
︙ | ︙ | |||
250 251 252 253 254 255 256 257 258 259 260 261 262 263 | s += "now: " + self.sSentence + "\n" for dToken in self.lTokens: s += '#{i}\t{nStart}:{nEnd}\t{sValue}\t{sType}'.format(**dToken) if "lMorph" in dToken: s += "\t" + str(dToken["lMorph"]) if "aTags" in dToken: s += "\t" + str(dToken["aTags"]) s += "\n" #for nPos, dToken in self.dTokenPos.items(): # s += "{}\t{}\n".format(nPos, dToken) return s def parse (self, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False, bFullInfo=False): "analyses <sText> and returns an iterable of errors or (with option <bFullInfo>) paragraphs errors and sentences with tokens and errors" | > > | 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 | s += "now: " + self.sSentence + "\n" for dToken in self.lTokens: s += '#{i}\t{nStart}:{nEnd}\t{sValue}\t{sType}'.format(**dToken) if "lMorph" in dToken: s += "\t" + str(dToken["lMorph"]) if "aTags" in dToken: s += "\t" + str(dToken["aTags"]) if "nMultiStartTo" in dToken: s += "\t>>" + str(dToken["nMultiStartTo"]) s += "\n" #for nPos, dToken in self.dTokenPos.items(): # s += "{}\t{}\n".format(nPos, dToken) return s def parse (self, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False, bFullInfo=False): "analyses <sText> and returns an iterable of errors or (with option <bFullInfo>) paragraphs errors and sentences with tokens and errors" |
︙ | ︙ | |||
591 592 593 594 595 596 597 | nMultiEnd = -1 if cNodeType != "&" else dToken["nMultiStartTo"] lPointers.append({ "iToken1": iToken, "iNode": iNode, "nMultiEnd": nMultiEnd }) # check if there is rules to check for each pointer for dPointer in lPointers: if dPointer["nMultiEnd"] != -1: if dToken["i"] < dPointer["nMultiEnd"]: continue | | | 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 | nMultiEnd = -1 if cNodeType != "&" else dToken["nMultiStartTo"] lPointers.append({ "iToken1": iToken, "iNode": iNode, "nMultiEnd": nMultiEnd }) # check if there is rules to check for each pointer for dPointer in lPointers: if dPointer["nMultiEnd"] != -1: if dToken["i"] < dPointer["nMultiEnd"]: continue if dToken["i"] >= dPointer["nMultiEnd"]: dPointer["nMultiEnd"] = -1 if "<rules>" in dGraph[dPointer["iNode"]]: bChange = self._executeActions(dGraph, dGraph[dPointer["iNode"]]["<rules>"], dPointer["iToken1"]-1, iToken, dOptions, sCountry, bShowRuleId, bDebug, bContext) if bChange: bTagAndRewrite = True if bTagAndRewrite: self.rewriteFromTags(bDebug) |
︙ | ︙ | |||
700 701 702 703 704 705 706 | nTokenEnd = nTokenOffset + eAct[1] if eAct[1] > 0 else nLastToken + eAct[1] dMultiToken = { "nTokenStart": nTokenStart, "nTokenEnd": nTokenEnd, "lTokens": self.lTokens[nTokenStart:nTokenEnd+1], "lMorph": sAction.split("|") if sAction else [":HM"] } | | | | | 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 | nTokenEnd = nTokenOffset + eAct[1] if eAct[1] > 0 else nLastToken + eAct[1] dMultiToken = { "nTokenStart": nTokenStart, "nTokenEnd": nTokenEnd, "lTokens": self.lTokens[nTokenStart:nTokenEnd+1], "lMorph": sAction.split("|") if sAction else [":HM"] } self.lTokens[nTokenStart]["nMultiStartTo"] = self.lTokens[nTokenEnd]["i"] self.lTokens[nTokenEnd]["nMultiEndFrom"] = self.lTokens[nTokenStart]["i"] self.lTokens[nTokenStart]["dMultiToken"] = dMultiToken self.lTokens[nTokenEnd]["dMultiToken"] = dMultiToken if bDebug: echo(" MULTI-TOKEN: ({}) [{}:{}]".format(sAction, self.lTokens[nTokenStart]["sValue"], self.lTokens[nTokenEnd]["sValue"])) #print(dMultiToken) else: echo("# error: unknown action at " + sLineId) elif cActionType == ">": if bDebug: echo(" COND_BREAK") break |
︙ | ︙ |