Index: graphspell-js/dawg.js ================================================================== --- graphspell-js/dawg.js +++ graphspell-js/dawg.js @@ -96,33 +96,27 @@ } lEntry.length = 0; // clear the array // Dictionary of arc values occurrency, to sort arcs of each node let lKeyVal = []; - for (let c of dChar.keys()) { lKeyVal.push([dChar[c], dCharOccur[c]]); } - for (let sAff of dAff.keys()) { lKeyVal.push([dAff[sAff]+nChar, dAffOccur[sAff]]); } - for (let sTag in dTag.keys()) { lKeyVal.push([dTag[sTag]+nChar+nAff, dTagOccur[sTag]]); } + for (let c of dChar.keys()) { lKeyVal.push([dChar.get(c), dCharOccur.get(c)]); } + for (let sAff of dAff.keys()) { lKeyVal.push([dAff.get(sAff)+nChar, dAffOccur.get(sAff)]); } + for (let sTag of dTag.keys()) { lKeyVal.push([dTag.get(sTag)+nChar+nAff, dTagOccur.get(sTag)]); } let dValOccur = new Map(lKeyVal); lKeyVal.length = 0; // clear the array - //with open(spfSrc[:-8]+".valuesfreq.txt", 'w', encoding='utf-8') as hFreqDst: # DEBUG - // for iKey, nOcc in sorted(dValOccur.entries(), key=lambda t: t[1], reverse=True): - // hFreqDst.write("{}: {}\n".format(lVal[iKey], nOcc)) - // hFreqDst.close() - this.sLang = sLangName; this.nEntry = lWord.length; this.aPreviousEntry = []; oNodeCounter.reset(); this.oRoot = new DawgNode(); this.lUncheckedNodes = []; // list of nodes that have not been checked for duplication. this.dMinimizedNodes = new Map(); // list of unique nodes that have been checked for duplication. - this.lSortedNodes = []; // version 2 and 3 this.nNode = 0; this.nArc = 0; this.dChar = dChar; - this.nChar = dChar.length; + this.nChar = dChar.size; this.nAff = nAff; this.lArcVal = lVal; this.nArcVal = lVal.length; this.nTag = this.nArcVal - this.nChar - nAff; this.cStemming = cStemming; @@ -140,22 +134,24 @@ xProgressBarNode.value = 0; xProgressBarNode.max = lWord.length; } let i = 0; for (let aEntry of lWord) { + console.log(aEntry); this.insert(aEntry); if (xProgressBarNode) { xProgressBarNode.value = i; i += 1; } } this.finish(); this.countNodes(); this.countArcs(); - this.sortNodes(); this.sortNodeArcs(dValOccur); this.displayInfo(); + this.writeInfo(); + //this.oRoot.display(0, this.lArcVal, true); } // BUILD DAWG insert (aEntry) { if (aEntry < this.aPreviousEntry) { @@ -196,11 +192,11 @@ this._minimize(0); } _minimize (nDownTo) { // proceed from the leaf up to a certain point - for (let i = this.lUncheckedNodes.length-1; i < nDownTo-1; i--) { + for (let i = this.lUncheckedNodes.length-1; i > nDownTo-1; i--) { let [oNode, char, oChildNode] = this.lUncheckedNodes[i]; if (this.dMinimizedNodes.has(oChildNode.__hash__())) { // replace the child with the previously encountered one oNode.arcs.set(char, this.dMinimizedNodes.get(oChildNode.__hash__())); } else { @@ -215,42 +211,23 @@ this.nNode = this.dMinimizedNodes.size; } countArcs () { this.nArc = 0; - for (let oNode of this.dMinimizedNodes) { + for (let oNode of this.dMinimizedNodes.values()) { this.nArc += oNode.arcs.size; } } sortNodeArcs (dValOccur) { console.log(" > Sort node arcs"); this.oRoot.sortArcs(dValOccur); - for (let oNode of this.dMinimizedNodes) { + for (let oNode of this.dMinimizedNodes.values()) { oNode.sortArcs(dValOccur); } } - sortNodes () { - console.log(" > Sort nodes"); - for (let oNode of this.oRoot.arcs.values()) { - this._parseNodes(oNode); - } - } - - _parseNodes (oNode) { - // Warning: recursive method - if (oNode.pos > 0) { - return; - } - //oNode.setPos(); // version 2 - this.lSortedNodes.push(oNode); - for (let oNextNode of oNode.arcs.values()) { - this._parseNodes(oNextNode); - } - } - lookup (sWord) { let oNode = this.oRoot; for (let c of sWord) { if (!oNode.arcs.has(this.dChar.gl_get(c, ''))) { return false; @@ -260,11 +237,11 @@ return oNode.final; } morph (sWord) { let oNode = this.oRoot; - for (let c in sWord) { + for (let c of sWord) { if (!oNode.arcs.has(this.dChar.get(c, ''))) { return ''; } oNode = oNode.arcs.get(this.dChar.get(c)); } @@ -271,11 +248,11 @@ if (oNode.final) { let s = "* "; for (let arc of oNode.arcs.keys()) { if (arc >= this.nChar) { s += " [" + this.funcStemming(sWord, this.lArcVal[arc]); - let oNode2 = oNode.arcs[arc] + let oNode2 = oNode.arcs.get(arc); for (let arc2 of oNode2.arcs.keys()) { s += " / " + this.lArcVal[arc2]; } s += "]"; } @@ -313,10 +290,11 @@ console.log(this.getArcStats()); console.log("\n * Values:\n"); let i = 0; for (let s of this.lArcVal) { console.log(i + ": " + s); + i++; } } // BINARY CONVERSION createBinary (sPathFile, nMethod) { @@ -443,22 +421,16 @@ constructor () { this.i = oNodeCounter.getId(); this.final = false; this.arcs = new Map(); // key: arc value; value: a node this.addr = 0; // address in the binary dictionary - this.pos = 0; // position in the binary dictionary (version 2) - this.size = 0; // size of node in bytes (version 3) } __str__ () { // Caution! this function is used for hashing and comparison! - let l = []; - if (this.final) { - l.push("1"); - } else { - l.push("0"); - } + let sFinalChar = (self.final) ? "1" : "0"; + let l = [sFinalChar]; for (let [key, node] of this.arcs.entries()) { l.push(key.toString()); l.push(node.i.toString()); } return l.join("_"); @@ -474,20 +446,33 @@ // Nodes are equivalent if they have identical arcs, and each identical arc leads to identical states. return this.__str__() == other.__str__(); } sortArcs (dValOccur) { - let lTemp = this.arcs.entries(); + let lTemp = Array.from(this.arcs.entries()); lTemp.sort(function (a, b) { if (dValOccur.get(a[0], 0) > dValOccur.get(b[0], 0)) return -1; if (dValOccur.get(a[0], 0) < dValOccur.get(b[0], 0)) return 1; return 0; }); this.arcs = new Map(lTemp); } + + display (nTab, lArcVal, bRecur=false) { + let sResult = " ".repeat(nTab) + "Node: " + this.i + " " + this.final + "\n"; + for (let arc of this.arcs.keys()) { + sResult += " ".repeat(nTab) + lArcVal[arc] + "\n"; + } + console.log(sResult); + if (bRecur) { + for (let oNode of this.arcs.values()) { + oNode.display(nTab+1, lArcVal, bRecur); + } + } + } // VERSION 1 ===================================================================================================== convToBytes1 (nBytesArc, nBytesNodeAddress) { /* Node scheme: @@ -574,11 +559,11 @@ } function displayCharOrder () { for (let [key, value] of _dCharOrder.entries()) { let s = "[" + key + "]: "; - let lTemp = value.entries(); + let lTemp = Array.from(value.entries()); lTemp.sort(function (a, b) { if (a[1] > b[1]) return -1; if (a[1] < b[1]) return 1;