Index: graphspell-js/dawg.js ================================================================== --- graphspell-js/dawg.js +++ graphspell-js/dawg.js @@ -342,20 +342,16 @@ } } } // BINARY CONVERSION - createBinaryJSON (nCompressionMethod) { - console.log("Write DAWG as an indexable binary dictionary [method: "+nCompressionMethod+"]"); - if (nCompressionMethod == 1) { - this.nBytesArc = Math.floor( (this.nArcVal.toString(2).length + 2) / 8 ) + 1; // We add 2 bits. See DawgNode.convToBytes1() - this.nBytesOffset = 0; - this._calcNumBytesNodeAddress(); - this._calcNodesAddress1(); - } else { - console.log("Error: unknown compression method"); - } + createBinaryJSON (nCompressionMethod=1) { + console.log("Write DAWG as an indexable binary dictionary"); + this.nBytesArc = Math.floor( (this.nArcVal.toString(2).length + 2) / 8 ) + 1; // We add 2 bits. See DawgNode.convToBytes() + this.nBytesOffset = 0; + this._calcNumBytesNodeAddress(); + this._calcNodesAddress(); console.log("Arc values (chars, affixes and tags): " + this.nArcVal); console.log("Arc size: "+this.nBytesArc+" bytes, Address size: "+this.nBytesNodeAddress+" bytes"); console.log("-> " + this.nBytesArc+this.nBytesNodeAddress + " * " + this.nArc + " = " + (this.nBytesArc+this.nBytesNodeAddress)*this.nArc + " bytes"); return this._createJSON(nCompressionMethod); } @@ -366,26 +362,23 @@ while (((this.nBytesArc + this.nBytesNodeAddress) * this.nArc) > (2 ** (this.nBytesNodeAddress * 8))) { this.nBytesNodeAddress += 1; } } - _calcNodesAddress1 () { + _calcNodesAddress () { let nBytesNode = this.nBytesArc + this.nBytesNodeAddress; let iAddr = this.oRoot.arcs.size * nBytesNode; for (let oNode of this.dMinimizedNodes.values()) { oNode.addr = iAddr; iAddr += Math.max(oNode.arcs.size, 1) * nBytesNode; } } - _createJSON (nCompressionMethod) { - let sByDic = ""; - if (nCompressionMethod == 1) { - sByDic = this.oRoot.convToBytes1(this.nBytesArc, this.nBytesNodeAddress); - for (let oNode of this.dMinimizedNodes.values()) { - sByDic += oNode.convToBytes1(this.nBytesArc, this.nBytesNodeAddress); - } + _createJSON (nCompressionMethod=1) { + let sByDic = this.oRoot.convToBytes(this.nBytesArc, this.nBytesNodeAddress); + for (let oNode of this.dMinimizedNodes.values()) { + sByDic += oNode.convToBytes(this.nBytesArc, this.nBytesNodeAddress); } let oJSON = { "sHeader": "/grammalecte-fsa/", "sLangCode": this.sLangCode, "sLangName": this.sLangName, @@ -494,11 +487,11 @@ } } } // VERSION 1 ===================================================================================================== - convToBytes1 (nBytesArc, nBytesNodeAddress) { + convToBytes (nBytesArc, nBytesNodeAddress) { /* Node scheme: - Arc length is defined by nBytesArc - Address length is defined by nBytesNodeAddress Index: graphspell/dawg.py ================================================================== --- graphspell/dawg.py +++ graphspell/dawg.py @@ -356,30 +356,16 @@ if not zPattern or zPattern.search(self.lArcVal[nMorphVal]): yield sEntry + "\t" + self.lArcVal[nMorphVal] # BINARY CONVERSION - def _calculateBinary (self, nCompressionMethod): - print(" > Write DAWG as an indexable binary dictionary [method: %d]" % nCompressionMethod) - if nCompressionMethod == 1: - self.nBytesArc = ( (self.nArcVal.bit_length() + 2) // 8 ) + 1 # We add 2 bits. See DawgNode.convToBytes1() - self.nBytesOffset = 0 - self._calcNumBytesNodeAddress() - self._calcNodesAddress1() - elif nCompressionMethod == 2: - self.nBytesArc = ( (self.nArcVal.bit_length() + 3) // 8 ) + 1 # We add 3 bits. See DawgNode.convToBytes2() - self.nBytesOffset = 0 - self._calcNumBytesNodeAddress() - self._calcNodesAddress2() - elif nCompressionMethod == 3: - self.nBytesArc = ( (self.nArcVal.bit_length() + 3) // 8 ) + 1 # We add 3 bits. See DawgNode.convToBytes3() - self.nBytesOffset = 1 - self.nMaxOffset = (2 ** (self.nBytesOffset * 8)) - 1 - self._calcNumBytesNodeAddress() - self._calcNodesAddress3() - else: - print(" # Error: unknown compression method") + def _calculateBinary (self, nCompressionMethod=1): + print(" > Write DAWG as an indexable binary dictionary") + self.nBytesArc = ( (self.nArcVal.bit_length() + 2) // 8 ) + 1 # We add 2 bits. See DawgNode.convToBytes() + self.nBytesOffset = 0 + self._calcNumBytesNodeAddress() + self._calcNodesAddress() print(" Arc values (chars, affixes and tags): {} -> {} bytes".format( self.nArcVal, len("\t".join(self.lArcVal).encode("utf-8")) )) print(" Arc size: {} bytes, Address size: {} bytes -> {} * {} = {} bytes".format( self.nBytesArc, self.nBytesNodeAddress, \ self.nBytesArc+self.nBytesNodeAddress, self.nArc, \ (self.nBytesArc+self.nBytesNodeAddress)*self.nArc )) @@ -387,70 +373,24 @@ "how many bytes needed to store all nodes/arcs in the binary dictionary" self.nBytesNodeAddress = 1 while ((self.nBytesArc + self.nBytesNodeAddress) * self.nArc) > (2 ** (self.nBytesNodeAddress * 8)): self.nBytesNodeAddress += 1 - def _calcNodesAddress1 (self): + def _calcNodesAddress (self): nBytesNode = self.nBytesArc + self.nBytesNodeAddress iAddr = len(self.oRoot.arcs) * nBytesNode for oNode in self.lMinimizedNodes: oNode.addr = iAddr iAddr += max(len(oNode.arcs), 1) * nBytesNode - def _calcNodesAddress2 (self): - nBytesNode = self.nBytesArc + self.nBytesNodeAddress - iAddr = len(self.oRoot.arcs) * nBytesNode - for oNode in self.lSortedNodes: - oNode.addr = iAddr - iAddr += max(len(oNode.arcs), 1) * nBytesNode - for oNextNode in oNode.arcs.values(): - if (oNode.pos + 1) == oNextNode.pos: - iAddr -= self.nBytesNodeAddress - #break - - def _calcNodesAddress3 (self): - nBytesNode = self.nBytesArc + self.nBytesNodeAddress - # theorical nodes size if only addresses and no offset - self.oRoot.size = len(self.oRoot.arcs) * nBytesNode - for oNode in self.lSortedNodes: - oNode.size = max(len(oNode.arcs), 1) * nBytesNode - # rewind and calculate dropdown from the end, several times - nDiff = self.nBytesNodeAddress - self.nBytesOffset - bEnd = False - while not bEnd: - bEnd = True - # recalculate addresses - iAddr = self.oRoot.size - for oNode in self.lSortedNodes: - oNode.addr = iAddr - iAddr += oNode.size - # rewind and calculate dropdown from the end, several times - for i in range(self.nNode-1, -1, -1): - nSize = max(len(self.lSortedNodes[i].arcs), 1) * nBytesNode - for oNextNode in self.lSortedNodes[i].arcs.values(): - if 1 < (oNextNode.addr - self.lSortedNodes[i].addr) < self.nMaxOffset: - nSize -= nDiff - if self.lSortedNodes[i].size != nSize: - self.lSortedNodes[i].size = nSize - bEnd = False - def getBinaryAsJSON (self, nCompressionMethod=1, bBinaryDictAsHexString=True): "return a JSON string containing all necessary data of the dictionary (compressed as a binary string)" self._calculateBinary(nCompressionMethod) byDic = b"" - if nCompressionMethod == 1: - byDic = self.oRoot.convToBytes1(self.nBytesArc, self.nBytesNodeAddress) - for oNode in self.lMinimizedNodes: - byDic += oNode.convToBytes1(self.nBytesArc, self.nBytesNodeAddress) - elif nCompressionMethod == 2: - byDic = self.oRoot.convToBytes2(self.nBytesArc, self.nBytesNodeAddress) - for oNode in self.lSortedNodes: - byDic += oNode.convToBytes2(self.nBytesArc, self.nBytesNodeAddress) - elif nCompressionMethod == 3: - byDic = self.oRoot.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset) - for oNode in self.lSortedNodes: - byDic += oNode.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset) + byDic = self.oRoot.convToBytes(self.nBytesArc, self.nBytesNodeAddress) + for oNode in self.lMinimizedNodes: + byDic += oNode.convToBytes(self.nBytesArc, self.nBytesNodeAddress) return { "sHeader": "/grammalecte-fsa/", "sLangCode": self.sLangCode, "sLangName": self.sLangName, "sDicName": self.sDicName, @@ -487,106 +427,21 @@ hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ') hDst.write( json.dumps(self.getBinaryAsJSON(nCompressionMethod, bBinaryDictAsHexString), ensure_ascii=False) ) if bInJSModule: hDst.write(";\n\nexports.dictionary = dictionary;\n") - def writeBinary (self, sPathFile, nCompressionMethod, bDebug=False): - """ - Save as a binary file. - - Format of the binary indexable dictionary: - Each section is separated with 4 bytes of \0 - - - Section Header: - /grammalecte-fsa/[compression method] - * compression method is an ASCII string - - - Section Informations: - /[lang code] - /[lang name] - /[dictionary name] - /[date creation] - /[number of chars] - /[number of bytes for each arc] - /[number of bytes for each address node] - /[number of entries] - /[number of nodes] - /[number of arcs] - /[number of affixes] - * each field is a ASCII string - /[stemming code] - * "S" means stems are generated by /suffix_code/, - "A" means they are generated by /affix_code/ - See defineSuffixCode() and defineAffixCode() for details. - "N" means no stemming - - - Section Values: - * a list of strings encoded in binary from utf-8, each value separated with a tabulation - - - Section Word Graph (nodes / arcs) - * A list of nodes which are a list of arcs with an address of the next node. - See DawgNode.convToBytes() for details. - - - Section 2grams: - * A list of 2grams (as strings: 2 chars) encoded in binary from utf-8, each value separated with a tabulation - """ - self._calculateBinary(nCompressionMethod) - if not sPathFile.endswith(".bdic"): - sPathFile += "."+str(nCompressionMethod)+".bdic" - with open(sPathFile, 'wb') as hDst: - # header - hDst.write("/grammalecte-fsa/{}/".format(nCompressionMethod).encode("utf-8")) - hDst.write(b"\0\0\0\0") - # infos - sInfo = "{}//{}//{}//{}//{}//{}//{}//{}//{}//{}//{}//{}//{}".format(self.sLangCode, self.sLangName, self.sDicName, self.sDescription, self._getDate(), \ - self.nChar, self.nBytesArc, self.nBytesNodeAddress, \ - self.nEntry, self.nNode, self.nArc, self.nAff, self.cStemming) - hDst.write(sInfo.encode("utf-8")) - hDst.write(b"\0\0\0\0") - # lArcVal - hDst.write("\t".join(self.lArcVal).encode("utf-8")) - hDst.write(b"\0\0\0\0") - # 2grams - hDst.write("\t".join(self.a2grams).encode("utf-8")) - hDst.write(b"\0\0\0\0") - # DAWG: nodes / arcs - if nCompressionMethod == 1: - hDst.write(self.oRoot.convToBytes1(self.nBytesArc, self.nBytesNodeAddress)) - for oNode in self.lMinimizedNodes: - hDst.write(oNode.convToBytes1(self.nBytesArc, self.nBytesNodeAddress)) - elif nCompressionMethod == 2: - hDst.write(self.oRoot.convToBytes2(self.nBytesArc, self.nBytesNodeAddress)) - for oNode in self.lSortedNodes: - hDst.write(oNode.convToBytes2(self.nBytesArc, self.nBytesNodeAddress)) - elif nCompressionMethod == 3: - hDst.write(self.oRoot.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset)) - for oNode in self.lSortedNodes: - hDst.write(oNode.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset)) - if bDebug: - self._writeNodes(sPathFile, nCompressionMethod) - def _getDate (self): return time.strftime("%Y-%m-%d %H:%M:%S") - def _writeNodes (self, sPathFile, nCompressionMethod): + def _writeNodes (self, sPathFile, nCompressionMethod=1): "for debugging only" print(" > Write nodes") with open(sPathFile+".nodes."+str(nCompressionMethod)+".txt", 'w', encoding='utf-8', newline="\n") as hDst: - if nCompressionMethod == 1: - hDst.write(self.oRoot.getTxtRepr1(self.nBytesArc, self.lArcVal)+"\n") - #hDst.write( ''.join( [ "%02X " % z for z in self.oRoot.convToBytes1(self.nBytesArc, self.nBytesNodeAddress) ] ).strip() ) - for oNode in self.lMinimizedNodes: - hDst.write(oNode.getTxtRepr1(self.nBytesArc, self.lArcVal)+"\n") - if nCompressionMethod == 2: - hDst.write(self.oRoot.getTxtRepr2(self.nBytesArc, self.lArcVal)+"\n") - for oNode in self.lSortedNodes: - hDst.write(oNode.getTxtRepr2(self.nBytesArc, self.lArcVal)+"\n") - if nCompressionMethod == 3: - hDst.write(self.oRoot.getTxtRepr3(self.nBytesArc, self.nBytesOffset, self.lArcVal)+"\n") - #hDst.write( ''.join( [ "%02X " % z for z in self.oRoot.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset) ] ).strip() ) - for oNode in self.lSortedNodes: - hDst.write(oNode.getTxtRepr3(self.nBytesArc, self.nBytesOffset, self.lArcVal)+"\n") + hDst.write(self.oRoot.getTxtRepr(self.nBytesArc, self.lArcVal)+"\n") + #hDst.write( ''.join( [ "%02X " % z for z in self.oRoot.convToBytes(self.nBytesArc, self.nBytesNodeAddress) ] ).strip() ) + for oNode in self.lMinimizedNodes: + hDst.write(oNode.getTxtRepr(self.nBytesArc, self.lArcVal)+"\n") class DawgNode: """Node of the word graph""" @@ -646,11 +501,11 @@ def sortArcs2 (self, dValOccur, lArcVal): "sort arcs of each node depending on the previous char" self.arcs = collections.OrderedDict(sorted(self.arcs.items(), key=lambda t: dValOccur.get(lArcVal[t[0]], 0), reverse=True)) # VERSION 1 ===================================================================================================== - def convToBytes1 (self, nBytesArc, nBytesNodeAddress): + def convToBytes (self, nBytesArc, nBytesNodeAddress): """ Convert to bytes (method 1). Node scheme: - Arc length is defined by nBytesArc @@ -688,11 +543,11 @@ val = val | nFinalArcMask by += val.to_bytes(nBytesArc, byteorder='big') by += self.arcs[arc].addr.to_bytes(nBytesNodeAddress, byteorder='big') return by - def getTxtRepr1 (self, nBytesArc, lVal): + def getTxtRepr (self, nBytesArc, lVal): "return representation as string of node (method 1)" nArc = len(self.arcs) nFinalNodeMask = 1 << ((nBytesArc*8)-1) nFinalArcMask = 1 << ((nBytesArc*8)-2) s = "i{:_>10} -- #{:_>10}\n".format(self.i, self.addr) @@ -705,161 +560,10 @@ val = val | nFinalNodeMask if i == nArc: val = val | nFinalArcMask s += " {:<20} {:0>16} i{:_>10} #{:_>10}\n".format(lVal[arc], bin(val)[2:], self.arcs[arc].i, self.arcs[arc].addr) return s - - # VERSION 2 ===================================================================================================== - def convToBytes2 (self, nBytesArc, nBytesNodeAddress): - """ - Convert to bytes (method 2). - - Node scheme: - - Arc length is defined by nBytesArc - - Address length is defined by nBytesNodeAddress - - | Arc | Address of next node | - | | | - ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ - ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ - ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ - [...] - ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ - ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ - ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ - ^ ^ ^ - ┃ ┃ ┃ - ┃ ┃ ┗━━ if 1, caution, no address: next node is the following node - ┃ ┗━━━━ if 1, last arc of this node - ┗━━━━━━ if 1, this node is final (only on the first arc) - """ - nArc = len(self.arcs) - nFinalNodeMask = 1 << ((nBytesArc*8)-1) - nFinalArcMask = 1 << ((nBytesArc*8)-2) - nNextNodeMask = 1 << ((nBytesArc*8)-3) - if not nArc: - val = nFinalNodeMask | nFinalArcMask - by = val.to_bytes(nBytesArc, byteorder='big') - by += (0).to_bytes(nBytesNodeAddress, byteorder='big') - return by - by = b"" - for i, arc in enumerate(self.arcs, 1): - val = arc - if i == 1 and self.final: - val = val | nFinalNodeMask - if i == nArc: - val = val | nFinalArcMask - if (self.pos + 1) == self.arcs[arc].pos and self.i != 0: - val = val | nNextNodeMask - by += val.to_bytes(nBytesArc, byteorder='big') - else: - by += val.to_bytes(nBytesArc, byteorder='big') - by += self.arcs[arc].addr.to_bytes(nBytesNodeAddress, byteorder='big') - return by - - def getTxtRepr2 (self, nBytesArc, lVal): - "return representation as string of node (method 2)" - nArc = len(self.arcs) - nFinalNodeMask = 1 << ((nBytesArc*8)-1) - nFinalArcMask = 1 << ((nBytesArc*8)-2) - nNextNodeMask = 1 << ((nBytesArc*8)-3) - s = "i{:_>10} -- #{:_>10}\n".format(self.i, self.addr) - if not nArc: - s += " {:<20} {:0>16} i{:_>10} #{:_>10}\n".format("", bin(nFinalNodeMask | nFinalArcMask)[2:], "0", "0") - return s - for i, arc in enumerate(self.arcs, 1): - val = arc - if i == 1 and self.final: - val = val | nFinalNodeMask - if i == nArc: - val = val | nFinalArcMask - if (self.pos + 1) == self.arcs[arc].pos and self.i != 0: - val = val | nNextNodeMask - s += " {:<20} {:0>16}\n".format(lVal[arc], bin(val)[2:]) - else: - s += " {:<20} {:0>16} i{:_>10} #{:_>10}\n".format(lVal[arc], bin(val)[2:], self.arcs[arc].i, self.arcs[arc].addr) - return s - - # VERSION 3 ===================================================================================================== - def convToBytes3 (self, nBytesArc, nBytesNodeAddress, nBytesOffset): - """ - Convert to bytes (method 3). - - Node scheme: - - Arc length is defined by nBytesArc - - Address length is defined by nBytesNodeAddress - - Offset length is defined by nBytesOffset - - | Arc | Address of next node or offset to next node | - | | | - ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ - ┃1┃0┃0┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ - ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ - [...] - ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ - ┃0┃0┃1┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ Offsets are shorter than addresses - ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ - ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ - ┃0┃1┃0┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ - ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ - - ^ ^ ^ - ┃ ┃ ┃ - ┃ ┃ ┗━━ if 1, offset instead of address of next node - ┃ ┗━━━━ if 1, last arc of this node - ┗━━━━━━ if 1, this node is final (only on the first arc) - """ - nArc = len(self.arcs) - nFinalNodeMask = 1 << ((nBytesArc*8)-1) - nFinalArcMask = 1 << ((nBytesArc*8)-2) - nNextNodeMask = 1 << ((nBytesArc*8)-3) - nMaxOffset = (2 ** (nBytesOffset * 8)) - 1 - if not nArc: - val = nFinalNodeMask | nFinalArcMask - by = val.to_bytes(nBytesArc, byteorder='big') - by += (0).to_bytes(nBytesNodeAddress, byteorder='big') - return by - by = b"" - for i, arc in enumerate(self.arcs, 1): - val = arc - if i == 1 and self.final: - val = val | nFinalNodeMask - if i == nArc: - val = val | nFinalArcMask - if 1 < (self.arcs[arc].addr - self.addr) < nMaxOffset and self.i != 0: - val = val | nNextNodeMask - by += val.to_bytes(nBytesArc, byteorder='big') - by += (self.arcs[arc].addr-self.addr).to_bytes(nBytesOffset, byteorder='big') - else: - by += val.to_bytes(nBytesArc, byteorder='big') - by += self.arcs[arc].addr.to_bytes(nBytesNodeAddress, byteorder='big') - return by - - def getTxtRepr3 (self, nBytesArc, nBytesOffset, lVal): - "return representation as string of node (method 3)" - nArc = len(self.arcs) - nFinalNodeMask = 1 << ((nBytesArc*8)-1) - nFinalArcMask = 1 << ((nBytesArc*8)-2) - nNextNodeMask = 1 << ((nBytesArc*8)-3) - nMaxOffset = (2 ** (nBytesOffset * 8)) - 1 - s = "i{:_>10} -- #{:_>10} ({})\n".format(self.i, self.addr, self.size) - if not nArc: - s += " {:<20} {:0>16} i{:_>10} #{:_>10}\n".format("", bin(nFinalNodeMask | nFinalArcMask)[2:], "0", "0") - return s - for i, arc in enumerate(self.arcs, 1): - val = arc - if i == 1 and self.final: - val = val | nFinalNodeMask - if i == nArc: - val = val | nFinalArcMask - if 1 < (self.arcs[arc].addr - self.addr) < nMaxOffset and self.i != 0: - val = val | nNextNodeMask - s += " {:<20} {:0>16} i{:_>10} +{:_>10}\n".format(lVal[arc], bin(val)[2:], self.arcs[arc].i, self.arcs[arc].addr - self.addr) - else: - s += " {:<20} {:0>16} i{:_>10} #{:_>10}\n".format(lVal[arc], bin(val)[2:], self.arcs[arc].i, self.arcs[arc].addr) - return s - # Another attempt to sort node arcs _dCharOrder = {