Grammalecte  Diff

Differences From Artifact [729715ac89]:

To Artifact [e9390e8710]:


161
162
163
164
165
166
167




168
169
170
171
172
173
174
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178







+
+
+
+








        # calculated later
        self.nBytesNodeAddress = 1
        self.nBytesArc = 0
        self.nBytesOffset = 0
        self.nMaxOffset = 0

        # binary dictionary
        self.byDic = b""
        self.lByDic = []

        # build
        lWord.sort()
        oProgBar = ProgressBar(0, len(lWord))
        for aEntry in lWord:
            self.insert(aEntry)
            oProgBar.increment(1)
        oProgBar.done()
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
364
365
366
367
368
369
370

371
372
373
374
375
376
377







-







    # BINARY CONVERSION
    def _calculateBinary (self):
        print(" > Write DAWG as an indexable binary dictionary")
        self.nBytesArc = ( (self.nArcVal.bit_length() + 2) // 8 ) + 1   # We add 2 bits. See DawgNode.convToBytes()
        self.nBytesOffset = 0
        self._calcNumBytesNodeAddress()
        self._calcNodesAddress()
        self.byDic = b""
        self.byDic = self.oRoot.convToBytes(self.nBytesArc, self.nBytesNodeAddress)
        for oNode in self.lMinimizedNodes:
            self.byDic += oNode.convToBytes(self.nBytesArc, self.nBytesNodeAddress)
        print("   Arc values (chars, affixes and tags): {}  ->  {} bytes".format( self.nArcVal, len("\t".join(self.lArcVal).encode("utf-8")) ))
        print("   Arc size: {} bytes, Address size: {} bytes   ->   {} * {} = {} bytes".format( self.nBytesArc, self.nBytesNodeAddress, \
                                                                                                self.nBytesArc+self.nBytesNodeAddress, self.nArc, \
                                                                                                (self.nBytesArc+self.nBytesNodeAddress)*self.nArc ))