161
162
163
164
165
166
167
168
169
170
171
172
173
174
|
# calculated later
self.nBytesNodeAddress = 1
self.nBytesArc = 0
self.nBytesOffset = 0
self.nMaxOffset = 0
# build
lWord.sort()
oProgBar = ProgressBar(0, len(lWord))
for aEntry in lWord:
self.insert(aEntry)
oProgBar.increment(1)
oProgBar.done()
|
>
>
>
>
|
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
|
# calculated later
self.nBytesNodeAddress = 1
self.nBytesArc = 0
self.nBytesOffset = 0
self.nMaxOffset = 0
# binary dictionary
self.byDic = b""
self.lByDic = []
# build
lWord.sort()
oProgBar = ProgressBar(0, len(lWord))
for aEntry in lWord:
self.insert(aEntry)
oProgBar.increment(1)
oProgBar.done()
|
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
|
# BINARY CONVERSION
def _calculateBinary (self):
    """Convert the DAWG to its binary form and store it in self.byDic.

    Sets self.nBytesArc from the bit length of self.nArcVal, resets
    self.nBytesOffset to 0, runs the node-address helpers, then serializes
    the root node followed by every node of self.lMinimizedNodes (in list
    order). Size statistics are printed at the end.
    """
    print(" > Write DAWG as an indexable binary dictionary")
    # We add 2 bits. See DawgNode.convToBytes()
    self.nBytesArc = ( (self.nArcVal.bit_length() + 2) // 8 ) + 1
    self.nBytesOffset = 0
    self._calcNumBytesNodeAddress()
    self._calcNodesAddress()
    # Collect the serialized nodes and join them once: repeated `bytes +=`
    # copies the whole buffer at every step.
    # NOTE(review): assumes convToBytes() returns bytes -- confirm in DawgNode.
    lChunk = [ self.oRoot.convToBytes(self.nBytesArc, self.nBytesNodeAddress) ]
    lChunk.extend(oNode.convToBytes(self.nBytesArc, self.nBytesNodeAddress) for oNode in self.lMinimizedNodes)
    self.byDic = b"".join(lChunk)
    nBytesPerArc = self.nBytesArc + self.nBytesNodeAddress
    print(" Arc values (chars, affixes and tags): {} -> {} bytes".format( self.nArcVal, len("\t".join(self.lArcVal).encode("utf-8")) ))
    print(" Arc size: {} bytes, Address size: {} bytes -> {} * {} = {} bytes".format( self.nBytesArc, self.nBytesNodeAddress, \
                                                                                  nBytesPerArc, self.nArc, \
                                                                                  nBytesPerArc*self.nArc ))
|
<
|
364
365
366
367
368
369
370
371
372
373
374
375
376
377
|
# BINARY CONVERSION
def _calculateBinary (self):
    """Build the binary dictionary (self.byDic) from the DAWG nodes.

    Steps, in order: compute self.nBytesArc from self.nArcVal, reset
    self.nBytesOffset, call _calcNumBytesNodeAddress() and
    _calcNodesAddress(), serialize the root node and then each node of
    self.lMinimizedNodes, and finally print size statistics.
    """
    print(" > Write DAWG as an indexable binary dictionary")
    # We add 2 bits. See DawgNode.convToBytes()
    self.nBytesArc = ( (self.nArcVal.bit_length() + 2) // 8 ) + 1
    self.nBytesOffset = 0
    self._calcNumBytesNodeAddress()
    self._calcNodesAddress()
    # One join instead of `+=` in a loop, which would recopy the growing
    # buffer for every node.
    # NOTE(review): assumes convToBytes() returns bytes -- confirm in DawgNode.
    lByNode = [ self.oRoot.convToBytes(self.nBytesArc, self.nBytesNodeAddress) ]
    for oNode in self.lMinimizedNodes:
        lByNode.append(oNode.convToBytes(self.nBytesArc, self.nBytesNodeAddress))
    self.byDic = b"".join(lByNode)
    nArcAndAddress = self.nBytesArc + self.nBytesNodeAddress
    print(" Arc values (chars, affixes and tags): {} -> {} bytes".format( self.nArcVal, len("\t".join(self.lArcVal).encode("utf-8")) ))
    print(" Arc size: {} bytes, Address size: {} bytes -> {} * {} = {} bytes".format( self.nBytesArc, self.nBytesNodeAddress, \
                                                                                  nArcAndAddress, self.nArc, \
                                                                                  nArcAndAddress*self.nArc ))
|