396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
|
for oNode in self.lSortedNodes:
byDic += oNode.convToBytes2(self.nBytesArc, self.nBytesNodeAddress)
elif nCompressionMethod == 3:
byDic = self.oRoot.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset)
for oNode in self.lSortedNodes:
byDic += oNode.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset)
return {
"sHeader": "/pyfsa/",
"sLangCode": self.sLangCode,
"sLangName": self.sLangName,
"sDicName": self.sDicName,
"sFileName": self.sFileName,
"sDate": self._getDate(),
"nEntry": self.nEntry,
"nChar": self.nChar,
|
|
|
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
|
for oNode in self.lSortedNodes:
byDic += oNode.convToBytes2(self.nBytesArc, self.nBytesNodeAddress)
elif nCompressionMethod == 3:
byDic = self.oRoot.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset)
for oNode in self.lSortedNodes:
byDic += oNode.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset)
return {
"sHeader": "/grammalecte-fsa/",
"sLangCode": self.sLangCode,
"sLangName": self.sLangName,
"sDicName": self.sDicName,
"sFileName": self.sFileName,
"sDate": self._getDate(),
"nEntry": self.nEntry,
"nChar": self.nChar,
|
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
|
def writeBinary (self, sPathFile, nCompressionMethod, bDebug=False):
"""
Format of the binary indexable dictionary:
Each section is separated with 4 bytes of \0
- Section Header:
/pyfsa/[compression method]
* compression method is an ASCII string
- Section Informations:
/[lang code]
/[lang name]
/[dictionary name]
/[date creation]
|
|
|
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
|
def writeBinary (self, sPathFile, nCompressionMethod, bDebug=False):
"""
Format of the binary indexable dictionary:
Each section is separated with 4 bytes of \0
- Section Header:
/grammalecte-fsa/[compression method]
* compression method is an ASCII string
- Section Informations:
/[lang code]
/[lang name]
/[dictionary name]
/[date creation]
|
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
|
See DawgNode.convToBytes() for details.
"""
self._calculateBinary(nCompressionMethod)
if not sPathFile.endswith(".bdic"):
sPathFile += "."+str(nCompressionMethod)+".bdic"
with open(sPathFile, 'wb') as hDst:
# header
hDst.write("/pyfsa/{}/".format(nCompressionMethod).encode("utf-8"))
hDst.write(b"\0\0\0\0")
# infos
sInfo = "{}//{}//{}//{}//{}//{}//{}//{}//{}//{}//{}//{}//".format(self.sLangCode, self.sLangName, self.sDicName, self._getDate(), \
self.nChar, self.nBytesArc, self.nBytesNodeAddress, \
self.nEntry, self.nNode, self.nArc, self.nAff, self.cStemming)
hDst.write(sInfo.encode("utf-8"))
hDst.write(b"\0\0\0\0")
|
|
|
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
|
See DawgNode.convToBytes() for details.
"""
self._calculateBinary(nCompressionMethod)
if not sPathFile.endswith(".bdic"):
sPathFile += "."+str(nCompressionMethod)+".bdic"
with open(sPathFile, 'wb') as hDst:
# header
hDst.write("/grammalecte-fsa/{}/".format(nCompressionMethod).encode("utf-8"))
hDst.write(b"\0\0\0\0")
# infos
sInfo = "{}//{}//{}//{}//{}//{}//{}//{}//{}//{}//{}//{}//".format(self.sLangCode, self.sLangName, self.sDicName, self._getDate(), \
self.nChar, self.nBytesArc, self.nBytesNodeAddress, \
self.nEntry, self.nNode, self.nArc, self.nAff, self.cStemming)
hDst.write(sInfo.encode("utf-8"))
hDst.write(b"\0\0\0\0")
|