75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
|
self.aSugg.clear()
self.dSugg.clear()
class IBDAWG:
"""INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""
def __init__ (self, sfDict):
self.by = pkgutil.get_data(__package__, "_dictionaries/" + sfDict)
if not self.by:
raise OSError("# Error. File not found or not loadable: "+sfDict)
if sfDict.endswith(".bdic"):
self._initBinary()
elif sfDict.endswith(".json"):
self._initJSON()
else:
raise OSError("# Error. Unknown file type: "+sfDict)
self.sFileName = sfDict
self._arcMask = (2 ** ((self.nBytesArc * 8) - 3)) - 1
self._finalNodeMask = 1 << ((self.nBytesArc * 8) - 1)
self._lastArcMask = 1 << ((self.nBytesArc * 8) - 2)
self._addrBitMask = 1 << ((self.nBytesArc * 8) - 3) # version 2
# function to decode the affix/suffix code
|
|
>
|
|
|
|
|
|
|
|
|
>
>
|
|
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
|
self.aSugg.clear()
self.dSugg.clear()
class IBDAWG:
"""INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""
def __init__ (self, source):
if type(source) is str:
self.by = pkgutil.get_data(__package__, "_dictionaries/" + source)
if not self.by:
raise OSError("# Error. File not found or not loadable: "+source)
if source.endswith(".bdic"):
self._initBinary()
elif source.endswith(".json"):
self._initJSON(json.loads(self.by.decode("utf-8"))) #json.loads(self.by) # In Python 3.6, can read directly binary strings
else:
raise OSError("# Error. Unknown file type: "+source)
else:
self._initJSON(source)
self.sFileName = source if type(source) is str else "[None]"
self._arcMask = (2 ** ((self.nBytesArc * 8) - 3)) - 1
self._finalNodeMask = 1 << ((self.nBytesArc * 8) - 1)
self._lastArcMask = 1 << ((self.nBytesArc * 8) - 2)
self._addrBitMask = 1 << ((self.nBytesArc * 8) - 3) # version 2
# function to decode the affix/suffix code
|
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
|
# <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value
self.dChar = {}
for i in range(1, self.nChar+1):
self.dChar[self.lArcVal[i]] = i
self.dCharVal = { v: k for k, v in self.dChar.items() }
self.nBytesOffset = 1 # version 3
def _initJSON (self):
"initialize with a JSON text file"
self.__dict__.update(json.loads(self.by.decode("utf-8")))
#self.__dict__.update(json.loads(self.by)) # In Python 3.6, can read directly binary strings
self.byDic = binascii.unhexlify(self.sByDic)
def getInfo (self):
return " Language: {0.sLangName} Lang code: {0.sLangCode} Dictionary name: {0.sDicName}" \
" Compression method: {0.nCompressionMethod:>2} Date: {0.sDate} Stemming: {0.cStemming}FX\n" \
" Arcs values: {0.nArcVal:>10,} = {0.nChar:>5,} characters, {0.nAff:>6,} affixes, {0.nTag:>6,} tags\n" \
" Dictionary: {0.nEntry:>12,} entries, {0.nNode:>11,} nodes, {0.nArc:>11,} arcs\n" \
|
|
|
<
|
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
|
# <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value
self.dChar = {}
for i in range(1, self.nChar+1):
self.dChar[self.lArcVal[i]] = i
self.dCharVal = { v: k for k, v in self.dChar.items() }
self.nBytesOffset = 1 # version 3
def _initJSON (self, oJSON):
"initialize with a JSON text file"
self.__dict__.update(oJSON)
self.byDic = binascii.unhexlify(self.sByDic)
def getInfo (self):
return " Language: {0.sLangName} Lang code: {0.sLangCode} Dictionary name: {0.sDicName}" \
" Compression method: {0.nCompressionMethod:>2} Date: {0.sDate} Stemming: {0.cStemming}FX\n" \
" Arcs values: {0.nArcVal:>10,} = {0.nChar:>5,} characters, {0.nAff:>6,} affixes, {0.nTag:>6,} tags\n" \
" Dictionary: {0.nEntry:>12,} entries, {0.nNode:>11,} nodes, {0.nArc:>11,} arcs\n" \
|