Overview
| Comment: | [graphspell][py] ibdawg: init directly from an object (JSON) |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | graphspell | multid |
| Files: | files | file ages | folders |
| SHA3-256: | efd7bb0171c2350d26fe3371251ea666 |
| User & Date: | olr on 2018-02-28 10:34:28 |
| Other Links: | branch diff | manifest | tags |
Context
|
2018-02-28
| ||
| 15:28 | [graphspell][py] dawg: get JSON object instead of JSON string check-in: 1b8133065d user: olr tags: graphspell, multid | |
| 10:34 | [graphspell][py] ibdawg: init directly from an object (JSON) check-in: efd7bb0171 user: olr tags: graphspell, multid | |
| 07:50 | merge trunk check-in: a973e9aad8 user: olr tags: multid | |
Changes
Modified graphspell/ibdawg.py from [c41b426a86] to [145a26a0d0].
| ︙ | ︙ | |||
75 76 77 78 79 80 81 |
self.aSugg.clear()
self.dSugg.clear()
class IBDAWG:
"""INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""
| | > | | | | | | | | | > > | | 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
self.aSugg.clear()
self.dSugg.clear()
class IBDAWG:
"""INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""
def __init__ (self, source):
if type(source) is str:
self.by = pkgutil.get_data(__package__, "_dictionaries/" + source)
if not self.by:
raise OSError("# Error. File not found or not loadable: "+source)
if source.endswith(".bdic"):
self._initBinary()
elif source.endswith(".json"):
self._initJSON(json.loads(self.by.decode("utf-8"))) #json.loads(self.by) # In Python 3.6, can read directly binary strings
else:
raise OSError("# Error. Unknown file type: "+source)
else:
self._initJSON(source)
self.sFileName = source if type(source) is str else "[None]"
self._arcMask = (2 ** ((self.nBytesArc * 8) - 3)) - 1
self._finalNodeMask = 1 << ((self.nBytesArc * 8) - 1)
self._lastArcMask = 1 << ((self.nBytesArc * 8) - 2)
self._addrBitMask = 1 << ((self.nBytesArc * 8) - 3) # version 2
# function to decode the affix/suffix code
|
| ︙ | ︙ | |||
165 166 167 168 169 170 171 |
# <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value
self.dChar = {}
for i in range(1, self.nChar+1):
self.dChar[self.lArcVal[i]] = i
self.dCharVal = { v: k for k, v in self.dChar.items() }
self.nBytesOffset = 1 # version 3
| | | < | 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 |
# <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value
self.dChar = {}
for i in range(1, self.nChar+1):
self.dChar[self.lArcVal[i]] = i
self.dCharVal = { v: k for k, v in self.dChar.items() }
self.nBytesOffset = 1 # version 3
def _initJSON (self, oJSON):
"""Initialize the instance from an already-parsed JSON object.

<oJSON> is handed to dict.update(), so each of its entries becomes an
instance attribute. It is not a file name or a JSON string: the caller
is responsible for loading and decoding the JSON beforehand.
"""
# copy every entry of <oJSON> onto the instance as an attribute
self.__dict__.update(oJSON)
# <sByDic> is presumably one of the entries just copied (verify against the JSON schema);
# it is decoded from a hexadecimal string into the binary dictionary <byDic>
self.byDic = binascii.unhexlify(self.sByDic)
def getInfo (self):
return " Language: {0.sLangName} Lang code: {0.sLangCode} Dictionary name: {0.sDicName}" \
" Compression method: {0.nCompressionMethod:>2} Date: {0.sDate} Stemming: {0.cStemming}FX\n" \
" Arcs values: {0.nArcVal:>10,} = {0.nChar:>5,} characters, {0.nAff:>6,} affixes, {0.nTag:>6,} tags\n" \
" Dictionary: {0.nEntry:>12,} entries, {0.nNode:>11,} nodes, {0.nArc:>11,} arcs\n" \
|
| ︙ | ︙ |