Overview
| Comment: | [graphspell][py] ibdawg: remove binary dict support |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | graphspell | dict2 |
| Files: | files | file ages | folders |
| SHA3-256: |
866ec22f7d45c0d8a1688402451a3057 |
| User & Date: | olr on 2020-11-04 12:21:12 |
| Other Links: | branch diff | manifest | tags |
Context
|
2020-11-04
| ||
| 12:37 | [graphspell][py] remove duplicate method check-in: a3980c3ca4 user: olr tags: graphspell, dict2 | |
| 12:21 | [graphspell][py] ibdawg: remove binary dict support check-in: 866ec22f7d user: olr tags: graphspell, dict2 | |
| 12:02 | [graphspell] ibdawg: code cleaning, remove old code, useless compression versions check-in: 86250e8e6c user: olr tags: graphspell, dict2 | |
Changes
Modified graphspell/ibdawg.py from [2cf8f6a51c] to [0da0287637].
| ︙ | ︙ | |||
110 111 112 113 114 115 116 |
class IBDAWG:
"""INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""
def __init__ (self, source):
if isinstance(source, str):
| | | | < < < | | < | | > | > > > | 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
class IBDAWG:
"""INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""
def __init__ (self, source):
if isinstance(source, str):
by = pkgutil.get_data(__package__, "_dictionaries/" + source)
if not by:
raise OSError("# Error. File not found or not loadable: "+source)
self.sFileName = source
oData = json.loads(by.decode("utf-8")) #json.loads(by) # In Python 3.6, can read directly binary strings
else:
self.sFileName = "[None]"
oData = source
self.sByDic = "" # init to prevent pylint whining
self.__dict__.update(oData)
self.byDic = binascii.unhexlify(self.sByDic)
self.dCharVal = { v: k for k, v in self.dChar.items() }
self.a2grams = set(getattr(self, 'l2grams')) if hasattr(self, 'l2grams') else None
# Performance trick:
# Instead of converting bytes to integers each time we parse the binary dictionary,
# we do it once, then parse the array
nAcc = 0
byBuffer = b""
self.lByDic = []
|
| ︙ | ︙ | |||
166 167 168 169 170 171 172 |
# lexicographer module ?
self.lexicographer = None
try:
self.lexicographer = importlib.import_module(".lexgraph_"+self.sLangCode, "grammalecte.graphspell")
except ImportError:
print("# No module <graphspell.lexgraph_"+self.sLangCode+".py>")
| < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
# lexicographer module ?
self.lexicographer = None
try:
self.lexicographer = importlib.import_module(".lexgraph_"+self.sLangCode, "grammalecte.graphspell")
except ImportError:
print("# No module <graphspell.lexgraph_"+self.sLangCode+".py>")
def getInfo (self):
    """Return a multi-line, printable description of the IBDAWG.

    The text is built from attributes of this object: language name and
    code, dictionary name, compression method, date, stemming code, the
    counts of arc values / characters / affixes / tags, the counts of
    entries / nodes / arcs, and the node-address and arc sizes in bytes.
    """
    # Every placeholder reads an attribute of the single positional
    # argument given to format() below, i.e. this object.
    sIdentity = " Language: {0.sLangName} Lang code: {0.sLangCode} Dictionary name: {0.sDicName}"
    sBuild = " Compression method: {0.nCompressionMethod:>2} Date: {0.sDate} Stemming: {0.cStemming}FX\n"
    sArcValues = " Arcs values: {0.nArcVal:>10,} = {0.nChar:>5,} characters, {0.nAff:>6,} affixes, {0.nTag:>6,} tags\n"
    sGraphSize = " Dictionary: {0.nEntry:>12,} entries, {0.nNode:>11,} nodes, {0.nArc:>11,} arcs\n"
    sByteSizes = " Address size: {0.nBytesNodeAddress:>1} bytes, Arc size: {0.nBytesArc:>1} bytes\n"
    # sIdentity has no trailing newline, so it shares the first output
    # line with sBuild.
    sTemplate = sIdentity + sBuild + sArcValues + sGraphSize + sByteSizes
    return sTemplate.format(self)
|
| ︙ | ︙ |