Overview
Comment: | [graphspell][py] ibdawg: remove binary dict support |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | graphspell | dict2 |
Files: | files | file ages | folders |
SHA3-256: | 866ec22f7d45c0d8a1688402451a3057 |
User & Date: | olr on 2020-11-04 12:21:12 |
Other Links: | branch diff | manifest | tags |
Context
2020-11-04
| ||
12:37 | [graphspell][py] remove duplicate method check-in: a3980c3ca4 user: olr tags: graphspell, dict2 | |
12:21 | [graphspell][py] ibdawg: remove binary dict support check-in: 866ec22f7d user: olr tags: graphspell, dict2 | |
12:02 | [graphspell] ibdawg: code cleaning, remove old code, useless compression versions check-in: 86250e8e6c user: olr tags: graphspell, dict2 | |
Changes
Modified graphspell/ibdawg.py from [2cf8f6a51c] to [0da0287637].
︙ | ︙ | |||
110 111 112 113 114 115 116 | class IBDAWG: """INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH""" def __init__ (self, source): if isinstance(source, str): | | | | < < < | | < | | > | > > > | 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 | class IBDAWG: """INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH""" def __init__ (self, source): if isinstance(source, str): by = pkgutil.get_data(__package__, "_dictionaries/" + source) if not by: raise OSError("# Error. File not found or not loadable: "+source) self.sFileName = source oData = json.loads(by.decode("utf-8")) #json.loads(by) # In Python 3.6, can read directly binary strings else: self.sFileName = "[None]" oData = source self.sByDic = "" # init to prevent pylint whining self.__dict__.update(oData) self.byDic = binascii.unhexlify(self.sByDic) self.dCharVal = { v: k for k, v in self.dChar.items() } self.a2grams = set(getattr(self, 'l2grams')) if hasattr(self, 'l2grams') else None # Performance trick: # Instead of converting bytes to integers each times we parse the binary dictionary, # we do it once, then parse the array nAcc = 0 byBuffer = b"" self.lByDic = [] |
︙ | ︙ | |||
166 167 168 169 170 171 172 | # lexicographer module ? self.lexicographer = None try: self.lexicographer = importlib.import_module(".lexgraph_"+self.sLangCode, "grammalecte.graphspell") except ImportError: print("# No module <graphspell.lexgraph_"+self.sLangCode+".py>") | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 166 167 168 169 170 171 172 173 174 175 176 177 178 179 | # lexicographer module ? self.lexicographer = None try: self.lexicographer = importlib.import_module(".lexgraph_"+self.sLangCode, "grammalecte.graphspell") except ImportError: print("# No module <graphspell.lexgraph_"+self.sLangCode+".py>") def getInfo (self): "return string about the IBDAWG" return " Language: {0.sLangName} Lang code: {0.sLangCode} Dictionary name: {0.sDicName}" \ " Compression method: {0.nCompressionMethod:>2} Date: {0.sDate} Stemming: {0.cStemming}FX\n" \ " Arcs values: {0.nArcVal:>10,} = {0.nChar:>5,} characters, {0.nAff:>6,} affixes, {0.nTag:>6,} tags\n" \ " Dictionary: {0.nEntry:>12,} entries, {0.nNode:>11,} nodes, {0.nArc:>11,} arcs\n" \ " Address size: {0.nBytesNodeAddress:>1} bytes, Arc size: {0.nBytesArc:>1} bytes\n".format(self) |
︙ | ︙ |