Grammalecte  Check-in [efd7bb0171]

Overview
Comment:[graphspell][py] ibdawg: init directly from an object (JSON)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | graphspell | multid
Files: files | file ages | folders
SHA3-256: efd7bb0171c2350d26fe3371251ea666afae0705b63fd7eab6dcc96510694490
User & Date: olr on 2018-02-28 10:34:28
Other Links: branch diff | manifest | tags
Context
2018-02-28
15:28
[graphspell][py] dawg: get JSON object instead of JSON string check-in: 1b8133065d user: olr tags: graphspell, multid
10:34
[graphspell][py] ibdawg: init directly from an object (JSON) check-in: efd7bb0171 user: olr tags: graphspell, multid
07:50
merge trunk check-in: a973e9aad8 user: olr tags: multid
Changes

Modified graphspell/ibdawg.py from [c41b426a86] to [145a26a0d0].

75
76
77
78
79
80
81
82
83
84
85





86
87
88
89
90
91
92








93
94

95
96
97
98
99
100
101
75
76
77
78
79
80
81




82
83
84
85
86
87






88
89
90
91
92
93
94
95
96

97
98
99
100
101
102
103
104







-
-
-
-
+
+
+
+
+

-
-
-
-
-
-
+
+
+
+
+
+
+
+

-
+







        self.aSugg.clear()
        self.dSugg.clear()


class IBDAWG:
    """INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""

    def __init__ (self, sfDict):
        self.by = pkgutil.get_data(__package__, "_dictionaries/" + sfDict)
        if not self.by:
            raise OSError("# Error. File not found or not loadable: "+sfDict)
    def __init__ (self, source):
        if type(source) is str:
            self.by = pkgutil.get_data(__package__, "_dictionaries/" + source)
            if not self.by:
                raise OSError("# Error. File not found or not loadable: "+source)

        if sfDict.endswith(".bdic"):
            self._initBinary()
        elif sfDict.endswith(".json"):
            self._initJSON()
        else:
            raise OSError("# Error. Unknown file type: "+sfDict)
            if source.endswith(".bdic"):
                self._initBinary()
            elif source.endswith(".json"):
                self._initJSON(json.loads(self.by.decode("utf-8")))     #json.loads(self.by)    # In Python 3.6, can read directly binary strings
            else:
                raise OSError("# Error. Unknown file type: "+source)
        else:
            self._initJSON(source)

        self.sFileName = sfDict
        self.sFileName = source  if type(source) is str  else "[None]"

        self._arcMask = (2 ** ((self.nBytesArc * 8) - 3)) - 1
        self._finalNodeMask = 1 << ((self.nBytesArc * 8) - 1)
        self._lastArcMask = 1 << ((self.nBytesArc * 8) - 2)
        self._addrBitMask = 1 << ((self.nBytesArc * 8) - 3)  # version 2

        # function to decode the affix/suffix code
165
166
167
168
169
170
171
172

173
174

175
176
177
178
179
180
181
182
168
169
170
171
172
173
174

175
176

177

178
179
180
181
182
183
184







-
+

-
+
-







        # <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value
        self.dChar = {}
        for i in range(1, self.nChar+1):
            self.dChar[self.lArcVal[i]] = i
        self.dCharVal = { v: k  for k, v in self.dChar.items() }
        self.nBytesOffset = 1 # version 3

    def _initJSON (self):
    def _initJSON (self, oJSON):
        "initialize with a JSON text file"
        self.__dict__.update(json.loads(self.by.decode("utf-8")))
        self.__dict__.update(oJSON)
        #self.__dict__.update(json.loads(self.by))                  # In Python 3.6, can read directly binary strings
        self.byDic = binascii.unhexlify(self.sByDic)

    def getInfo (self):
        return  "  Language: {0.sLangName}   Lang code: {0.sLangCode}   Dictionary name: {0.sDicName}" \
                "  Compression method: {0.nCompressionMethod:>2}   Date: {0.sDate}   Stemming: {0.cStemming}FX\n" \
                "  Arcs values:  {0.nArcVal:>10,} = {0.nChar:>5,} characters,  {0.nAff:>6,} affixes,  {0.nTag:>6,} tags\n" \
                "  Dictionary: {0.nEntry:>12,} entries,    {0.nNode:>11,} nodes,   {0.nArc:>11,} arcs\n" \