Grammalecte  Diff

Differences From Artifact [c41b426a86]:

To Artifact [145a26a0d0]:


75
76
77
78
79
80
81
82
83
84
85





86
87
88
89
90
91
92








93
94

95
96
97
98
99
100
101
75
76
77
78
79
80
81




82
83
84
85
86
87






88
89
90
91
92
93
94
95
96

97
98
99
100
101
102
103
104







-
-
-
-
+
+
+
+
+

-
-
-
-
-
-
+
+
+
+
+
+
+
+

-
+







        self.aSugg.clear()
        self.dSugg.clear()


class IBDAWG:
    """INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""

    def __init__ (self, sfDict):
        self.by = pkgutil.get_data(__package__, "_dictionaries/" + sfDict)
        if not self.by:
            raise OSError("# Error. File not found or not loadable: "+sfDict)
    def __init__ (self, source):
        if type(source) is str:
            self.by = pkgutil.get_data(__package__, "_dictionaries/" + source)
            if not self.by:
                raise OSError("# Error. File not found or not loadable: "+source)

        if sfDict.endswith(".bdic"):
            self._initBinary()
        elif sfDict.endswith(".json"):
            self._initJSON()
        else:
            raise OSError("# Error. Unknown file type: "+sfDict)
            if source.endswith(".bdic"):
                self._initBinary()
            elif source.endswith(".json"):
                self._initJSON(json.loads(self.by.decode("utf-8")))     #json.loads(self.by)    # In Python 3.6, can read directly binary strings
            else:
                raise OSError("# Error. Unknown file type: "+source)
        else:
            self._initJSON(source)

        self.sFileName = sfDict
        self.sFileName = source  if type(source) is str  else "[None]"

        self._arcMask = (2 ** ((self.nBytesArc * 8) - 3)) - 1
        self._finalNodeMask = 1 << ((self.nBytesArc * 8) - 1)
        self._lastArcMask = 1 << ((self.nBytesArc * 8) - 2)
        self._addrBitMask = 1 << ((self.nBytesArc * 8) - 3)  # version 2

        # function to decode the affix/suffix code
165
166
167
168
169
170
171
172

173
174

175
176
177
178
179
180
181
182
168
169
170
171
172
173
174

175
176

177

178
179
180
181
182
183
184







-
+

-
+
-







        # <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value
        self.dChar = {}
        for i in range(1, self.nChar+1):
            self.dChar[self.lArcVal[i]] = i
        self.dCharVal = { v: k  for k, v in self.dChar.items() }
        self.nBytesOffset = 1 # version 3

    def _initJSON (self):
    def _initJSON (self, oJSON):
        "initialize with a JSON text file"
        self.__dict__.update(json.loads(self.by.decode("utf-8")))
        self.__dict__.update(oJSON)
        #self.__dict__.update(json.loads(self.by))                  # In Python 3.6, can read directly binary strings
        self.byDic = binascii.unhexlify(self.sByDic)

    def getInfo (self):
        return  "  Language: {0.sLangName}   Lang code: {0.sLangCode}   Dictionary name: {0.sDicName}" \
                "  Compression method: {0.nCompressionMethod:>2}   Date: {0.sDate}   Stemming: {0.cStemming}FX\n" \
                "  Arcs values:  {0.nArcVal:>10,} = {0.nChar:>5,} characters,  {0.nAff:>6,} affixes,  {0.nTag:>6,} tags\n" \
                "  Dictionary: {0.nEntry:>12,} entries,    {0.nNode:>11,} nodes,   {0.nArc:>11,} arcs\n" \