89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
raise ValueError("# Error. Unknown dictionary version: {}".format(self.by[7:8]))
try:
header, info, values, bdic = self.by.split(b"\0\0\0\0", 3)
except Exception:
raise Exception
self.sName = sDicName
self.nVersion = int(self.by[7:8].decode("utf-8"))
self.sHeader = header.decode("utf-8")
self.lArcVal = values.decode("utf-8").split("\t")
self.nArcVal = len(self.lArcVal)
self.byDic = bdic
l = info.decode("utf-8").split("/")
self.sLang = l[0]
|
|
|
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
raise ValueError("# Error. Unknown dictionary version: {}".format(self.by[7:8]))
try:
header, info, values, bdic = self.by.split(b"\0\0\0\0", 3)
except Exception:
raise Exception
self.sName = sDicName
self.nCompressionMethod = int(self.by[7:8].decode("utf-8"))
self.sHeader = header.decode("utf-8")
self.lArcVal = values.decode("utf-8").split("\t")
self.nArcVal = len(self.lArcVal)
self.byDic = bdic
l = info.decode("utf-8").split("/")
self.sLang = l[0]
|
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
|
self._arcMask = (2 ** ((self.nBytesArc * 8) - 3)) - 1
self._finalNodeMask = 1 << ((self.nBytesArc * 8) - 1)
self._lastArcMask = 1 << ((self.nBytesArc * 8) - 2)
self._addrBitMask = 1 << ((self.nBytesArc * 8) - 3) # version 2
self.nBytesOffset = 1 # version 3
# Configuring DAWG functions according to nVersion
if self.nVersion == 1:
self.morph = self._morph1
self.stem = self._stem1
self._lookupArcNode = self._lookupArcNode1
self._getArcs = self._getArcs1
self._writeNodes = self._writeNodes1
elif self.nVersion == 2:
self.morph = self._morph2
self.stem = self._stem2
self._lookupArcNode = self._lookupArcNode2
self._getArcs = self._getArcs2
self._writeNodes = self._writeNodes2
elif self.nVersion == 3:
self.morph = self._morph3
self.stem = self._stem3
self._lookupArcNode = self._lookupArcNode3
self._getArcs = self._getArcs3
self._writeNodes = self._writeNodes3
else:
raise ValueError(" # Error: unknown code: {}".format(self.nVersion))
self.bOptNumSigle = False
self.bOptNumAtLast = False
def getInfo (self):
    "return a four-line, human-readable summary built from the attributes of this object"
    # Every placeholder reads an attribute of positional argument 0 (the instance itself),
    # so the joined template is formatted in a single call.
    lTemplate = [
        " Language: {0.sLang:>10} Version: {0.nVersion:>2} Date: {0.sDate} Stemming: {0.cStemming}FX\n",
        " Arcs values: {0.nArcVal:>10,} = {0.nChar:>5,} characters, {0.nAff:>6,} affixes, {0.nTag:>6,} tags\n",
        " Dictionary: {0.nEntries:>12,} entries, {0.nNode:>11,} nodes, {0.nArc:>11,} arcs\n",
        " Address size: {0.nBytesNodeAddress:>1} bytes, Arc size: {0.nBytesArc:>1} bytes\n",
    ]
    sTemplate = "".join(lTemplate)
    return sTemplate.format(self)
def writeAsJSObject (self, spfDest, bInJSModule=False, bBinaryDictAsHexString=False):
"write IBDAWG as a JavaScript object in a JavaScript module"
import json
with open(spfDest, "w", encoding="utf-8", newline="\n") as hDst:
if bInJSModule:
hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ')
hDst.write(json.dumps({
"sName": self.sName,
"nVersion": self.nVersion,
"sDate": str(datetime.datetime.now())[:-7],
"sHeader": self.sHeader,
"lArcVal": self.lArcVal,
"nArcVal": self.nArcVal,
# JavaScript is a pile of shit, so Mozilla’s JS parser doesn’t like files bigger than 4 MB!
# So, if necessary, we use a hexadecimal string, which we will convert later in the Firefox extension.
# https://github.com/mozilla/addons-linter/issues/1361
|
|
|
|
|
|
|
|
|
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
|
self._arcMask = (2 ** ((self.nBytesArc * 8) - 3)) - 1
self._finalNodeMask = 1 << ((self.nBytesArc * 8) - 1)
self._lastArcMask = 1 << ((self.nBytesArc * 8) - 2)
self._addrBitMask = 1 << ((self.nBytesArc * 8) - 3) # version 2
self.nBytesOffset = 1 # version 3
# Configuring DAWG functions according to nCompressionMethod
if self.nCompressionMethod == 1:
self.morph = self._morph1
self.stem = self._stem1
self._lookupArcNode = self._lookupArcNode1
self._getArcs = self._getArcs1
self._writeNodes = self._writeNodes1
elif self.nCompressionMethod == 2:
self.morph = self._morph2
self.stem = self._stem2
self._lookupArcNode = self._lookupArcNode2
self._getArcs = self._getArcs2
self._writeNodes = self._writeNodes2
elif self.nCompressionMethod == 3:
self.morph = self._morph3
self.stem = self._stem3
self._lookupArcNode = self._lookupArcNode3
self._getArcs = self._getArcs3
self._writeNodes = self._writeNodes3
else:
raise ValueError(" # Error: unknown code: {}".format(self.nCompressionMethod))
self.bOptNumSigle = False
self.bOptNumAtLast = False
def getInfo (self):
    "return a four-line, human-readable summary built from the attributes of this object"
    # Every placeholder reads an attribute of positional argument 0 (the instance itself),
    # so the joined template is formatted in a single call.
    lTemplate = [
        " Language: {0.sLang:>10} Version: {0.nCompressionMethod:>2} Date: {0.sDate} Stemming: {0.cStemming}FX\n",
        " Arcs values: {0.nArcVal:>10,} = {0.nChar:>5,} characters, {0.nAff:>6,} affixes, {0.nTag:>6,} tags\n",
        " Dictionary: {0.nEntries:>12,} entries, {0.nNode:>11,} nodes, {0.nArc:>11,} arcs\n",
        " Address size: {0.nBytesNodeAddress:>1} bytes, Arc size: {0.nBytesArc:>1} bytes\n",
    ]
    sTemplate = "".join(lTemplate)
    return sTemplate.format(self)
def writeAsJSObject (self, spfDest, bInJSModule=False, bBinaryDictAsHexString=False):
"write IBDAWG as a JavaScript object in a JavaScript module"
import json
with open(spfDest, "w", encoding="utf-8", newline="\n") as hDst:
if bInJSModule:
hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ')
hDst.write(json.dumps({
"sName": self.sName,
"nCompressionMethod": self.nCompressionMethod,
"sDate": str(datetime.datetime.now())[:-7],
"sHeader": self.sHeader,
"lArcVal": self.lArcVal,
"nArcVal": self.nArcVal,
# JavaScript is a pile of shit, so Mozilla’s JS parser doesn’t like files bigger than 4 MB!
# So, if necessary, we use a hexadecimal string, which we will convert later in the Firefox extension.
# https://github.com/mozilla/addons-linter/issues/1361
|