Overview
| Comment: | [graphspell] new header <grammalecte-fsa> for binary dictionaries |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | graphspell | multid |
| Files: | files | file ages | folders |
| SHA3-256: | 8333a8bf1bb8a85bc0b361a59d2fe3c8 |
| User & Date: | olr on 2018-04-01 08:16:11 |
| Other Links: | branch diff | manifest | tags |
Context
| 2018-04-01 | | |
| 09:31 | [fr][bug] conj: test if tTags exists check-in: a89587a82c user: olr tags: fr, multid | |
| 08:16 | [graphspell] new header <grammalecte-fsa> for binary dictionaries check-in: 8333a8bf1b user: olr tags: graphspell, multid | |
| 08:01 | [fx] main panel: dictionaries check-in: 24a9f4dab6 user: olr tags: fx, multid | |
Changes
Modified graphspell-js/dawg.js from [e2cd530970] to [3711bc314d].
| ︙ | ︙ | |||
372 373 374 375 376 377 378 |
if (nCompressionMethod == 1) {
sByDic = this.oRoot.convToBytes1(this.nBytesArc, this.nBytesNodeAddress);
for (let oNode of this.dMinimizedNodes.values()) {
sByDic += oNode.convToBytes1(this.nBytesArc, this.nBytesNodeAddress);
}
}
let oJSON = {
| | | 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 |
if (nCompressionMethod == 1) {
sByDic = this.oRoot.convToBytes1(this.nBytesArc, this.nBytesNodeAddress);
for (let oNode of this.dMinimizedNodes.values()) {
sByDic += oNode.convToBytes1(this.nBytesArc, this.nBytesNodeAddress);
}
}
let oJSON = {
"sHeader": "/grammalecte-fsa/",
"sLangCode": this.sLangCode,
"sLangName": this.sLangName,
"sDicName": this.sDicName,
"sFileName": "[none]",
"sDate": this._getDate(),
"nEntry": this.nEntry,
"nChar": this.nChar,
|
| ︙ | ︙ |
Modified graphspell-js/ibdawg.js from [050f6e0036] to [1b3dff6227].
| ︙ | ︙ | |||
115 116 117 118 119 120 121 |
for (let i = 0; i < this.sByDic.length; i+=2) {
lTemp.push(parseInt(this.sByDic.slice(i, i+2), 16));
}
this.byDic = lTemp;
//this.byDic = new Uint8Array(lTemp); // not quicker, even slower
/* end of bug workaround */
| | | | 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
for (let i = 0; i < this.sByDic.length; i+=2) {
lTemp.push(parseInt(this.sByDic.slice(i, i+2), 16));
}
this.byDic = lTemp;
//this.byDic = new Uint8Array(lTemp); // not quicker, even slower
/* end of bug workaround */
if (!this.sHeader.startsWith("/grammalecte-fsa/")) {
throw TypeError("# Error. Not a grammalecte-fsa binary dictionary. Header: " + this.sHeader);
}
if (!(this.nCompressionMethod == 1 || this.nCompressionMethod == 2 || this.nCompressionMethod == 3)) {
throw RangeError("# Error. Unknown dictionary compression method: " + this.nCompressionMethod);
}
// <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value
this.dChar = helpers.objectToMap(this.dChar);
this.dCharVal = this.dChar.gl_reverse();
|
| ︙ | ︙ | |||
179 180 181 182 183 184 185 |
` Arcs values: ${this.nArcVal} = ${this.nChar} characters, ${this.nAff} affixes, ${this.nTag} tags\n` +
` Dictionary: ${this.nEntry} entries, ${this.nNode} nodes, ${this.nArc} arcs\n` +
` Address size: ${this.nBytesNodeAddress} bytes, Arc size: ${this.nBytesArc} bytes\n`;
}
getJSON () {
let oJSON = {
| | | 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
` Arcs values: ${this.nArcVal} = ${this.nChar} characters, ${this.nAff} affixes, ${this.nTag} tags\n` +
` Dictionary: ${this.nEntry} entries, ${this.nNode} nodes, ${this.nArc} arcs\n` +
` Address size: ${this.nBytesNodeAddress} bytes, Arc size: ${this.nBytesArc} bytes\n`;
}
getJSON () {
let oJSON = {
"sHeader": "/grammalecte-fsa/",
"sLangCode": this.sLangCode,
"sLangName": this.sLangName,
"sDicName": this.sDicName,
"sFileName": this.sFileName,
"sDate": this.sDate,
"nEntry": this.nEntry,
"nChar": this.nChar,
|
| ︙ | ︙ |
Modified graphspell/dawg.py from [7cd1b4dc56] to [63684196d2].
| ︙ | ︙ | |||
396 397 398 399 400 401 402 |
for oNode in self.lSortedNodes:
byDic += oNode.convToBytes2(self.nBytesArc, self.nBytesNodeAddress)
elif nCompressionMethod == 3:
byDic = self.oRoot.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset)
for oNode in self.lSortedNodes:
byDic += oNode.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset)
return {
| | | 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 |
for oNode in self.lSortedNodes:
byDic += oNode.convToBytes2(self.nBytesArc, self.nBytesNodeAddress)
elif nCompressionMethod == 3:
byDic = self.oRoot.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset)
for oNode in self.lSortedNodes:
byDic += oNode.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset)
return {
"sHeader": "/grammalecte-fsa/",
"sLangCode": self.sLangCode,
"sLangName": self.sLangName,
"sDicName": self.sDicName,
"sFileName": self.sFileName,
"sDate": self._getDate(),
"nEntry": self.nEntry,
"nChar": self.nChar,
|
| ︙ | ︙ | |||
438 439 440 441 442 443 444 |
def writeBinary (self, sPathFile, nCompressionMethod, bDebug=False):
"""
Format of the binary indexable dictionary:
Each section is separated with 4 bytes of \0
- Section Header:
| | | 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 |
def writeBinary (self, sPathFile, nCompressionMethod, bDebug=False):
"""
Format of the binary indexable dictionary:
Each section is separated with 4 bytes of \0
- Section Header:
/grammalecte-fsa/[compression method]
* compression method is an ASCII string
- Section Informations:
/[lang code]
/[lang name]
/[dictionary name]
/[date creation]
|
| ︙ | ︙ | |||
472 473 474 475 476 477 478 |
See DawgNode.convToBytes() for details.
"""
self._calculateBinary(nCompressionMethod)
if not sPathFile.endswith(".bdic"):
sPathFile += "."+str(nCompressionMethod)+".bdic"
with open(sPathFile, 'wb') as hDst:
# header
| | | 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 |
See DawgNode.convToBytes() for details.
"""
self._calculateBinary(nCompressionMethod)
if not sPathFile.endswith(".bdic"):
sPathFile += "."+str(nCompressionMethod)+".bdic"
with open(sPathFile, 'wb') as hDst:
# header
hDst.write("/grammalecte-fsa/{}/".format(nCompressionMethod).encode("utf-8"))
hDst.write(b"\0\0\0\0")
# infos
sInfo = "{}//{}//{}//{}//{}//{}//{}//{}//{}//{}//{}//{}//".format(self.sLangCode, self.sLangName, self.sDicName, self._getDate(), \
self.nChar, self.nBytesArc, self.nBytesNodeAddress, \
self.nEntry, self.nNode, self.nArc, self.nAff, self.cStemming)
hDst.write(sInfo.encode("utf-8"))
hDst.write(b"\0\0\0\0")
|
| ︙ | ︙ |
Modified graphspell/ibdawg.py from [b794aeec36] to [f523996e8f].
| ︙ | ︙ | |||
132 133 134 135 136 137 138 |
raise ValueError(" # Error: unknown code: {}".format(self.nCompressionMethod))
self.bOptNumSigle = False
self.bOptNumAtLast = False
def _initBinary (self):
"initialize with binary structure file"
| | | | | | | 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
raise ValueError(" # Error: unknown code: {}".format(self.nCompressionMethod))
self.bOptNumSigle = False
self.bOptNumAtLast = False
def _initBinary (self):
"initialize with binary structure file"
if self.by[0:17] != b"/grammalecte-fsa/":
raise TypeError("# Error. Not a grammalecte-fsa binary dictionary. Header: {}".format(self.by[0:9]))
if not(self.by[17:18] == b"1" or self.by[17:18] == b"2" or self.by[17:18] == b"3"):
raise ValueError("# Error. Unknown dictionary version: {}".format(self.by[17:18]))
try:
header, info, values, bdic = self.by.split(b"\0\0\0\0", 3)
except Exception:
raise Exception
self.nCompressionMethod = int(self.by[17:18].decode("utf-8"))
self.sHeader = header.decode("utf-8")
self.lArcVal = values.decode("utf-8").split("\t")
self.nArcVal = len(self.lArcVal)
self.byDic = bdic
l = info.decode("utf-8").split("//")
self.sLangCode = l.pop(0)
|
| ︙ | ︙ | |||
187 188 189 190 191 192 193 |
def writeAsJSObject (self, spfDest, bInJSModule=False, bBinaryDictAsHexString=False):
"write IBDAWG as a JavaScript object in a JavaScript module"
with open(spfDest, "w", encoding="utf-8", newline="\n") as hDst:
if bInJSModule:
hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ')
hDst.write(json.dumps({
| | | 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 |
def writeAsJSObject (self, spfDest, bInJSModule=False, bBinaryDictAsHexString=False):
"write IBDAWG as a JavaScript object in a JavaScript module"
with open(spfDest, "w", encoding="utf-8", newline="\n") as hDst:
if bInJSModule:
hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ')
hDst.write(json.dumps({
"sHeader": "/grammalecte-fsa/",
"sLangCode": self.sLangCode,
"sLangName": self.sLangName,
"sDicName": self.sDicName,
"sFileName": self.sFileName,
"sDate": self.sDate,
"nEntry": self.nEntry,
"nChar": self.nChar,
|
| ︙ | ︙ |