Overview
Comment: | [graphspell] new header <grammalecte-fsa> for binary dictionaries |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | graphspell | multid |
Files: | files | file ages | folders |
SHA3-256: | 8333a8bf1bb8a85bc0b361a59d2fe3c8 |
User & Date: | olr on 2018-04-01 08:16:11 |
Other Links: | branch diff | manifest | tags |
Context
2018-04-01
| ||
09:31 | [fr][bug] conj: test if tTags exists check-in: a89587a82c user: olr tags: fr, multid | |
08:16 | [graphspell] new header <grammalecte-fsa> for binary dictionaries check-in: 8333a8bf1b user: olr tags: graphspell, multid | |
08:01 | [fx] main panel: dictionaries check-in: 24a9f4dab6 user: olr tags: fx, multid | |
Changes
Modified graphspell-js/dawg.js from [e2cd530970] to [3711bc314d].
︙ | ︙ | |||
372 373 374 375 376 377 378 | if (nCompressionMethod == 1) { sByDic = this.oRoot.convToBytes1(this.nBytesArc, this.nBytesNodeAddress); for (let oNode of this.dMinimizedNodes.values()) { sByDic += oNode.convToBytes1(this.nBytesArc, this.nBytesNodeAddress); } } let oJSON = { | | | 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 | if (nCompressionMethod == 1) { sByDic = this.oRoot.convToBytes1(this.nBytesArc, this.nBytesNodeAddress); for (let oNode of this.dMinimizedNodes.values()) { sByDic += oNode.convToBytes1(this.nBytesArc, this.nBytesNodeAddress); } } let oJSON = { "sHeader": "/grammalecte-fsa/", "sLangCode": this.sLangCode, "sLangName": this.sLangName, "sDicName": this.sDicName, "sFileName": "[none]", "sDate": this._getDate(), "nEntry": this.nEntry, "nChar": this.nChar, |
︙ | ︙ |
Modified graphspell-js/ibdawg.js from [050f6e0036] to [1b3dff6227].
︙ | ︙ | |||
115 116 117 118 119 120 121 | for (let i = 0; i < this.sByDic.length; i+=2) { lTemp.push(parseInt(this.sByDic.slice(i, i+2), 16)); } this.byDic = lTemp; //this.byDic = new Uint8Array(lTemp); // not quicker, even slower /* end of bug workaround */ | | | | 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 | for (let i = 0; i < this.sByDic.length; i+=2) { lTemp.push(parseInt(this.sByDic.slice(i, i+2), 16)); } this.byDic = lTemp; //this.byDic = new Uint8Array(lTemp); // not quicker, even slower /* end of bug workaround */ if (!this.sHeader.startsWith("/grammalecte-fsa/")) { throw TypeError("# Error. Not a grammalecte-fsa binary dictionary. Header: " + this.sHeader); } if (!(this.nCompressionMethod == 1 || this.nCompressionMethod == 2 || this.nCompressionMethod == 3)) { throw RangeError("# Error. Unknown dictionary compression method: " + this.nCompressionMethod); } // <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value this.dChar = helpers.objectToMap(this.dChar); this.dCharVal = this.dChar.gl_reverse(); |
︙ | ︙ | |||
179 180 181 182 183 184 185 | ` Arcs values: ${this.nArcVal} = ${this.nChar} characters, ${this.nAff} affixes, ${this.nTag} tags\n` + ` Dictionary: ${this.nEntry} entries, ${this.nNode} nodes, ${this.nArc} arcs\n` + ` Address size: ${this.nBytesNodeAddress} bytes, Arc size: ${this.nBytesArc} bytes\n`; } getJSON () { let oJSON = { | | | 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 | ` Arcs values: ${this.nArcVal} = ${this.nChar} characters, ${this.nAff} affixes, ${this.nTag} tags\n` + ` Dictionary: ${this.nEntry} entries, ${this.nNode} nodes, ${this.nArc} arcs\n` + ` Address size: ${this.nBytesNodeAddress} bytes, Arc size: ${this.nBytesArc} bytes\n`; } getJSON () { let oJSON = { "sHeader": "/grammalecte-fsa/", "sLangCode": this.sLangCode, "sLangName": this.sLangName, "sDicName": this.sDicName, "sFileName": this.sFileName, "sDate": this.sDate, "nEntry": this.nEntry, "nChar": this.nChar, |
︙ | ︙ |
Modified graphspell/dawg.py from [7cd1b4dc56] to [63684196d2].
︙ | ︙ | |||
396 397 398 399 400 401 402 | for oNode in self.lSortedNodes: byDic += oNode.convToBytes2(self.nBytesArc, self.nBytesNodeAddress) elif nCompressionMethod == 3: byDic = self.oRoot.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset) for oNode in self.lSortedNodes: byDic += oNode.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset) return { | | | 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 | for oNode in self.lSortedNodes: byDic += oNode.convToBytes2(self.nBytesArc, self.nBytesNodeAddress) elif nCompressionMethod == 3: byDic = self.oRoot.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset) for oNode in self.lSortedNodes: byDic += oNode.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset) return { "sHeader": "/grammalecte-fsa/", "sLangCode": self.sLangCode, "sLangName": self.sLangName, "sDicName": self.sDicName, "sFileName": self.sFileName, "sDate": self._getDate(), "nEntry": self.nEntry, "nChar": self.nChar, |
︙ | ︙ | |||
438 439 440 441 442 443 444 | def writeBinary (self, sPathFile, nCompressionMethod, bDebug=False): """ Format of the binary indexable dictionary: Each section is separated with 4 bytes of \0 - Section Header: | | | 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 | def writeBinary (self, sPathFile, nCompressionMethod, bDebug=False): """ Format of the binary indexable dictionary: Each section is separated with 4 bytes of \0 - Section Header: /grammalecte-fsa/[compression method] * compression method is an ASCII string - Section Informations: /[lang code] /[lang name] /[dictionary name] /[date creation] |
︙ | ︙ | |||
472 473 474 475 476 477 478 | See DawgNode.convToBytes() for details. """ self._calculateBinary(nCompressionMethod) if not sPathFile.endswith(".bdic"): sPathFile += "."+str(nCompressionMethod)+".bdic" with open(sPathFile, 'wb') as hDst: # header | | | 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 | See DawgNode.convToBytes() for details. """ self._calculateBinary(nCompressionMethod) if not sPathFile.endswith(".bdic"): sPathFile += "."+str(nCompressionMethod)+".bdic" with open(sPathFile, 'wb') as hDst: # header hDst.write("/grammalecte-fsa/{}/".format(nCompressionMethod).encode("utf-8")) hDst.write(b"\0\0\0\0") # infos sInfo = "{}//{}//{}//{}//{}//{}//{}//{}//{}//{}//{}//{}//".format(self.sLangCode, self.sLangName, self.sDicName, self._getDate(), \ self.nChar, self.nBytesArc, self.nBytesNodeAddress, \ self.nEntry, self.nNode, self.nArc, self.nAff, self.cStemming) hDst.write(sInfo.encode("utf-8")) hDst.write(b"\0\0\0\0") |
︙ | ︙ |
Modified graphspell/ibdawg.py from [b794aeec36] to [f523996e8f].
︙ | ︙ | |||
132 133 134 135 136 137 138 | raise ValueError(" # Error: unknown code: {}".format(self.nCompressionMethod)) self.bOptNumSigle = False self.bOptNumAtLast = False def _initBinary (self): "initialize with binary structure file" | | | | | | | 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 | raise ValueError(" # Error: unknown code: {}".format(self.nCompressionMethod)) self.bOptNumSigle = False self.bOptNumAtLast = False def _initBinary (self): "initialize with binary structure file" if self.by[0:17] != b"/grammalecte-fsa/": raise TypeError("# Error. Not a grammalecte-fsa binary dictionary. Header: {}".format(self.by[0:9])) if not(self.by[17:18] == b"1" or self.by[17:18] == b"2" or self.by[17:18] == b"3"): raise ValueError("# Error. Unknown dictionary version: {}".format(self.by[17:18])) try: header, info, values, bdic = self.by.split(b"\0\0\0\0", 3) except Exception: raise Exception self.nCompressionMethod = int(self.by[17:18].decode("utf-8")) self.sHeader = header.decode("utf-8") self.lArcVal = values.decode("utf-8").split("\t") self.nArcVal = len(self.lArcVal) self.byDic = bdic l = info.decode("utf-8").split("//") self.sLangCode = l.pop(0) |
︙ | ︙ | |||
187 188 189 190 191 192 193 | def writeAsJSObject (self, spfDest, bInJSModule=False, bBinaryDictAsHexString=False): "write IBDAWG as a JavaScript object in a JavaScript module" with open(spfDest, "w", encoding="utf-8", newline="\n") as hDst: if bInJSModule: hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ') hDst.write(json.dumps({ | | | 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 | def writeAsJSObject (self, spfDest, bInJSModule=False, bBinaryDictAsHexString=False): "write IBDAWG as a JavaScript object in a JavaScript module" with open(spfDest, "w", encoding="utf-8", newline="\n") as hDst: if bInJSModule: hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ') hDst.write(json.dumps({ "sHeader": "/grammalecte-fsa/", "sLangCode": self.sLangCode, "sLangName": self.sLangName, "sDicName": self.sDicName, "sFileName": self.sFileName, "sDate": self.sDate, "nEntry": self.nEntry, "nChar": self.nChar, |
︙ | ︙ |