Overview
Comment: | [core][fx][js] Binary dictionary as a hexadecimal string instead of a list of integers (JavaScript crap again: Mozilla’s parser can’t deal with files bigger than 4 MB) |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | core | fx |
Files: | files | file ages | folders |
SHA3-256: |
9b380a48cec69f7b8ba925ad8193ff0f |
User & Date: | olr on 2017-09-19 11:34:40 |
Original Comment: | [fx][js] Binary dictionary as a hexadecimal string instead of a list of integers (JavaScript crap again: Mozilla’s parser can’t deal with files bigger than 4 MB) |
Other Links: | manifest | tags |
Context
2017-09-19
| ||
11:36 | [core][js] remove useless logs check-in: 8beaa4879a user: olr tags: trunk, core | |
11:34 | [core][fx][js] Binary dictionary as a hexadecimal string instead of a list of integers (JavaScript crap again: Mozilla’s parser can’t deal with files bigger than 4 MB) check-in: 9b380a48ce user: olr tags: trunk, core, fx | |
09:19 | [fr] dictionary update: 6.2 check-in: 8bff04e371 user: olr tags: trunk, fr | |
Changes
Modified gc_core/js/ibdawg.js from [8b6ee4e0fe] to [5baf4dbff2].
︙ | ︙ | |||
31 32 33 34 35 36 37 38 39 40 41 42 43 44 | throw Error("# Error. File not found or not loadable.\n" + e.message + "\n"); } /* Properties: sName, nVersion, sHeader, lArcVal, nArcVal, byDic, sLang, nChar, nBytesArc, nBytesNodeAddress, nEntries, nNode, nArc, nAff, cStemming, nTag, dChar, _arcMask, _finalNodeMask, _lastArcMask, _addrBitMask, nBytesOffset, */ if (!this.sHeader.startsWith("/pyfsa/")) { throw TypeError("# Error. Not a pyfsa binary dictionary. Header: " + this.sHeader); } if (!(this.nVersion == "1" || this.nVersion == "2" || this.nVersion == "3")) { throw RangeError("# Error. Unknown dictionary version: " + this.nVersion); } // <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value | > > > > > > > > > > > > > > > > | 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 | throw Error("# Error. File not found or not loadable.\n" + e.message + "\n"); } /* Properties: sName, nVersion, sHeader, lArcVal, nArcVal, byDic, sLang, nChar, nBytesArc, nBytesNodeAddress, nEntries, nNode, nArc, nAff, cStemming, nTag, dChar, _arcMask, _finalNodeMask, _lastArcMask, _addrBitMask, nBytesOffset, */ /* Bug workaround. Mozilla’s JS parser sucks. Can’t read file bigger than 4 Mb! So we convert huge hexadecimal string to list of numbers… https://github.com/mozilla/addons-linter/issues/1361 */ console.log(this.byDic); let lTemp = []; for (let i = 0; i < this.byDic.length; i+=2) { lTemp.push(parseInt(this.byDic.slice(i, i+2), 16)); } this.byDic = lTemp; console.log("DONE"); /* end of bug workaround */ if (!this.sHeader.startsWith("/pyfsa/")) { throw TypeError("# Error. Not a pyfsa binary dictionary. Header: " + this.sHeader); } if (!(this.nVersion == "1" || this.nVersion == "2" || this.nVersion == "3")) { throw RangeError("# Error. Unknown dictionary version: " + this.nVersion); } // <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value |
︙ | ︙ |
Modified gc_core/py/ibdawg.py from [8aac99b428] to [e68f28dc22].
︙ | ︙ | |||
96 97 98 99 100 101 102 | def getInfo (self): return " Language: {0.sLang:>10} Version: {0.nVersion:>2} Stemming: {0.cStemming}FX\n" \ " Arcs values: {0.nArcVal:>10,} = {0.nChar:>5,} characters, {0.nAff:>6,} affixes, {0.nTag:>6,} tags\n" \ " Dictionary: {0.nEntries:>12,} entries, {0.nNode:>11,} nodes, {0.nArc:>11,} arcs\n" \ " Address size: {0.nBytesNodeAddress:>1} bytes, Arc size: {0.nBytesArc:>1} bytes\n".format(self) | | > > > | | 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 | def getInfo (self): return " Language: {0.sLang:>10} Version: {0.nVersion:>2} Stemming: {0.cStemming}FX\n" \ " Arcs values: {0.nArcVal:>10,} = {0.nChar:>5,} characters, {0.nAff:>6,} affixes, {0.nTag:>6,} tags\n" \ " Dictionary: {0.nEntries:>12,} entries, {0.nNode:>11,} nodes, {0.nArc:>11,} arcs\n" \ " Address size: {0.nBytesNodeAddress:>1} bytes, Arc size: {0.nBytesArc:>1} bytes\n".format(self) def writeAsJSObject (self, spfDest, bInJSModule=False, bBinaryDictAsHexString=False): "write IBDAWG as a JavaScript object in a JavaScript module" import json with open(spfDest, "w", encoding="utf-8", newline="\n") as hDst: if bInJSModule: hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ') hDst.write(json.dumps({ "sName": self.sName, "nVersion": self.nVersion, "sHeader": self.sHeader, "lArcVal": self.lArcVal, "nArcVal": self.nArcVal, # JavaScript is a pile of shit, so Mozilla’s JS parser don’t like file bigger than 4 Mb! # So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension. # https://github.com/mozilla/addons-linter/issues/1361 "byDic": self.byDic.hex() if bBinaryDictAsHexString else [ e for e in self.byDic ], "sLang": self.sLang, "nChar": self.nChar, "nBytesArc": self.nBytesArc, "nBytesNodeAddress": self.nBytesNodeAddress, "nEntries": self.nEntries, "nNode": self.nNode, "nArc": self.nArc, |
︙ | ︙ |
Modified lex_build.py from [a7e2785f63] to [c786502779].
︙ | ︙ | |||
14 15 16 17 18 19 20 | oDAWG = fsa.DAWG(spfSrc, sLangName, cStemmingMethod) dir_util.mkpath("grammalecte/_dictionaries") oDAWG.writeInfo("grammalecte/_dictionaries/" + sDicName + ".info.txt") oDAWG.createBinary("grammalecte/_dictionaries/" + sDicName + ".bdic", int(nCompressMethod)) if bJSON: dir_util.mkpath("grammalecte-js/_dictionaries") oDic = IBDAWG(sDicName + ".bdic") | | | 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 | oDAWG = fsa.DAWG(spfSrc, sLangName, cStemmingMethod) dir_util.mkpath("grammalecte/_dictionaries") oDAWG.writeInfo("grammalecte/_dictionaries/" + sDicName + ".info.txt") oDAWG.createBinary("grammalecte/_dictionaries/" + sDicName + ".bdic", int(nCompressMethod)) if bJSON: dir_util.mkpath("grammalecte-js/_dictionaries") oDic = IBDAWG(sDicName + ".bdic") oDic.writeAsJSObject("grammalecte-js/_dictionaries/" + sDicName + ".json", bBinaryDictAsHexString=True) def main (): xParser = argparse.ArgumentParser() xParser.add_argument("src_lexicon", type=str, help="path and file name of the source lexicon") xParser.add_argument("lang_name", type=str, help="language name") xParser.add_argument("dic_name", type=str, help="dictionary file name (without extension)") |
︙ | ︙ |