Grammalecte  Check-in [9b380a48ce]

Overview
Comment:[core][fx][js] Binary dictionary as a hexadecimal string instead of a list of integers (JavaScript crap again: Mozilla’s parser can’t deal with file bigger than 4 Mb)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | core | fx
Files: files | file ages | folders
SHA3-256: 9b380a48cec69f7b8ba925ad8193ff0fea16b612fb767fe4483fda9a332e58de
User & Date: olr on 2017-09-19 11:34:40
Original Comment: [fx][js] Binary dictionary as a hexadecimal string instead of a list of integers (JavaScript crap again: Mozilla’s parser can’t deal with file bigger than 4 Mb)
Other Links: manifest | tags
Context
2017-09-19
11:36
[core][js] remove useless logs check-in: 8beaa4879a user: olr tags: trunk, core
11:34
[core][fx][js] Binary dictionary as a hexadecimal string instead of a list of integers (JavaScript crap again: Mozilla’s parser can’t deal with file bigger than 4 Mb) check-in: 9b380a48ce user: olr tags: trunk, core, fx
09:19
[fr] dictionary update: 6.2 check-in: 8bff04e371 user: olr tags: trunk, fr
Changes

Modified gc_core/js/ibdawg.js from [8b6ee4e0fe] to [5baf4dbff2].

31
32
33
34
35
36
37
















38
39
40
41
42
43
44
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







            throw Error("# Error. File not found or not loadable.\n" + e.message + "\n");
        }
        /*
            Properties:
            sName, nVersion, sHeader, lArcVal, nArcVal, byDic, sLang, nChar, nBytesArc, nBytesNodeAddress,
            nEntries, nNode, nArc, nAff, cStemming, nTag, dChar, _arcMask, _finalNodeMask, _lastArcMask, _addrBitMask, nBytesOffset,
        */

        /*
            Bug workaround.
            Mozilla’s JS parser sucks. Can’t read file bigger than 4 Mb!
            So we convert huge hexadecimal string to list of numbers…
            https://github.com/mozilla/addons-linter/issues/1361
        */
        console.log(this.byDic);
        let lTemp = [];
        for (let i = 0;  i < this.byDic.length;  i+=2) {
            lTemp.push(parseInt(this.byDic.slice(i, i+2), 16));
        }
        this.byDic = lTemp;
        console.log("DONE");
        /* end of bug workaround */

        if (!this.sHeader.startsWith("/pyfsa/")) {
            throw TypeError("# Error. Not a pyfsa binary dictionary. Header: " + this.sHeader);
        }
        if (!(this.nVersion == "1" || this.nVersion == "2" || this.nVersion == "3")) {
            throw RangeError("# Error. Unknown dictionary version: " + this.nVersion);
        }
        // <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value

Modified gc_core/py/ibdawg.py from [8aac99b428] to [e68f28dc22].

96
97
98
99
100
101
102
103

104
105
106
107
108
109
110
111
112
113
114



115

116
117
118
119
120
121
122
96
97
98
99
100
101
102

103
104
105
106
107
108
109
110
111
112
113
114
115
116
117

118
119
120
121
122
123
124
125







-
+











+
+
+
-
+








    def getInfo (self):
        return  "  Language: {0.sLang:>10}      Version: {0.nVersion:>2}      Stemming: {0.cStemming}FX\n" \
                "  Arcs values:  {0.nArcVal:>10,} = {0.nChar:>5,} characters,  {0.nAff:>6,} affixes,  {0.nTag:>6,} tags\n" \
                "  Dictionary: {0.nEntries:>12,} entries,    {0.nNode:>11,} nodes,   {0.nArc:>11,} arcs\n" \
                "  Address size: {0.nBytesNodeAddress:>1} bytes,  Arc size: {0.nBytesArc:>1} bytes\n".format(self)

    def writeAsJSObject (self, spfDest, bInJSModule=False):
    def writeAsJSObject (self, spfDest, bInJSModule=False, bBinaryDictAsHexString=False):
        "write IBDAWG as a JavaScript object in a JavaScript module"
        import json
        with open(spfDest, "w", encoding="utf-8", newline="\n") as hDst:
            if bInJSModule:
                hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ')
            hDst.write(json.dumps({
                            "sName": self.sName,
                            "nVersion": self.nVersion,
                            "sHeader": self.sHeader,
                            "lArcVal": self.lArcVal,
                            "nArcVal": self.nArcVal,
                            # JavaScript is a pile of shit, so Mozilla’s JS parser don’t like file bigger than 4 Mb!
                            # So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension.
                            # https://github.com/mozilla/addons-linter/issues/1361
                            "byDic": [ e  for e in self.byDic ],
                            "byDic": self.byDic.hex()  if bBinaryDictAsHexString  else [ e  for e in self.byDic ],
                            "sLang": self.sLang,
                            "nChar": self.nChar,
                            "nBytesArc": self.nBytesArc,
                            "nBytesNodeAddress": self.nBytesNodeAddress,
                            "nEntries": self.nEntries,
                            "nNode": self.nNode,
                            "nArc": self.nArc,

Modified lex_build.py from [a7e2785f63] to [c786502779].

14
15
16
17
18
19
20
21

22
23
24
25
26
27
28
14
15
16
17
18
19
20

21
22
23
24
25
26
27
28







-
+







    oDAWG = fsa.DAWG(spfSrc, sLangName, cStemmingMethod)
    dir_util.mkpath("grammalecte/_dictionaries")
    oDAWG.writeInfo("grammalecte/_dictionaries/" + sDicName + ".info.txt")
    oDAWG.createBinary("grammalecte/_dictionaries/" + sDicName + ".bdic", int(nCompressMethod))
    if bJSON:
        dir_util.mkpath("grammalecte-js/_dictionaries")
        oDic = IBDAWG(sDicName + ".bdic")
        oDic.writeAsJSObject("grammalecte-js/_dictionaries/" + sDicName + ".json")
        oDic.writeAsJSObject("grammalecte-js/_dictionaries/" + sDicName + ".json", bBinaryDictAsHexString=True)


def main ():
    xParser = argparse.ArgumentParser()
    xParser.add_argument("src_lexicon", type=str, help="path and file name of the source lexicon")
    xParser.add_argument("lang_name", type=str, help="language name")
    xParser.add_argument("dic_name", type=str, help="dictionary file name (without extension)")