Grammalecte  Check-in [9b380a48ce]

Overview
Comment:[core][fx][js] Binary dictionary as a hexadecimal string instead of a list of integers (JavaScript crap again: Mozilla’s parser can’t deal with file bigger than 4 Mb)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | core | fx
Files: files | file ages | folders
SHA3-256: 9b380a48cec69f7b8ba925ad8193ff0fea16b612fb767fe4483fda9a332e58de
User & Date: olr on 2017-09-19 11:34:40
Original Comment: [fx][js] Binary dictionary as a hexadecimal string instead of a list of integers (JavaScript crap again: Mozilla’s parser can’t deal with file bigger than 4 Mb)
Other Links: manifest | tags
Context
2017-09-19
11:36
[core][js] remove useless logs check-in: 8beaa4879a user: olr tags: trunk, core
11:34
[core][fx][js] Binary dictionary as a hexadecimal string instead of a list of integers (JavaScript crap again: Mozilla’s parser can’t deal with file bigger than 4 Mb) check-in: 9b380a48ce user: olr tags: trunk, core, fx
09:19
[fr] dictionary update: 6.2 check-in: 8bff04e371 user: olr tags: trunk, fr
Changes

Modified gc_core/js/ibdawg.js from [8b6ee4e0fe] to [5baf4dbff2].

31
32
33
34
35
36
37
















38
39
40
41
42
43
44
            throw Error("# Error. File not found or not loadable.\n" + e.message + "\n");
        }
        /*
            Properties:
            sName, nVersion, sHeader, lArcVal, nArcVal, byDic, sLang, nChar, nBytesArc, nBytesNodeAddress,
            nEntries, nNode, nArc, nAff, cStemming, nTag, dChar, _arcMask, _finalNodeMask, _lastArcMask, _addrBitMask, nBytesOffset,
        */
















        if (!this.sHeader.startsWith("/pyfsa/")) {
            throw TypeError("# Error. Not a pyfsa binary dictionary. Header: " + this.sHeader);
        }
        if (!(this.nVersion == "1" || this.nVersion == "2" || this.nVersion == "3")) {
            throw RangeError("# Error. Unknown dictionary version: " + this.nVersion);
        }
        // <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
            throw Error("# Error. File not found or not loadable.\n" + e.message + "\n");
        }
        /*
            Properties:
            sName, nVersion, sHeader, lArcVal, nArcVal, byDic, sLang, nChar, nBytesArc, nBytesNodeAddress,
            nEntries, nNode, nArc, nAff, cStemming, nTag, dChar, _arcMask, _finalNodeMask, _lastArcMask, _addrBitMask, nBytesOffset,
        */

        /*
            Bug workaround.
            Mozilla’s JS parser sucks. Can’t read file bigger than 4 Mb!
            So we convert huge hexadecimal string to list of numbers…
            https://github.com/mozilla/addons-linter/issues/1361
        */
        console.log(this.byDic);
        let lTemp = [];
        for (let i = 0;  i < this.byDic.length;  i+=2) {
            lTemp.push(parseInt(this.byDic.slice(i, i+2), 16));
        }
        this.byDic = lTemp;
        console.log("DONE");
        /* end of bug workaround */

        if (!this.sHeader.startsWith("/pyfsa/")) {
            throw TypeError("# Error. Not a pyfsa binary dictionary. Header: " + this.sHeader);
        }
        if (!(this.nVersion == "1" || this.nVersion == "2" || this.nVersion == "3")) {
            throw RangeError("# Error. Unknown dictionary version: " + this.nVersion);
        }
        // <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value

Modified gc_core/py/ibdawg.py from [8aac99b428] to [e68f28dc22].

96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114



115
116
117
118
119
120
121
122

    def getInfo (self):
        return  "  Language: {0.sLang:>10}      Version: {0.nVersion:>2}      Stemming: {0.cStemming}FX\n" \
                "  Arcs values:  {0.nArcVal:>10,} = {0.nChar:>5,} characters,  {0.nAff:>6,} affixes,  {0.nTag:>6,} tags\n" \
                "  Dictionary: {0.nEntries:>12,} entries,    {0.nNode:>11,} nodes,   {0.nArc:>11,} arcs\n" \
                "  Address size: {0.nBytesNodeAddress:>1} bytes,  Arc size: {0.nBytesArc:>1} bytes\n".format(self)

    def writeAsJSObject (self, spfDest, bInJSModule=False):
        "write IBDAWG as a JavaScript object in a JavaScript module"
        import json
        with open(spfDest, "w", encoding="utf-8", newline="\n") as hDst:
            if bInJSModule:
                hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ')
            hDst.write(json.dumps({
                            "sName": self.sName,
                            "nVersion": self.nVersion,
                            "sHeader": self.sHeader,
                            "lArcVal": self.lArcVal,
                            "nArcVal": self.nArcVal,



                            "byDic": [ e  for e in self.byDic ],
                            "sLang": self.sLang,
                            "nChar": self.nChar,
                            "nBytesArc": self.nBytesArc,
                            "nBytesNodeAddress": self.nBytesNodeAddress,
                            "nEntries": self.nEntries,
                            "nNode": self.nNode,
                            "nArc": self.nArc,







|











>
>
>
|







96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125

    def getInfo (self):
        return  "  Language: {0.sLang:>10}      Version: {0.nVersion:>2}      Stemming: {0.cStemming}FX\n" \
                "  Arcs values:  {0.nArcVal:>10,} = {0.nChar:>5,} characters,  {0.nAff:>6,} affixes,  {0.nTag:>6,} tags\n" \
                "  Dictionary: {0.nEntries:>12,} entries,    {0.nNode:>11,} nodes,   {0.nArc:>11,} arcs\n" \
                "  Address size: {0.nBytesNodeAddress:>1} bytes,  Arc size: {0.nBytesArc:>1} bytes\n".format(self)

    def writeAsJSObject (self, spfDest, bInJSModule=False, bBinaryDictAsHexString=False):
        "write IBDAWG as a JavaScript object in a JavaScript module"
        import json
        with open(spfDest, "w", encoding="utf-8", newline="\n") as hDst:
            if bInJSModule:
                hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ')
            hDst.write(json.dumps({
                            "sName": self.sName,
                            "nVersion": self.nVersion,
                            "sHeader": self.sHeader,
                            "lArcVal": self.lArcVal,
                            "nArcVal": self.nArcVal,
                            # JavaScript is a pile of shit, so Mozilla’s JS parser don’t like file bigger than 4 Mb!
                            # So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension.
                            # https://github.com/mozilla/addons-linter/issues/1361
                            "byDic": self.byDic.hex()  if bBinaryDictAsHexString  else [ e  for e in self.byDic ],
                            "sLang": self.sLang,
                            "nChar": self.nChar,
                            "nBytesArc": self.nBytesArc,
                            "nBytesNodeAddress": self.nBytesNodeAddress,
                            "nEntries": self.nEntries,
                            "nNode": self.nNode,
                            "nArc": self.nArc,

Modified lex_build.py from [a7e2785f63] to [c786502779].

14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
    oDAWG = fsa.DAWG(spfSrc, sLangName, cStemmingMethod)
    dir_util.mkpath("grammalecte/_dictionaries")
    oDAWG.writeInfo("grammalecte/_dictionaries/" + sDicName + ".info.txt")
    oDAWG.createBinary("grammalecte/_dictionaries/" + sDicName + ".bdic", int(nCompressMethod))
    if bJSON:
        dir_util.mkpath("grammalecte-js/_dictionaries")
        oDic = IBDAWG(sDicName + ".bdic")
        oDic.writeAsJSObject("grammalecte-js/_dictionaries/" + sDicName + ".json")


def main ():
    xParser = argparse.ArgumentParser()
    xParser.add_argument("src_lexicon", type=str, help="path and file name of the source lexicon")
    xParser.add_argument("lang_name", type=str, help="language name")
    xParser.add_argument("dic_name", type=str, help="dictionary file name (without extension)")







|







14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
    oDAWG = fsa.DAWG(spfSrc, sLangName, cStemmingMethod)
    dir_util.mkpath("grammalecte/_dictionaries")
    oDAWG.writeInfo("grammalecte/_dictionaries/" + sDicName + ".info.txt")
    oDAWG.createBinary("grammalecte/_dictionaries/" + sDicName + ".bdic", int(nCompressMethod))
    if bJSON:
        dir_util.mkpath("grammalecte-js/_dictionaries")
        oDic = IBDAWG(sDicName + ".bdic")
        oDic.writeAsJSObject("grammalecte-js/_dictionaries/" + sDicName + ".json", bBinaryDictAsHexString=True)


def main ():
    xParser = argparse.ArgumentParser()
    xParser.add_argument("src_lexicon", type=str, help="path and file name of the source lexicon")
    xParser.add_argument("lang_name", type=str, help="language name")
    xParser.add_argument("dic_name", type=str, help="dictionary file name (without extension)")