Grammalecte  Check-in [ac98115a31]

Overview
Comment:[graphspell][build] dawg builder: new parameters + consistency
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | build | graphspell
Files: files | file ages | folders
SHA3-256: ac98115a3112c7b2725ac9aa0a4e8ef89e420a6de84b38732c368b05ad13a45a
User & Date: olr on 2018-02-11 10:20:42
Other Links: manifest | tags
Context
2018-02-11
16:06
[graphspell][py] ibdawg: initialization from binary file or JSON check-in: 96290e4468 user: olr tags: trunk, graphspell
10:20
[graphspell][build] dawg builder: new parameters + consistency check-in: ac98115a31 user: olr tags: trunk, build, graphspell
09:22
[graphspell] JSON data clarification check-in: 9c995c9f7f user: olr tags: trunk, graphspell
Changes

Modified gc_core/js/lang_core/gc_engine.js from [c5ee3ec605] to [2f5e964b5d].

319
320
321
322
323
324
325
326

327
328

329
330
331
332
333
334
335
319
320
321
322
323
324
325

326
327

328
329
330
331
332
333
334
335







-
+

-
+








    //// Initialization

    load: function (sContext="JavaScript", sPath="") {
        try {
            if (typeof(require) !== 'undefined') {
                var ibdawg = require("resource://grammalecte/graphspell/ibdawg.js");
                _oDict = new ibdawg.IBDAWG("${dic_name}.json");
                _oDict = new ibdawg.IBDAWG("${dic_filename}.json");
            } else {
                _oDict = new IBDAWG("${dic_name}.json", sPath);
                _oDict = new IBDAWG("${dic_filename}.json", sPath);
            }
            _sAppContext = sContext;
            _dOptions = gc_options.getOptions(sContext).gl_shallowCopy();     // duplication necessary, to be able to reset to default
        }
        catch (e) {
            helpers.logerror(e);
        }

Modified gc_core/py/lang_core/gc_engine.py from [e1c3ad1859] to [9fc11201d4].

288
289
290
291
292
293
294
295

296
297
298
299
300
301
302
288
289
290
291
292
293
294

295
296
297
298
299
300
301
302







-
+









def load (sContext="Python"):
    global _oDict
    global _sAppContext
    global _dOptions
    try:
        _oDict = IBDAWG("${dic_name}.bdic")
        _oDict = IBDAWG("${dic_filename}.bdic")
        _sAppContext = sContext
        _dOptions = dict(gc_options.getOptions(sContext))   # duplication necessary, to be able to reset to default
    except:
        traceback.print_exc()


def setOption (sOpt, bVal):

Modified gc_lang/fr/build.py from [9eae4b1757] to [248d1906b2].

75
76
77
78
79
80
81
82

83
84
85
86
87
88
89
75
76
77
78
79
80
81

82
83
84
85
86
87
88
89







-
+








def createThunderbirdExtension (sLang, dVars, spLangPack):
    "create extension for Thunderbird"
    print("Building extension for Thunderbird")
    sExtensionName = dVars['tb_identifier'] + "-v" + dVars['version'] + '.xpi'
    spfZip = "_build/" + sExtensionName
    hZip = zipfile.ZipFile(spfZip, mode='w', compression=zipfile.ZIP_DEFLATED)
    _copyGrammalecteJSPackageInZipFile(hZip, spLangPack, dVars['dic_name']+".json")
    _copyGrammalecteJSPackageInZipFile(hZip, spLangPack, dVars['dic_filename']+".json")
    for spf in ["LICENSE.txt", "LICENSE.fr.txt"]:
        hZip.write(spf)
    dVars = _createOptionsForThunderbird(dVars)
    helpers.addFolderToZipAndFileFile(hZip, "gc_lang/"+sLang+"/tb", "", dVars, True)
    spDict = "gc_lang/"+sLang+"/xpi/data/dictionaries"
    for sp in os.listdir(spDict):
        if os.path.isdir(spDict+"/"+sp):

Modified gc_lang/fr/config.ini from [874aca7947] to [a48aa6be74].

12
13
14
15
16
17
18


19
20

21
22
23
24
25
26
27
12
13
14
15
16
17
18
19
20
21

22
23
24
25
26
27
28
29







+
+

-
+







link = http://grammalecte.net
description = Correcteur grammatical pour le français.
extras = README_fr.txt
logo = logo.png

# lexicon source
lexicon_src = lexicons/French.lex
# binary dictionary file name
dic_filename = fr
# binary dictionary name
dic_name = fr
dic_name = French
# Finite state automaton compression: 1, 2 (experimental) or 3 (experimental)
fsa_method = 1
# stemming method: S for suffixes only, A for prefixes and suffixes
stemming_method = S

# LibreOffice
unopkg = C:/Program Files/LibreOffice 5/program/unopkg.com

Modified gc_lang/fr/modules/tests.py from [f1c386c508] to [43d45242b9].

20
21
22
23
24
25
26
27

28
29
30
31
32
33
34
20
21
22
23
24
25
26

27
28
29
30
31
32
33
34







-
+







    return s.replace("\u2019", "'").replace("\u2013", "–").replace("\u2014", "—")


class TestDictionary (unittest.TestCase):

    @classmethod
    def setUpClass (cls):
        cls.oDic = IBDAWG("${dic_name}.bdic")
        cls.oDic = IBDAWG("${dic_filename}.bdic")

    def test_lookup (self):
        for sWord in ["branche", "Émilie"]:
            self.assertTrue(self.oDic.lookup(sWord), sWord)

    def test_lookup_failed (self):
        for sWord in ["Branche", "BRANCHE", "BranchE", "BRanche", "BRAnCHE", "émilie"]:

Modified gc_lang/fr/webext/panel/lex_editor.js from [eea97bdcc4] to [83999992e4].

596
597
598
599
600
601
602
603

604
605
606
607
608
609
610
596
597
598
599
600
601
602

603
604
605
606
607
608
609
610







-
+







        oWidgets.setDictData(lEntry.length, oJSON.sDate);
        oWidgets.showElement("export_button");
    },

    build: function (lEntry) {
        oWidgets.showElement("build_progress");
        let xProgressNode = document.getElementById("build_progress");
        let oDAWG = new DAWG(lEntry, "Français - dictionnaire personnel", "S", xProgressNode);
        let oDAWG = new DAWG(lEntry, "S", "fr", "Français", "Dictionnaire personnel", xProgressNode);
        this.oJSON = oDAWG.createBinary(1);
        this.save();
        oWidgets.hideElement("build_progress");
        oWidgets.showElement("export_button");
    },

    save: function () {

Modified graphspell-js/dawg.js from [287e75028f] to [4f989404b7].

24
25
26
27
28
29
30
31

32
33
34
35
36
37
38
24
25
26
27
28
29
30

31
32
33
34
35
36
37
38







-
+







        This code is inspired from Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115)
        We store suffix/affix codes and tags within the graph after the “real” word.
        A word is a list of numbers [ c1, c2, c3 . . . cN, iAffix, iTags]
        Each arc is an index in this.lArcVal, where are stored characters, suffix/affix codes for stemming and tags.
        Important: As usual, the last node (after ‘iTags’) is tagged final, AND the node after ‘cN’ is ALSO tagged final.
    */

    constructor (lEntrySrc, sLangCode, sLangName, sDicName, cStemming, xProgressBarNode=null) {
    constructor (lEntrySrc, cStemming, sLangCode, sLangName="", sDicName="", xProgressBarNode=null) {
        console.log("===== Direct Acyclic Word Graph - Minimal Acyclic Finite State Automaton =====");
        let funcStemmingGen = null;
        switch (cStemming.toUpperCase()) {
            case "A":
                funcStemmingGen = str_transform.defineAffixCode; break;
            case "S":
                funcStemmingGen = str_transform.defineSuffixCode; break;
374
375
376
377
378
379
380
381

382
383
384
385
386
387
388
374
375
376
377
378
379
380

381
382
383
384
385
386
387
388







-
+







        let oJSON = {
            "sHeader": "/pyfsa/",
            "sLangCode": this.sLangCode,
            "sLangName": this.sLangName,
            "sDicName": this.sDicName,
            "sFileName": "[none]",
            "sDate": this._getDate(),
            "nEntries": this.nEntry,
            "nEntry": this.nEntry,
            "nChar": this.nChar,
            "nAff": this.nAff,
            "nTag": this.nTag,
            "cStemming": this.cStemming,
            "dChar": helpers.mapToObject(this.dChar),
            "nNode": this.nNode,
            "nArc": this.nArc,

Modified graphspell-js/ibdawg.js from [35c6ada92f] to [4ebdc2968e].

99
100
101
102
103
104
105
106

107
108
109
110
111
112
113
99
100
101
102
103
104
105

106
107
108
109
110
111
112
113







-
+







        }
        catch (e) {
            throw Error("# Error. File not found or not loadable.\n" + e.message + "\n");
        }
        /*
            Properties:
            sName, nCompressionMethod, sHeader, lArcVal, nArcVal, sByDic, sLang, nChar, nBytesArc, nBytesNodeAddress,
            nEntries, nNode, nArc, nAff, cStemming, nTag, dChar, nBytesOffset,
            nEntry, nNode, nArc, nAff, cStemming, nTag, dChar, nBytesOffset,
        */

        /*
            Bug workaround.
            Mozilla’s JS parser sucks. Can’t read file bigger than 4 Mb!
            So we convert huge hexadecimal string to list of numbers…
            https://github.com/mozilla/addons-linter/issues/1361
175
176
177
178
179
180
181
182

183
184
185
186
187
188
189
175
176
177
178
179
180
181

182
183
184
185
186
187
188
189







-
+







        this.bOptNumAtLast = false;
    }

    getInfo () {
        return  `  Language: ${this.sLangName}   Lang code: ${this.sLangCode}   Dictionary name: ${this.sDicName}\n` +
                `  Compression method: ${this.nCompressionMethod}   Date: ${this.sDate}   Stemming: ${this.cStemming}FX\n` +
                `  Arcs values:  ${this.nArcVal} = ${this.nChar} characters,  ${this.nAff} affixes,  ${this.nTag} tags\n` +
                `  Dictionary: ${this.nEntries} entries,    ${this.nNode} nodes,   ${this.nArc} arcs\n` +
                `  Dictionary: ${this.nEntry} entries,    ${this.nNode} nodes,   ${this.nArc} arcs\n` +
                `  Address size: ${this.nBytesNodeAddress} bytes,  Arc size: ${this.nBytesArc} bytes\n`;
    }

    isValidToken (sToken) {
        // checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)
        if (this.isValid(sToken)) {
            return true;

Modified graphspell/dawg.py from [8edab9530f] to [daf4f76e4f].

37
38
39
40
41
42
43
44

45
46
47
48
49
50
51
37
38
39
40
41
42
43

44
45
46
47
48
49
50
51







-
+







    """DIRECT ACYCLIC WORD GRAPH"""
    # This code is inspired from Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115)
    # We store suffix/affix codes and tags within the graph after the “real” word.
    # A word is a list of numbers [ c1, c2, c3 . . . cN, iAffix, iTags]
    # Each arc is an index in self.lArcVal, where are stored characters, suffix/affix codes for stemming and tags.
    # Important: As usual, the last node (after ‘iTags’) is tagged final, AND the node after ‘cN’ is ALSO tagged final.

    def __init__ (self, spfSrc, sLangCode, sLangName, sDicName, cStemming):
    def __init__ (self, spfSrc, cStemming, sLangCode, sLangName="", sDicName=""):
        print("===== Direct Acyclic Word Graph - Minimal Acyclic Finite State Automaton =====")
        cStemming = cStemming.upper()
        if cStemming == "A":
            funcStemmingGen = st.defineAffixCode
        elif cStemming == "S":
            funcStemmingGen = st.defineSuffixCode
        elif cStemming == "N":
412
413
414
415
416
417
418
419

420
421
422
423
424
425
426
427
428
429
430
431
432

433
434
435
436

437
438
439
440
441
442
443
412
413
414
415
416
417
418

419
420
421
422
423
424
425
426
427
428
429
430
431

432
433
434
435

436
437
438
439
440
441
442
443







-
+












-
+



-
+







            hDst.write(json.dumps({
                            "sHeader": "/pyfsa/",
                            "sLangCode": self.sLangCode,
                            "sLangName": self.sLangName,
                            "sDicName": self.sDicName,
                            "sFileName": self.sFileName,
                            "sDate": str(datetime.datetime.now())[:-7],
                            "nEntries": self.nEntry,
                            "nEntry": self.nEntry,
                            "nChar": self.nChar,
                            "nAff": self.nAff,
                            "nTag": self.nTag,
                            "cStemming": self.cStemming,
                            "dChar": self.dChar,
                            "nNode": self.nNode,
                            "nArc": self.nArc,
                            "nArcVal": self.nArcVal,
                            "lArcVal": self.lArcVal,
                            "nCompressionMethod": nCompressionMethod,
                            "nBytesArc": self.nBytesArc,
                            "nBytesNodeAddress": self.nBytesNodeAddress,
                            "nBytesOffset": self.nBytesOffset
                            "nBytesOffset": self.nBytesOffset,
                            # JavaScript is a pile of shit, so Mozilla’s JS parser don’t like file bigger than 4 Mb!
                            # So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension.
                            # https://github.com/mozilla/addons-linter/issues/1361
                            "sByDic": byDic.hex()  if bBinaryDictAsHexString  else [ e  for e in byDic ],
                            "sByDic": byDic.hex()  if bBinaryDictAsHexString  else [ e  for e in byDic ]
                        }, ensure_ascii=False))
            if bInJSModule:
                hDst.write(";\n\nexports.dictionary = dictionary;\n")

    def _writeBinary (self, sPathFile, nCompressionMethod):
        """
        Format of the binary indexable dictionary:

Modified graphspell/ibdawg.py from [433de414a7] to [4820a60f14].

74
75
76
77
78
79
80
81
82


83
84

85
86
87
88
89
90
91
92
93
94
95

96
97
98
99
100
101
102
103



104
105
106
107

108
109
110
111
112
113
114
74
75
76
77
78
79
80


81
82
83

84
85
86
87
88
89
90
91
92
93
94

95
96
97
98
99
100
101
102

103
104
105
106
107
108

109
110
111
112
113
114
115
116







-
-
+
+

-
+










-
+







-
+
+
+



-
+







        self.aSugg.clear()
        self.dSugg.clear()


class IBDAWG:
    """INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""

    def __init__ (self, sDicName):
        self.by = pkgutil.get_data(__package__, "_dictionaries/" + sDicName)
    def __init__ (self, sfDict):
        self.by = pkgutil.get_data(__package__, "_dictionaries/" + sfDict)
        if not self.by:
            raise OSError("# Error. File not found or not loadable: "+sDicName)
            raise OSError("# Error. File not found or not loadable: "+sfDict)

        if self.by[0:7] != b"/pyfsa/":
            raise TypeError("# Error. Not a pyfsa binary dictionary. Header: {}".format(self.by[0:9]))
        if not(self.by[7:8] == b"1" or self.by[7:8] == b"2" or self.by[7:8] == b"3"):
            raise ValueError("# Error. Unknown dictionary version: {}".format(self.by[7:8]))
        try:
            header, info, values, bdic = self.by.split(b"\0\0\0\0", 3)
        except Exception:
            raise Exception

        self.sName = sDicName
        self.sFileName = sfDict
        self.nCompressionMethod = int(self.by[7:8].decode("utf-8"))
        self.sHeader = header.decode("utf-8")
        self.lArcVal = values.decode("utf-8").split("\t")
        self.nArcVal = len(self.lArcVal)
        self.byDic = bdic

        l = info.decode("utf-8").split("/")
        self.sLang = l[0]
        self.sLangCode = "xx"
        self.sLangName = l[0]
        self.sDicName = ""
        self.nChar = int(l[1])
        self.nBytesArc = int(l[2])
        self.nBytesNodeAddress = int(l[3])
        self.nEntries = int(l[4])
        self.nEntry = int(l[4])
        self.nNode = int(l[5])
        self.nArc = int(l[6])
        self.nAff = int(l[7])
        self.cStemming = l[8]
        if self.cStemming == "S":
            self.funcStemming = st.changeWordWithSuffixCode
        elif self.cStemming == "A":
154
155
156
157
158
159
160
161

162
163
164
165
166
167
168
169
170



171
172


173

174
175







176





177
178
179
180

181
182
183
184
185
186
187
188
189
190
191
192
193
194
195




196
197
198
199
200
201
202
156
157
158
159
160
161
162

163
164
165
166
167
168
169
170
171
172
173
174
175


176
177
178
179


180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195

196












197
198
199
200
201
202
203
204
205
206
207
208
209
210







-
+









+
+
+
-
-
+
+

+
-
-
+
+
+
+
+
+
+

+
+
+
+
+



-
+
-
-
-
-
-
-
-
-
-
-
-
-



+
+
+
+







        self.bOptNumSigle = False
        self.bOptNumAtLast = False

    def getInfo (self):
        return  "  Language: {0.sLangName}   Lang code: {0.sLangCode}   Dictionary name: {0.sDicName}" \
                "  Compression method: {0.nCompressionMethod:>2}   Date: {0.sDate}   Stemming: {0.cStemming}FX\n" \
                "  Arcs values:  {0.nArcVal:>10,} = {0.nChar:>5,} characters,  {0.nAff:>6,} affixes,  {0.nTag:>6,} tags\n" \
                "  Dictionary: {0.nEntries:>12,} entries,    {0.nNode:>11,} nodes,   {0.nArc:>11,} arcs\n" \
                "  Dictionary: {0.nEntry:>12,} entries,    {0.nNode:>11,} nodes,   {0.nArc:>11,} arcs\n" \
                "  Address size: {0.nBytesNodeAddress:>1} bytes,  Arc size: {0.nBytesArc:>1} bytes\n".format(self)

    def writeAsJSObject (self, spfDest, bInJSModule=False, bBinaryDictAsHexString=False):
        "write IBDAWG as a JavaScript object in a JavaScript module"
        import json
        with open(spfDest, "w", encoding="utf-8", newline="\n") as hDst:
            if bInJSModule:
                hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ')
            hDst.write(json.dumps({
                            "sHeader": "/pyfsa/",
                            "sLangCode": self.sLangCode,
                            "sLangName": self.sLangName,
                            "sName": self.sName,
                            "nCompressionMethod": self.nCompressionMethod,
                            "sDicName": self.sDicName,
                            "sFileName": self.sFileName,
                            "sDate": str(datetime.datetime.now())[:-7],
                            "nEntry": self.nEntry,
                            "sHeader": self.sHeader,
                            "lArcVal": self.lArcVal,
                            "nChar": self.nChar,
                            "nAff": self.nAff,
                            "nTag": self.nTag,
                            "cStemming": self.cStemming,
                            "dChar": self.dChar,
                            "nNode": self.nNode,
                            "nArc": self.nArc,
                            "nArcVal": self.nArcVal,
                            "lArcVal": self.lArcVal,
                            "nCompressionMethod": self.nCompressionMethod,
                            "nBytesArc": self.nBytesArc,
                            "nBytesNodeAddress": self.nBytesNodeAddress,
                            "nBytesOffset": self.nBytesOffset,
                            # JavaScript is a pile of shit, so Mozilla’s JS parser don’t like file bigger than 4 Mb!
                            # So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension.
                            # https://github.com/mozilla/addons-linter/issues/1361
                            "byDic": self.byDic.hex()  if bBinaryDictAsHexString  else [ e  for e in self.byDic ],
                            "sByDic": self.byDic.hex()  if bBinaryDictAsHexString  else [ e  for e in self.byDic ]
                            "sLang": self.sLang,
                            "nChar": self.nChar,
                            "nBytesArc": self.nBytesArc,
                            "nBytesNodeAddress": self.nBytesNodeAddress,
                            "nEntries": self.nEntries,
                            "nNode": self.nNode,
                            "nArc": self.nArc,
                            "nAff": self.nAff,
                            "cStemming": self.cStemming,
                            "nTag": self.nTag,
                            "dChar": self.dChar,
                            "nBytesOffset": self.nBytesOffset
                        }, ensure_ascii=False))
            if bInJSModule:
                hDst.write(";\n\nexports.dictionary = dictionary;\n")

                            
                            


    def isValidToken (self, sToken):
        "checks if <sToken> is valid (if there is hyphens in <sToken>, <sToken> is split, each part is checked)"
        if self.isValid(sToken):
            return True
        if "-" in sToken:
            if sToken.count("-") > 4:

Modified lex_build.py from [57c70320f0] to [41cc230f34].

1
2
3
4
5
6
7
8
9
10
11
12

13
14

15
16
17


18
19
20
21


22
23
24
25
26

27
28

29
30
31
32
33

34
35
36
37
1
2
3
4
5
6
7
8
9
10
11

12
13

14
15


16
17
18
19


20
21
22
23
24
25
26
27
28

29
30
31
32
33

34
35
36
37
38











-
+

-
+

-
-
+
+


-
-
+
+





+

-
+




-
+




#!python3

# Lexicon builder

import argparse
from distutils import dir_util

import graphspell.dawg as fsa
from graphspell.ibdawg import IBDAWG


def build (spfSrc, sLangName, sDicName, bJSON=False, cStemmingMethod="S", nCompressMethod=1):
def build (spfSrc, sLangCode, sLangName, sfDict, bJSON=False, sDicName="", cStemmingMethod="S", nCompressMethod=1):
    "transform a text lexicon as a binary indexable dictionary"
    oDAWG = fsa.DAWG(spfSrc, sLangName, cStemmingMethod)
    oDAWG = fsa.DAWG(spfSrc, cStemmingMethod, sLangCode, sLangName, sDicName)
    dir_util.mkpath("graphspell/_dictionaries")
    oDAWG.writeInfo("graphspell/_dictionaries/" + sDicName + ".info.txt")
    oDAWG.createBinary("graphspell/_dictionaries/" + sDicName + ".bdic", int(nCompressMethod))
    oDAWG.writeInfo("graphspell/_dictionaries/" + sfDict + ".info.txt")
    oDAWG.createBinary("graphspell/_dictionaries/" + sfDict + ".bdic", int(nCompressMethod))
    if bJSON:
        dir_util.mkpath("graphspell-js/_dictionaries")
        oDic = IBDAWG(sDicName + ".bdic")
        oDic.writeAsJSObject("graphspell-js/_dictionaries/" + sDicName + ".json", bBinaryDictAsHexString=True)
        oDic = IBDAWG(sfDict + ".bdic")
        oDic.writeAsJSObject("graphspell-js/_dictionaries/" + sfDict + ".json", bBinaryDictAsHexString=True)


def main ():
    xParser = argparse.ArgumentParser()
    xParser.add_argument("src_lexicon", type=str, help="path and file name of the source lexicon")
    xParser.add_argument("lang_code", type=str, help="language code")
    xParser.add_argument("lang_name", type=str, help="language name")
    xParser.add_argument("dic_name", type=str, help="dictionary file name (without extension)")
    xParser.add_argument("dic_filename", type=str, help="dictionary file name (without extension)")
    xParser.add_argument("-js", "--json", help="Build dictionary in JSON", action="store_true")
    xParser.add_argument("-s", "--stemming", help="stemming method: S=suffixes, A=affixes, N=no stemming", type=str, choices=["S", "A", "N"], default="S")
    xParser.add_argument("-c", "--compress", help="compression method: 1, 2 (beta), 3, (beta)", type=int, choices=[1, 2, 3], default=1)
    xArgs = xParser.parse_args()
    build(xArgs.src_lexicon, xArgs.lang_name, xArgs.dic_name, xArgs.json)
    build(xArgs.src_lexicon, xArgs.lang_code, xArgs.lang_name, xArgs.dic_filename, xArgs.json)
    

if __name__ == '__main__':
    main()

Modified make.py from [72eca8b4d4] to [b310b82ca7].

74
75
76
77
78
79
80
81

82
83
84
85
86
87
88
74
75
76
77
78
79
80

81
82
83
84
85
86
87
88







-
+







def createOXT (spLang, dVars, dOxt, spLangPack, bInstall):
    "create extension for Writer"
    print("Building extension for Writer")
    spfZip = "_build/" + dVars['name'] + "-"+ dVars['lang'] +"-v" + dVars['version'] + '.oxt'
    hZip = zipfile.ZipFile(spfZip, mode='w', compression=zipfile.ZIP_DEFLATED)

    # Package and parser
    copyGrammalectePyPackageInZipFile(hZip, spLangPack, dVars['dic_name']+".bdic", "pythonpath/")
    copyGrammalectePyPackageInZipFile(hZip, spLangPack, dVars['dic_filename']+".bdic", "pythonpath/")
    hZip.write("grammalecte-cli.py", "pythonpath/grammalecte-cli.py")

    # Extension files
    hZip.writestr("META-INF/manifest.xml", helpers.fileFile("gc_core/py/oxt/manifest.xml", dVars))
    hZip.writestr("description.xml", helpers.fileFile("gc_core/py/oxt/description.xml", dVars))
    hZip.writestr("Linguistic.xcu", helpers.fileFile("gc_core/py/oxt/Linguistic.xcu", dVars))
    hZip.writestr("Grammalecte.py", helpers.fileFile("gc_core/py/oxt/Grammalecte.py", dVars))
152
153
154
155
156
157
158
159

160
161
162
163
164
165
166
167

168
169
170
171
172
173
174

175
176
177
178
179
180
181
152
153
154
155
156
157
158

159
160
161
162
163
164
165
166

167
168
169
170
171
172
173

174
175
176
177
178
179
180
181







-
+







-
+






-
+







        hDst.write("html = 1\n")


def createPackageZip (sLang, dVars, spLangPack):
    "create server zip"
    spfZip = "_build/" + dVars['name'] + "-"+ dVars['lang'] +"-v" + dVars['version'] + '.zip'
    hZip = zipfile.ZipFile(spfZip, mode='w', compression=zipfile.ZIP_DEFLATED)
    copyGrammalectePyPackageInZipFile(hZip, spLangPack, dVars['dic_name']+".bdic")
    copyGrammalectePyPackageInZipFile(hZip, spLangPack, dVars['dic_filename']+".bdic")
    for spf in ["grammalecte-cli.py", "grammalecte-server.py", "bottle.py", \
                "grammalecte-server-options._global.ini", "grammalecte-server-options."+sLang+".ini", \
                "README.txt", "LICENSE.txt", "LICENSE.fr.txt"]:
        hZip.write(spf)
    hZip.writestr("setup.py", helpers.fileFile("gc_lang/fr/setup.py", dVars))


def copyGrammalectePyPackageInZipFile (hZip, spLangPack, sDicName, sAddPath=""):
def copyGrammalectePyPackageInZipFile (hZip, spLangPack, sfDict, sAddPath=""):
    for sf in os.listdir("grammalecte"):
        if not os.path.isdir("grammalecte/"+sf):
            hZip.write("grammalecte/"+sf, sAddPath+"grammalecte/"+sf)
    for sf in os.listdir("grammalecte/graphspell"):
        if not os.path.isdir("grammalecte/graphspell/"+sf):
            hZip.write("grammalecte/graphspell/"+sf, sAddPath+"grammalecte/graphspell/"+sf)
    hZip.write("grammalecte/graphspell/_dictionaries/"+sDicName, sAddPath+"grammalecte/graphspell/_dictionaries/"+sDicName)
    hZip.write("grammalecte/graphspell/_dictionaries/"+sfDict, sAddPath+"grammalecte/graphspell/_dictionaries/"+sfDict)
    for sf in os.listdir(spLangPack):
        if not os.path.isdir(spLangPack+"/"+sf):
            hZip.write(spLangPack+"/"+sf, sAddPath+spLangPack+"/"+sf)


def create (sLang, xConfig, bInstallOXT, bJavaScript):
    oNow = datetime.datetime.now()
301
302
303
304
305
306
307
308
309


310
311
312
313
314
315
316
317
318
319


320
321
322
323
324
325
326
301
302
303
304
305
306
307


308
309
310
311
312
313
314
315
316
317
318

319
320
321
322
323
324
325
326
327







-
-
+
+









-
+
+







        for sf in os.listdir("graphspell-js"):
            if not os.path.isdir("graphspell-js/"+sf):
                file_util.copy_file("graphspell-js/"+sf, "grammalecte-js/graphspell")
                helpers.copyAndFileTemplate("graphspell-js/"+sf, "grammalecte-js/graphspell/"+sf, dVars)


def copyGraphspellDictionary (dVars, bJavaScript=False):
    spfPyDic = "graphspell/_dictionaries/"+dVars['dic_name']+".bdic"
    spfJSDic = "graphspell-js/_dictionaries/"+dVars['dic_name']+".json"
    spfPyDic = "graphspell/_dictionaries/"+dVars['dic_filename']+".bdic"
    spfJSDic = "graphspell-js/_dictionaries/"+dVars['dic_filename']+".json"
    if not os.path.isfile(spfPyDic) or (bJavaScript and not os.path.isfile(spfJSDic)):
        buildDictionary(dVars, bJavaScript)
    file_util.copy_file(spfPyDic, "grammalecte/graphspell/_dictionaries")
    file_util.copy_file(spfPyDic[:-5]+".info.txt", "grammalecte/graphspell/_dictionaries")
    if bJavaScript:
        file_util.copy_file(spfJSDic, "grammalecte-js/graphspell/_dictionaries")


def buildDictionary (dVars, bJavaScript):
    lex_build.build(dVars['lexicon_src'], dVars['lang_name'], dVars['dic_name'], bJavaScript, dVars['stemming_method'], int(dVars['fsa_method']))
    lex_build.build(dVars['lexicon_src'], dVars['lang'], dVars['lang_name'], dVars['dic_filename'], \
                    bJavaScript, dVars['dic_name'], dVars['stemming_method'], int(dVars['fsa_method']))


def main ():
    print("Python: " + sys.version)
    xParser = argparse.ArgumentParser()
    xParser.add_argument("lang", type=str, nargs='+', help="lang project to generate (name of folder in /lang)")
    xParser.add_argument("-b", "--build_data", help="launch build_data.py (part 1 and 2)", action="store_true")