Grammalecte  Check-in [03448dc173]

Overview
Comment: [build] separate dictionary builder from make.py
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | build
Files: files | file ages | folders
SHA3-256: 03448dc173fd9dfae33d9b65b3c00036b9ca3111be395a373966e080e1fc5659
User & Date: olr on 2017-06-23 12:19:55
Other Links: manifest | tags
Context
2017-06-23
12:55
[build] change arguments order for dictionary building check-in: 4bc364b3ac user: olr tags: trunk, build
12:19
[build] separate dictionary builder from make.py check-in: 03448dc173 user: olr tags: trunk, build
08:04
[fr] faux positif concernant les nombres suivant un déterminant pluriel check-in: 469f8c6d69 user: olr tags: trunk, fr
Changes

Modified gc_lang/fr/config.ini from [649530543c] to [5ccf0657e5].

12
13
14
15
16
17
18

19
20
21
22
23
24
25
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26







+







description = Correcteur grammatical pour le français.
extras = README_fr.txt
logo = logo.png

# lexicon source
lexicon_src = lexicons/French.lex
# binary dictionary name
dic_name = French
py_binary_dic = French.bdic
js_binary_dic = French.json
# Finite state automaton compression: 1, 2 (experimental) or 3 (experimental)
fsa_method = 1
# stemming method: S for suffixes only, A for prefixes and suffixes
stemming_method = S

Added lex_build.py version [e2774a881f].






























1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
#!python3

# Lexicon builder

from distutils import dir_util

import grammalecte.dawg as fsa
from grammalecte.ibdawg import IBDAWG


def build (spfSrc, sLangName, sDicName, cStemmingMethod, nCompressMethod, bJSON=False):
    """Transform a text lexicon into a binary indexable dictionary.

    A DAWG is built from the lexicon, then two files are written:
    grammalecte/_dictionaries/<sDicName>.info.txt and
    grammalecte/_dictionaries/<sDicName>.bdic.
    When bJSON is true, the binary dictionary is additionally opened with
    IBDAWG and exported to grammalecte-js/_dictionaries/<sDicName>.json.

    Args:
        spfSrc: lexicon source, handed unchanged to fsa.DAWG.
        sLangName: language name, handed unchanged to fsa.DAWG.
        sDicName: base name (without extension) of every generated file.
        cStemmingMethod: stemming method, handed unchanged to fsa.DAWG.
        nCompressMethod: compression method; converted with int() before
            being given to createBinary, so a numeric string is accepted.
        bJSON: when true, also generate the JSON dictionary.
    """
    # Function-scope import: os.makedirs replaces distutils' dir_util.mkpath
    # so this function no longer relies on the deprecated distutils package
    # nor on mkpath's process-wide cache of already-created folders.
    import os

    sPyDicDir = "grammalecte/_dictionaries"
    sJsDicDir = "grammalecte-js/_dictionaries"
    sBinaryName = sDicName + ".bdic"

    oDAWG = fsa.DAWG(spfSrc, sLangName, cStemmingMethod)
    os.makedirs(sPyDicDir, exist_ok=True)
    oDAWG.writeInfo(sPyDicDir + "/" + sDicName + ".info.txt")
    oDAWG.createBinary(sPyDicDir + "/" + sBinaryName, int(nCompressMethod))
    if bJSON:
        os.makedirs(sJsDicDir, exist_ok=True)
        # NOTE(review): IBDAWG is given the bare file name, not a path --
        # presumably it locates the file itself; confirm against ibdawg.py.
        oDic = IBDAWG(sBinaryName)
        oDic.writeAsJSObject(sJsDicDir + "/" + sDicName + ".json")


def main ():
    """Command-line entry point; not implemented yet, prints a placeholder."""
    sMessage = "todo"
    print(sMessage)


# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

Modified make.py from [418a36d3bb] to [993f7b57c8].

310
311
312
313
314
315
316
317
318
319
320


321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
310
311
312
313
314
315
316




317
318








319
320
321
322
323
324
325







-
-
-
-
+
+
-
-
-
-
-
-
-
-







                try:
                    build_data_module = importlib.import_module("gc_lang."+sLang+".build_data")
                except ImportError:
                    print("# Error. Couldn’t import file build_data.py in folder gc_lang/"+sLang)
            if build_data_module:
                build_data_module.before('gc_lang/'+sLang, dVars, xArgs.javascript)
            if xArgs.dict or not os.path.exists("grammalecte/_dictionaries"):
                import grammalecte.dawg as fsa
                from grammalecte.ibdawg import IBDAWG
                # fsa builder
                oDAWG = fsa.DAWG(dVars['lexicon_src'], dVars['lang_name'], dVars['stemming_method'])
                import lex_build
                lex_build.build(dVars['lexicon_src'], dVars['lang_name'], dVars['dic_name'], dVars['stemming_method'], int(dVars['fsa_method']), xArgs.javascript)
                dir_util.mkpath("grammalecte/_dictionaries")
                oDAWG.writeInfo("grammalecte/_dictionaries/" + dVars['py_binary_dic'] + ".info.txt")
                oDAWG.createBinary("grammalecte/_dictionaries/" + dVars['py_binary_dic'], int(dVars['fsa_method']))
                if xArgs.javascript:
                    dir_util.mkpath("grammalecte-js/_dictionaries")
                    oDic = IBDAWG(dVars['py_binary_dic'])
                    #oDic.writeAsJSObject("gc_lang/"+sLang+"/modules-js/dictionary.js")
                    oDic.writeAsJSObject("grammalecte-js/_dictionaries/"+dVars['js_binary_dic'])
            if build_data_module:
                build_data_module.after('gc_lang/'+sLang, dVars, xArgs.javascript)

            # make
            sVersion = create(sLang, xConfig, xArgs.install, xArgs.javascript, )

            # tests