# NOTE(review): line structure restored from a whitespace-collapsed copy of this patch.
# The position of blank context lines and the leading indentation of the make.py
# lines are reconstructed assumptions -- verify against the repository before applying.
# lex_build.py uses os.makedirs instead of distutils.dir_util.mkpath and documents build().

Index: gc_lang/fr/config.ini
==================================================================
--- gc_lang/fr/config.ini
+++ gc_lang/fr/config.ini
@@ -14,10 +14,11 @@
 logo = logo.png
 
 # lexicon source
 lexicon_src = lexicons/French.lex
 # binary dictionary name
+dic_name = French
 py_binary_dic = French.bdic
 js_binary_dic = French.json
 # Finite state automaton compression: 1, 2 (experimental) or 3 (experimental)
 fsa_method = 1
 # stemming method: S for suffixes only, A for prefixes and suffixes

ADDED lex_build.py
Index: lex_build.py
==================================================================
--- lex_build.py
+++ lex_build.py
@@ -0,0 +1,42 @@
+#!python3
+
+# Lexicon builder
+
+import os
+
+import grammalecte.dawg as fsa
+from grammalecte.ibdawg import IBDAWG
+
+
+def build (spfSrc, sLangName, sDicName, cStemmingMethod, nCompressMethod, bJSON=False):
+    """Transform a text lexicon into a binary indexable dictionary.
+
+    spfSrc           source lexicon, passed unchanged to fsa.DAWG
+    sLangName        language name, passed unchanged to fsa.DAWG
+    sDicName         base name (without extension) of every generated file
+    cStemmingMethod  stemming method, passed unchanged to fsa.DAWG
+    nCompressMethod  compression method; converted with int() before use
+    bJSON            if true, also write <sDicName>.json in grammalecte-js/_dictionaries
+
+    Writes <sDicName>.info.txt and <sDicName>.bdic in grammalecte/_dictionaries.
+    """
+    oDAWG = fsa.DAWG(spfSrc, sLangName, cStemmingMethod)
+    # os.makedirs replaces distutils.dir_util.mkpath: distutils is deprecated (PEP 632)
+    # and is no longer part of the standard library from Python 3.12 on
+    os.makedirs("grammalecte/_dictionaries", exist_ok=True)
+    oDAWG.writeInfo("grammalecte/_dictionaries/" + sDicName + ".info.txt")
+    oDAWG.createBinary("grammalecte/_dictionaries/" + sDicName + ".bdic", int(nCompressMethod))
+    if bJSON:
+        os.makedirs("grammalecte-js/_dictionaries", exist_ok=True)
+        oDic = IBDAWG(sDicName + ".bdic")
+        #oDic.writeAsJSObject("gc_lang/"+sLang+"/modules-js/dictionary.js")
+        oDic.writeAsJSObject("grammalecte-js/_dictionaries/" + sDicName + ".json")
+
+
+def main ():
+    "command-line entry point: not implemented yet"
+    print("todo")
+
+
+if __name__ == '__main__':
+    main()

Index: make.py
==================================================================
--- make.py
+++ make.py
@@ -312,22 +312,12 @@
             except ImportError:
                 print("# Error. Couldn’t import file build_data.py in folder gc_lang/"+sLang)
             if build_data_module:
                 build_data_module.before('gc_lang/'+sLang, dVars, xArgs.javascript)
             if xArgs.dict or not os.path.exists("grammalecte/_dictionaries"):
-                import grammalecte.dawg as fsa
-                from grammalecte.ibdawg import IBDAWG
-                # fsa builder
-                oDAWG = fsa.DAWG(dVars['lexicon_src'], dVars['lang_name'], dVars['stemming_method'])
-                dir_util.mkpath("grammalecte/_dictionaries")
-                oDAWG.writeInfo("grammalecte/_dictionaries/" + dVars['py_binary_dic'] + ".info.txt")
-                oDAWG.createBinary("grammalecte/_dictionaries/" + dVars['py_binary_dic'], int(dVars['fsa_method']))
-                if xArgs.javascript:
-                    dir_util.mkpath("grammalecte-js/_dictionaries")
-                    oDic = IBDAWG(dVars['py_binary_dic'])
-                    #oDic.writeAsJSObject("gc_lang/"+sLang+"/modules-js/dictionary.js")
-                    oDic.writeAsJSObject("grammalecte-js/_dictionaries/"+dVars['js_binary_dic'])
+                import lex_build
+                lex_build.build(dVars['lexicon_src'], dVars['lang_name'], dVars['dic_name'], dVars['stemming_method'], int(dVars['fsa_method']), xArgs.javascript)
             if build_data_module:
                 build_data_module.after('gc_lang/'+sLang, dVars, xArgs.javascript)
 
             # make
             sVersion = create(sLang, xConfig, xArgs.install, xArgs.javascript, )