Comment:     [graphspell][build] dawg builder: new parameters + consistency
SHA3-256:    ac98115a3112c7b2725ac9aa0a4e8ef8
User & Date: olr on 2018-02-11 10:20:42
2018-02-11
16:06  [graphspell][py] ibdawg: initialization from binary file or JSON  check-in: 96290e4468  user: olr  tags: trunk, graphspell
10:20  [graphspell][build] dawg builder: new parameters + consistency  check-in: ac98115a31  user: olr  tags: trunk, build, graphspell
09:22  [graphspell] JSON data clarification  check-in: 9c995c9f7f  user: olr  tags: trunk, graphspell
Modified gc_core/js/lang_core/gc_engine.js from [c5ee3ec605] to [2f5e964b5d].
︙

Lines 319-335 (new version):

    //// Initialization

    load: function (sContext="JavaScript", sPath="") {
        try {
            if (typeof(require) !== 'undefined') {
                var ibdawg = require("resource://grammalecte/graphspell/ibdawg.js");
                _oDict = new ibdawg.IBDAWG("${dic_filename}.json");
            } else {
                _oDict = new IBDAWG("${dic_filename}.json", sPath);
            }
            _sAppContext = sContext;
            _dOptions = gc_options.getOptions(sContext).gl_shallowCopy();     // duplication necessary, to be able to reset to default
        }
        catch (e) {
            helpers.logerror(e);
        }

︙
Modified gc_core/py/lang_core/gc_engine.py from [e1c3ad1859] to [9fc11201d4].
︙

Lines 288-302 (new version):

    def load (sContext="Python"):
        global _oDict
        global _sAppContext
        global _dOptions
        try:
            _oDict = IBDAWG("${dic_filename}.bdic")
            _sAppContext = sContext
            _dOptions = dict(gc_options.getOptions(sContext))   # duplication necessary, to be able to reset to default
        except:
            traceback.print_exc()

    def setOption (sOpt, bVal):

︙
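The dict() duplication in load() is what makes resetting options possible: later changes go to an independent copy, never to the defaults held by gc_options. A minimal illustration of that idiom (the option names are invented):

    # Why the options mapping is copied before use (option names invented):
    dDefault = {"apos": True, "nbsp": False}

    dOptions = dict(dDefault)     # independent shallow copy
    dOptions["nbsp"] = True       # changing the working copy...

    print(dDefault["nbsp"])       # ...leaves the defaults untouched: False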
Modified gc_lang/fr/build.py from [9eae4b1757] to [248d1906b2].
︙

Lines 75-89 (new version):

    def createThunderbirdExtension (sLang, dVars, spLangPack):
        "create extension for Thunderbird"
        print("Building extension for Thunderbird")
        sExtensionName = dVars['tb_identifier'] + "-v" + dVars['version'] + '.xpi'
        spfZip = "_build/" + sExtensionName
        hZip = zipfile.ZipFile(spfZip, mode='w', compression=zipfile.ZIP_DEFLATED)
        _copyGrammalecteJSPackageInZipFile(hZip, spLangPack, dVars['dic_filename']+".json")
        for spf in ["LICENSE.txt", "LICENSE.fr.txt"]:
            hZip.write(spf)
        dVars = _createOptionsForThunderbird(dVars)
        helpers.addFolderToZipAndFileFile(hZip, "gc_lang/"+sLang+"/tb", "", dVars, True)
        spDict = "gc_lang/"+sLang+"/xpi/data/dictionaries"
        for sp in os.listdir(spDict):
            if os.path.isdir(spDict+"/"+sp):

︙
Modified gc_lang/fr/config.ini from [874aca7947] to [a48aa6be74].
︙

Lines 12-29 (new version):

    link = http://grammalecte.net
    description = Correcteur grammatical pour le français.
    extras = README_fr.txt
    logo = logo.png
    # lexicon source
    lexicon_src = lexicons/French.lex
    # binary dictionary file name
    dic_filename = fr
    # binary dictionary name
    dic_name = French
    # Finite state automaton compression: 1, 2 (experimental) or 3 (experimental)
    fsa_method = 1
    # stemming method: S for suffixes only, A for prefixes and suffixes
    stemming_method = S
    # LibreOffice
    unopkg = C:/Program Files/LibreOffice 5/program/unopkg.com

︙
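These new keys are consumed as template variables by the build: every ${dic_filename} placeholder in the sources (see the gc_engine hunks above) is replaced with the configured value, so the engines end up loading fr.bdic and fr.json. A minimal sketch of that substitution, assuming a plain ${key} string replacement (the actual work is done by the project's helpers module during packaging):

    # Sketch of ${var} substitution over a source line (mechanism assumed):
    dVars = {"dic_filename": "fr", "dic_name": "French"}

    def subTemplate (sText, dVars):
        # replace each ${key} placeholder with its value from config.ini
        for sKey, sVal in dVars.items():
            sText = sText.replace("${" + sKey + "}", sVal)
        return sText

    print(subTemplate('_oDict = IBDAWG("${dic_filename}.bdic")', dVars))
    # prints: _oDict = IBDAWG("fr.bdic")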
Modified gc_lang/fr/modules/tests.py from [f1c386c508] to [43d45242b9].
︙

Lines 20-34 (new version):

        return s.replace("\u2019", "'").replace("\u2013", "–").replace("\u2014", "—")


    class TestDictionary (unittest.TestCase):

        @classmethod
        def setUpClass (cls):
            cls.oDic = IBDAWG("${dic_filename}.bdic")

        def test_lookup (self):
            for sWord in ["branche", "Émilie"]:
                self.assertTrue(self.oDic.lookup(sWord), sWord)

        def test_lookup_failed (self):
            for sWord in ["Branche", "BRANCHE", "BranchE", "BRanche", "BRAnCHE", "émilie"]:

︙
Modified gc_lang/fr/webext/panel/lex_editor.js from [eea97bdcc4] to [83999992e4].
︙

Lines 596-610 (new version):

        oWidgets.setDictData(lEntry.length, oJSON.sDate);
        oWidgets.showElement("export_button");
    },

    build: function (lEntry) {
        oWidgets.showElement("build_progress");
        let xProgressNode = document.getElementById("build_progress");
        let oDAWG = new DAWG(lEntry, "S", "fr", "Français", "Dictionnaire personnel", xProgressNode);
        this.oJSON = oDAWG.createBinary(1);
        this.save();
        oWidgets.hideElement("build_progress");
        oWidgets.showElement("export_button");
    },

    save: function () {

︙
Modified graphspell-js/dawg.js from [287e75028f] to [4f989404b7].
︙

Lines 24-38 (new version):

        This code is inspired from Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115)
        We store suffix/affix codes and tags within the graph after the “real” word.
        A word is a list of numbers [ c1, c2, c3 . . . cN, iAffix, iTags]
        Each arc is an index in this.lArcVal, where are stored characters, suffix/affix codes for stemming and tags.
        Important: As usual, the last node (after ‘iTags’) is tagged final, AND the node after ‘cN’ is ALSO tagged final.
    */

    constructor (lEntrySrc, cStemming, sLangCode, sLangName="", sDicName="", xProgressBarNode=null) {
        console.log("===== Direct Acyclic Word Graph - Minimal Acyclic Finite State Automaton =====");
        let funcStemmingGen = null;
        switch (cStemming.toUpperCase()) {
            case "A":
                funcStemmingGen = str_transform.defineAffixCode; break;
            case "S":
                funcStemmingGen = str_transform.defineSuffixCode; break;

︙

Lines 374-388 (new version):

        let oJSON = {
            "sHeader": "/pyfsa/",
            "sLangCode": this.sLangCode,
            "sLangName": this.sLangName,
            "sDicName": this.sDicName,
            "sFileName": "[none]",
            "sDate": this._getDate(),
            "nEntry": this.nEntry,
            "nChar": this.nChar,
            "nAff": this.nAff,
            "nTag": this.nTag,
            "cStemming": this.cStemming,
            "dChar": helpers.mapToObject(this.dChar),
            "nNode": this.nNode,
            "nArc": this.nArc,

︙
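The header comment above ("A word is a list of numbers [ c1, c2, c3 . . . cN, iAffix, iTags]") is easiest to see on a concrete entry. A schematic illustration of that layout, with invented indices and an invented suffix code:

    # Schematic layout of one DAWG entry (index values and suffix code invented).
    # Entry: the flexion "branches", stemmed to its lemma via a suffix code, tagged "nom:fem:pl".
    # lArcVal stores characters, suffix/affix codes and tags; each arc holds an index into it.
    lArcVal = ["b", "r", "a", "n", "c", "h", "e", "s", "1", "nom:fem:pl"]

    word = [0, 1, 2, 3, 4, 5, 6, 7,   # c1 .. cN: the characters of "branches"
            8,                        # iAffix: suffix code used for stemming
            9]                        # iTags: the grammatical tags

    # As the comment says: the node after cN is tagged final,
    # AND the node after iTags is tagged final.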
Modified graphspell-js/ibdawg.js from [35c6ada92f] to [4ebdc2968e].
︙

Lines 99-113 (new version):

        }
        catch (e) {
            throw Error("# Error. File not found or not loadable.\n" + e.message + "\n");
        }
        /*
            Properties:
                sName, nCompressionMethod, sHeader, lArcVal, nArcVal, sByDic, sLang, nChar, nBytesArc, nBytesNodeAddress,
                nEntry, nNode, nArc, nAff, cStemming, nTag, dChar, nBytesOffset,
        */

        /*
            Bug workaround.
            Mozilla’s JS parser sucks. Can’t read file bigger than 4 Mb!
            So we convert huge hexadecimal string to list of numbers…
            https://github.com/mozilla/addons-linter/issues/1361
        */

︙

Lines 175-189 (new version):

        this.bOptNumAtLast = false;
    }

    getInfo () {
        return  `  Language: ${this.sLangName}   Lang code: ${this.sLangCode}   Dictionary name: ${this.sDicName}\n` +
                `  Compression method: ${this.nCompressionMethod}   Date: ${this.sDate}   Stemming: ${this.cStemming}FX\n` +
                `  Arcs values:  ${this.nArcVal} = ${this.nChar} characters, ${this.nAff} affixes, ${this.nTag} tags\n` +
                `  Dictionary: ${this.nEntry} entries,    ${this.nNode} nodes,   ${this.nArc} arcs\n` +
                `  Address size: ${this.nBytesNodeAddress} bytes,  Arc size: ${this.nBytesArc} bytes\n`;
    }

    isValidToken (sToken) {
        // checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)
        if (this.isValid(sToken)) {
            return true;

︙
Modified graphspell/dawg.py from [8edab9530f] to [daf4f76e4f].
︙

Lines 37-51 (new version):

    """DIRECT ACYCLIC WORD GRAPH"""
    # This code is inspired from Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115)
    # We store suffix/affix codes and tags within the graph after the “real” word.
    # A word is a list of numbers [ c1, c2, c3 . . . cN, iAffix, iTags]
    # Each arc is an index in self.lArcVal, where are stored characters, suffix/affix codes for stemming and tags.
    # Important: As usual, the last node (after ‘iTags’) is tagged final, AND the node after ‘cN’ is ALSO tagged final.

    def __init__ (self, spfSrc, cStemming, sLangCode, sLangName="", sDicName=""):
        print("===== Direct Acyclic Word Graph - Minimal Acyclic Finite State Automaton =====")
        cStemming = cStemming.upper()
        if cStemming == "A":
            funcStemmingGen = st.defineAffixCode
        elif cStemming == "S":
            funcStemmingGen = st.defineSuffixCode
        elif cStemming == "N":

︙

Lines 412-443 (new version):

        hDst.write(json.dumps({
                "sHeader": "/pyfsa/",
                "sLangCode": self.sLangCode,
                "sLangName": self.sLangName,
                "sDicName": self.sDicName,
                "sFileName": self.sFileName,
                "sDate": str(datetime.datetime.now())[:-7],
                "nEntry": self.nEntry,
                "nChar": self.nChar,
                "nAff": self.nAff,
                "nTag": self.nTag,
                "cStemming": self.cStemming,
                "dChar": self.dChar,
                "nNode": self.nNode,
                "nArc": self.nArc,
                "nArcVal": self.nArcVal,
                "lArcVal": self.lArcVal,
                "nCompressionMethod": nCompressionMethod,
                "nBytesArc": self.nBytesArc,
                "nBytesNodeAddress": self.nBytesNodeAddress,
                "nBytesOffset": self.nBytesOffset,
                # JavaScript is a pile of shit, so Mozilla’s JS parser don’t like file bigger than 4 Mb!
                # So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension.
                # https://github.com/mozilla/addons-linter/issues/1361
                "sByDic": byDic.hex()  if bBinaryDictAsHexString  else [ e  for e in byDic ]
            }, ensure_ascii=False))
        if bInJSModule:
            hDst.write(";\n\nexports.dictionary = dictionary;\n")

    def _writeBinary (self, sPathFile, nCompressionMethod):
        """
        Format of the binary indexable dictionary:

︙
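The sByDic field is where the workaround described in the comment shows up: the raw graph bytes are emitted either as a plain list of integers or, when bBinaryDictAsHexString is set, as one hex string that the Firefox extension decodes back into numbers. The round-trip relies only on built-in bytes methods:

    # sByDic round-trip: bytes -> hex string -> bytes (toy data).
    byDic = bytes([0, 17, 255, 42])

    sByDic = byDic.hex()              # "0011ff2a", safe to embed in a JSON file of any size
    assert bytes.fromhex(sByDic) == byDic

    lByDic = [e for e in byDic]       # the alternative form: a plain list of numbers
    assert bytes(lByDic) == byDic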
Modified graphspell/ibdawg.py from [433de414a7] to [4820a60f14].
︙

Lines 74-116 (new version):

            self.aSugg.clear()
            self.dSugg.clear()


    class IBDAWG:
        """INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""

        def __init__ (self, sfDict):
            self.by = pkgutil.get_data(__package__, "_dictionaries/" + sfDict)
            if not self.by:
                raise OSError("# Error. File not found or not loadable: "+sfDict)
            if self.by[0:7] != b"/pyfsa/":
                raise TypeError("# Error. Not a pyfsa binary dictionary. Header: {}".format(self.by[0:9]))
            if not(self.by[7:8] == b"1" or self.by[7:8] == b"2" or self.by[7:8] == b"3"):
                raise ValueError("# Error. Unknown dictionary version: {}".format(self.by[7:8]))
            try:
                header, info, values, bdic = self.by.split(b"\0\0\0\0", 3)
            except Exception:
                raise Exception

            self.sFileName = sfDict
            self.nCompressionMethod = int(self.by[7:8].decode("utf-8"))
            self.sHeader = header.decode("utf-8")
            self.lArcVal = values.decode("utf-8").split("\t")
            self.nArcVal = len(self.lArcVal)
            self.byDic = bdic

            l = info.decode("utf-8").split("/")
            self.sLangCode = "xx"
            self.sLangName = l[0]
            self.sDicName = ""
            self.nChar = int(l[1])
            self.nBytesArc = int(l[2])
            self.nBytesNodeAddress = int(l[3])
            self.nEntry = int(l[4])
            self.nNode = int(l[5])
            self.nArc = int(l[6])
            self.nAff = int(l[7])
            self.cStemming = l[8]
            if self.cStemming == "S":
                self.funcStemming = st.changeWordWithSuffixCode
            elif self.cStemming == "A":

︙

Lines 156-210 (new version):

            self.bOptNumSigle = False
            self.bOptNumAtLast = False

        def getInfo (self):
            return  "  Language: {0.sLangName}   Lang code: {0.sLangCode}   Dictionary name: {0.sDicName}" \
                    "  Compression method: {0.nCompressionMethod:>2}   Date: {0.sDate}   Stemming: {0.cStemming}FX\n" \
                    "  Arcs values:  {0.nArcVal:>10,} = {0.nChar:>5,} characters,  {0.nAff:>6,} affixes, {0.nTag:>6,} tags\n" \
                    "  Dictionary: {0.nEntry:>12,} entries,    {0.nNode:>11,} nodes,   {0.nArc:>11,} arcs\n" \
                    "  Address size: {0.nBytesNodeAddress:>1} bytes,  Arc size: {0.nBytesArc:>1} bytes\n".format(self)

        def writeAsJSObject (self, spfDest, bInJSModule=False, bBinaryDictAsHexString=False):
            "write IBDAWG as a JavaScript object in a JavaScript module"
            import json
            with open(spfDest, "w", encoding="utf-8", newline="\n") as hDst:
                if bInJSModule:
                    hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ')
                hDst.write(json.dumps({
                        "sHeader": "/pyfsa/",
                        "sLangCode": self.sLangCode,
                        "sLangName": self.sLangName,
                        "sDicName": self.sDicName,
                        "sFileName": self.sFileName,
                        "sDate": str(datetime.datetime.now())[:-7],
                        "nEntry": self.nEntry,
                        "nChar": self.nChar,
                        "nAff": self.nAff,
                        "nTag": self.nTag,
                        "cStemming": self.cStemming,
                        "dChar": self.dChar,
                        "nNode": self.nNode,
                        "nArc": self.nArc,
                        "nArcVal": self.nArcVal,
                        "lArcVal": self.lArcVal,
                        "nCompressionMethod": self.nCompressionMethod,
                        "nBytesArc": self.nBytesArc,
                        "nBytesNodeAddress": self.nBytesNodeAddress,
                        "nBytesOffset": self.nBytesOffset,
                        # JavaScript is a pile of shit, so Mozilla’s JS parser don’t like file bigger than 4 Mb!
                        # So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension.
                        # https://github.com/mozilla/addons-linter/issues/1361
                        "sByDic": self.byDic.hex()  if bBinaryDictAsHexString  else [ e  for e in self.byDic ]
                    }, ensure_ascii=False))
                if bInJSModule:
                    hDst.write(";\n\nexports.dictionary = dictionary;\n")

        def isValidToken (self, sToken):
            "checks if <sToken> is valid (if there is hyphens in <sToken>, <sToken> is split, each part is checked)"
            if self.isValid(sToken):
                return True
            if "-" in sToken:
                if sToken.count("-") > 4:

︙
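Read together, the checks and splits in __init__ imply a simple container layout for a .bdic file: an ASCII header /pyfsa/ plus one digit for the compression method, then the /-separated info fields (language name, nChar, nBytesArc, nBytesNodeAddress, nEntry, nNode, nArc, nAff, cStemming), then the tab-separated arc values, then the raw graph, the four parts delimited by b"\0\0\0\0". A sketch that builds and re-parses such a blob (all field values invented):

    # Sketch of the .bdic container parsed by IBDAWG.__init__ (toy values).
    SEP = b"\0\0\0\0"
    info = "French/3/2/3/1/1/2/1/S"            # l[0]..l[8] as read above
    values = "\t".join(["b", "r", "a"])        # lArcVal, tab-separated
    bdic = b"\x00\x01\x02"                     # raw graph bytes

    by = b"/pyfsa/1" + SEP + info.encode("utf-8") + SEP + values.encode("utf-8") + SEP + bdic

    # maxsplit=3 keeps any b"\0\0\0\0" inside the raw graph intact
    header, info2, values2, bdic2 = by.split(SEP, 3)
    assert by[0:7] == b"/pyfsa/" and by[7:8] == b"1"      # magic + compression method
    assert info2.decode("utf-8").split("/")[8] == "S"     # cStemming
    assert values2.decode("utf-8").split("\t") == ["b", "r", "a"]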
Modified lex_build.py from [57c70320f0] to [41cc230f34].
Lines 1-38 (new version):

    #!python3

    # Lexicon builder

    import argparse
    from distutils import dir_util

    import graphspell.dawg as fsa
    from graphspell.ibdawg import IBDAWG


    def build (spfSrc, sLangCode, sLangName, sfDict, bJSON=False, sDicName="", cStemmingMethod="S", nCompressMethod=1):
        "transform a text lexicon as a binary indexable dictionary"
        oDAWG = fsa.DAWG(spfSrc, cStemmingMethod, sLangCode, sLangName, sDicName)
        dir_util.mkpath("graphspell/_dictionaries")
        oDAWG.writeInfo("graphspell/_dictionaries/" + sfDict + ".info.txt")
        oDAWG.createBinary("graphspell/_dictionaries/" + sfDict + ".bdic", int(nCompressMethod))
        if bJSON:
            dir_util.mkpath("graphspell-js/_dictionaries")
            oDic = IBDAWG(sfDict + ".bdic")
            oDic.writeAsJSObject("graphspell-js/_dictionaries/" + sfDict + ".json", bBinaryDictAsHexString=True)


    def main ():
        xParser = argparse.ArgumentParser()
        xParser.add_argument("src_lexicon", type=str, help="path and file name of the source lexicon")
        xParser.add_argument("lang_code", type=str, help="language code")
        xParser.add_argument("lang_name", type=str, help="language name")
        xParser.add_argument("dic_filename", type=str, help="dictionary file name (without extension)")
        xParser.add_argument("-js", "--json", help="Build dictionary in JSON", action="store_true")
        xArgs = xParser.parse_args()

        build(xArgs.src_lexicon, xArgs.lang_code, xArgs.lang_name, xArgs.dic_filename, xArgs.json)


    if __name__ == '__main__':
        main()
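With the renamed parameters, building the French dictionaries reduces to one call, with the values taken from gc_lang/fr/config.ini above (on the command line: python lex_build.py lexicons/French.lex fr French fr -js):

    # Build fr.bdic and, for the JS side, fr.json from the French lexicon:
    import lex_build

    lex_build.build("lexicons/French.lex", "fr", "French", "fr",
                    bJSON=True, sDicName="French",
                    cStemmingMethod="S", nCompressMethod=1)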
Modified make.py from [72eca8b4d4] to [b310b82ca7].
︙

Lines 74-89 (new version):

    def createOXT (spLang, dVars, dOxt, spLangPack, bInstall):
        "create extension for Writer"
        print("Building extension for Writer")
        spfZip = "_build/" + dVars['name'] + "-"+ dVars['lang'] +"-v" + dVars['version'] + '.oxt'
        hZip = zipfile.ZipFile(spfZip, mode='w', compression=zipfile.ZIP_DEFLATED)

        # Package and parser
        copyGrammalectePyPackageInZipFile(hZip, spLangPack, dVars['dic_filename']+".bdic", "pythonpath/")
        hZip.write("grammalecte-cli.py", "pythonpath/grammalecte-cli.py")

        # Extension files
        hZip.writestr("META-INF/manifest.xml", helpers.fileFile("gc_core/py/oxt/manifest.xml", dVars))
        hZip.writestr("description.xml", helpers.fileFile("gc_core/py/oxt/description.xml", dVars))
        hZip.writestr("Linguistic.xcu", helpers.fileFile("gc_core/py/oxt/Linguistic.xcu", dVars))
        hZip.writestr("Grammalecte.py", helpers.fileFile("gc_core/py/oxt/Grammalecte.py", dVars))

︙

Lines 152-181 (new version):

        hDst.write("html = 1\n")


    def createPackageZip (sLang, dVars, spLangPack):
        "create server zip"
        spfZip = "_build/" + dVars['name'] + "-"+ dVars['lang'] +"-v" + dVars['version'] + '.zip'
        hZip = zipfile.ZipFile(spfZip, mode='w', compression=zipfile.ZIP_DEFLATED)
        copyGrammalectePyPackageInZipFile(hZip, spLangPack, dVars['dic_filename']+".bdic")
        for spf in ["grammalecte-cli.py", "grammalecte-server.py", "bottle.py", \
                    "grammalecte-server-options._global.ini", "grammalecte-server-options."+sLang+".ini", \
                    "README.txt", "LICENSE.txt", "LICENSE.fr.txt"]:
            hZip.write(spf)
        hZip.writestr("setup.py", helpers.fileFile("gc_lang/fr/setup.py", dVars))


    def copyGrammalectePyPackageInZipFile (hZip, spLangPack, sfDict, sAddPath=""):
        for sf in os.listdir("grammalecte"):
            if not os.path.isdir("grammalecte/"+sf):
                hZip.write("grammalecte/"+sf, sAddPath+"grammalecte/"+sf)
        for sf in os.listdir("grammalecte/graphspell"):
            if not os.path.isdir("grammalecte/graphspell/"+sf):
                hZip.write("grammalecte/graphspell/"+sf, sAddPath+"grammalecte/graphspell/"+sf)
        hZip.write("grammalecte/graphspell/_dictionaries/"+sfDict, sAddPath+"grammalecte/graphspell/_dictionaries/"+sfDict)
        for sf in os.listdir(spLangPack):
            if not os.path.isdir(spLangPack+"/"+sf):
                hZip.write(spLangPack+"/"+sf, sAddPath+spLangPack+"/"+sf)


    def create (sLang, xConfig, bInstallOXT, bJavaScript):
        oNow = datetime.datetime.now()

︙

Lines 301-327 (new version):

        for sf in os.listdir("graphspell-js"):
            if not os.path.isdir("graphspell-js/"+sf):
                file_util.copy_file("graphspell-js/"+sf, "grammalecte-js/graphspell")
                helpers.copyAndFileTemplate("graphspell-js/"+sf, "grammalecte-js/graphspell/"+sf, dVars)


    def copyGraphspellDictionary (dVars, bJavaScript=False):
        spfPyDic = "graphspell/_dictionaries/"+dVars['dic_filename']+".bdic"
        spfJSDic = "graphspell-js/_dictionaries/"+dVars['dic_filename']+".json"
        if not os.path.isfile(spfPyDic) or (bJavaScript and not os.path.isfile(spfJSDic)):
            buildDictionary(dVars, bJavaScript)
        file_util.copy_file(spfPyDic, "grammalecte/graphspell/_dictionaries")
        file_util.copy_file(spfPyDic[:-5]+".info.txt", "grammalecte/graphspell/_dictionaries")
        if bJavaScript:
            file_util.copy_file(spfJSDic, "grammalecte-js/graphspell/_dictionaries")


    def buildDictionary (dVars, bJavaScript):
        lex_build.build(dVars['lexicon_src'], dVars['lang'], dVars['lang_name'], dVars['dic_filename'], \
                        bJavaScript, dVars['dic_name'], dVars['stemming_method'], int(dVars['fsa_method']))


    def main ():
        print("Python: " + sys.version)
        xParser = argparse.ArgumentParser()
        xParser.add_argument("lang", type=str, nargs='+', help="lang project to generate (name of folder in /lang)")
        xParser.add_argument("-b", "--build_data", help="launch build_data.py (part 1 and 2)", action="store_true")

︙