Index: graphspell/ibdawg.py
==================================================================
--- graphspell/ibdawg.py
+++ graphspell/ibdawg.py
@@ -176,45 +176,10 @@
                 "  Compression method: {0.nCompressionMethod:>2}  Date: {0.sDate}  Stemming: {0.cStemming}FX\n" \
                 "  Arcs values: {0.nArcVal:>10,} = {0.nChar:>5,} characters, {0.nAff:>6,} affixes, {0.nTag:>6,} tags\n" \
                 "  Dictionary: {0.nEntry:>12,} entries, {0.nNode:>11,} nodes, {0.nArc:>11,} arcs\n" \
                 "  Address size: {0.nBytesNodeAddress:>1} bytes, Arc size: {0.nBytesArc:>1} bytes\n".format(self)
 
-    def writeAsJSObject (self, spfDest, bInJSModule=False, bBinaryDictAsHexString=False):
-        "write IBDAWG as a JavaScript object in a JavaScript module"
-        with open(spfDest, "w", encoding="utf-8", newline="\n") as hDst:
-            if bInJSModule:
-                hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ')
-            hDst.write(json.dumps({
-                "sHeader": "/grammalecte-fsa/",
-                "sLangCode": self.sLangCode,
-                "sLangName": self.sLangName,
-                "sDicName": self.sDicName,
-                "sDescription": self.sDescription,
-                "sFileName": self.sFileName,
-                "sDate": self.sDate,
-                "nEntry": self.nEntry,
-                "nChar": self.nChar,
-                "nAff": self.nAff,
-                "nTag": self.nTag,
-                "cStemming": self.cStemming,
-                "dChar": self.dChar,
-                "nNode": self.nNode,
-                "nArc": self.nArc,
-                "nArcVal": self.nArcVal,
-                "lArcVal": self.lArcVal,
-                "nCompressionMethod": self.nCompressionMethod,
-                "nBytesArc": self.nBytesArc,
-                "nBytesNodeAddress": self.nBytesNodeAddress,
-                # JavaScript is a pile of shit, so Mozilla’s JS parser don’t like file bigger than 4 Mb!
-                # So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension.
-                # https://github.com/mozilla/addons-linter/issues/1361
-                "sByDic": self.byDic.hex() if bBinaryDictAsHexString else [ e for e in self.byDic ],
-                "l2grams": list(self.a2grams)
-            }, ensure_ascii=False))
-            if bInJSModule:
-                hDst.write(";\n\nexports.dictionary = dictionary;\n")
-
     def isValidToken (self, sToken):
         "checks if <sToken> is valid (if there is hyphens in <sToken>, <sToken> is split, each part is checked)"
         sToken = st.spellingNormalization(sToken)
         if self.isValid(sToken):
             return True

Index: lex_build.py
==================================================================
--- lex_build.py
+++ lex_build.py
@@ -13,18 +13,14 @@
 
 def build (spfSrc, sLangCode, sLangName, sfDict, bJavaScript=False, sDicName="", sDescription="", sFilter="", cStemmingMethod="S", nCompressMethod=1):
     "transform a text lexicon as a binary indexable dictionary"
     oDAWG = fsa.DAWG(spfSrc, cStemmingMethod, sLangCode, sLangName, sDicName, sDescription, sFilter)
     dir_util.mkpath("graphspell/_dictionaries")
-    #oDAWG.writeInfo("graphspell/_dictionaries/" + sfDict + ".info.txt")
-    #oDAWG.writeBinary("graphspell/_dictionaries/" + sfDict + ".bdic", int(nCompressMethod))
     oDAWG.writeAsJSObject("graphspell/_dictionaries/" + sfDict + ".json")
     if bJavaScript:
         dir_util.mkpath("graphspell-js/_dictionaries")
         oDAWG.writeAsJSObject("graphspell-js/_dictionaries/" + sfDict + ".json")
-        #oDic = IBDAWG(sfDict + ".bdic")
-        #oDic.writeAsJSObject("graphspell-js/_dictionaries/" + sfDict + ".json", bBinaryDictAsHexString=True)
 
 
 def main ():
     "parse args from CLI"
     xParser = argparse.ArgumentParser()
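
For reference, a minimal sketch of how the JSON dictionary written by oDAWG.writeAsJSObject(...) in lex_build.py could be inspected. It assumes the DAWG serializer emits the same field names as the removed IBDAWG.writeAsJSObject() shown above (sLangName, nEntry, sByDic, ...); the helper name inspect_dictionary and the "fr.json" path are hypothetical, used only for illustration.

# Minimal sketch (not part of the change above): inspect a JSON dictionary
# produced by lex_build.py. Assumes the field names match those of the removed
# IBDAWG.writeAsJSObject() shown above; inspect_dictionary and the "fr.json"
# path are illustrative only.
import json

def inspect_dictionary (spfJSON):
    "print a short summary of a generated JSON dictionary"
    with open(spfJSON, "r", encoding="utf-8") as hSrc:
        dDict = json.load(hSrc)
    print("Language:", dDict.get("sLangName"), " Code:", dDict.get("sLangCode"))
    print("Entries:", dDict.get("nEntry"), " Nodes:", dDict.get("nNode"), " Arcs:", dDict.get("nArc"))
    xByDic = dDict.get("sByDic", "")
    # the binary FSA may be stored as a hexadecimal string or as a list of byte values
    byDic = bytes.fromhex(xByDic) if isinstance(xByDic, str) else bytes(xByDic)
    print("Binary FSA size:", len(byDic), "bytes")

if __name__ == "__main__":
    inspect_dictionary("graphspell/_dictionaries/fr.json")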