Index: gc_lang/fr/webext/panel/lex_editor.js ================================================================== --- gc_lang/fr/webext/panel/lex_editor.js +++ gc_lang/fr/webext/panel/lex_editor.js @@ -759,11 +759,11 @@ build: function () { let xProgressNode = document.getElementById("wait_progress"); let lEntry = oLexiconTable.getEntries(); if (lEntry.length > 0) { let oDAWG = new DAWG(lEntry, "S", "fr", "Français", this.sName, this.sDescription, xProgressNode); - let oJSON = oDAWG.createBinaryJSON(1); + let oJSON = oDAWG.createBinaryJSON(); oDictHandler.saveDictionary(this.sName, oJSON); this.oIBDAWG = new IBDAWG(oJSON); this.setDictData(this.oIBDAWG.nEntry, this.oIBDAWG.sDate); } else { oDictHandler.saveDictionary(this.sName, null); Index: graphspell-js/dawg.js ================================================================== --- graphspell-js/dawg.js +++ graphspell-js/dawg.js @@ -342,11 +342,11 @@ } } } // BINARY CONVERSION - _calculateBinary (nCompressionMethod=1) { + _calculateBinary () { console.log("Write DAWG as an indexable binary dictionary"); this.nBytesArc = Math.floor( (this.nArcVal.toString(2).length + 2) / 8 ) + 1; // We add 2 bits. See DawgNode.convToBytes() this.nBytesOffset = 0; this._calcNumBytesNodeAddress(); this._calcNodesAddress(); @@ -405,12 +405,12 @@ nWeight = nWeight - 8; } return nVal; } - createBinaryJSON (nCompressionMethod=1) { - this._calculateBinary(nCompressionMethod); + createBinaryJSON () { + this._calculateBinary(); this._binaryToList(); let oJSON = { "sHeader": "/grammalecte-fsa/", "sLangCode": this.sLangCode, "sLangName": this.sLangName, @@ -426,11 +426,10 @@ "dChar": helpers.mapToObject(this.dChar), "nNode": this.nNode, "nArc": this.nArc, "lArcVal": this.lArcVal, "nArcVal": this.nArcVal, - "nCompressionMethod": nCompressionMethod, "nBytesArc": this.nBytesArc, "nBytesNodeAddress": this.nBytesNodeAddress, "nBytesOffset": this.nBytesOffset, //"sByDic": this.sByDic, // binary word graph "lByDic": this.lByDic, Index: graphspell-js/dic_merger.js ================================================================== --- graphspell-js/dic_merger.js +++ graphspell-js/dic_merger.js @@ -36,11 +36,11 @@ if (xProgressBar) { xProgressBar.value = xProgressBar.max; } try { let oDAWG = new DAWG(lEntry, cStemming, sLangCode, sLangName, sDicName, sDescription, xProgressBar); - let oDict = oDAWG.createBinaryJSON(1); + let oDict = oDAWG.createBinaryJSON(); return oDict; } catch (e) { console.log("Dictionaries merger: unable to generate merged dictionary"); console.error(e); Index: graphspell-js/ibdawg.js ================================================================== --- graphspell-js/ibdawg.js +++ graphspell-js/ibdawg.js @@ -145,11 +145,11 @@ console.log("dic:" + source.slice(0, 1000)); throw Error("# Error. File not found or not loadable.\n" + e.message + "\n"); } /* Properties: - sName, nCompressionMethod, sHeader, lArcVal, nArcVal, sByDic, sLang, nChar, nBytesArc, nBytesNodeAddress, + sName, sHeader, lArcVal, nArcVal, sByDic, sLang, nChar, nBytesArc, nBytesNodeAddress, nEntry, nNode, nArc, nAff, cStemming, nTag, dChar, nBytesOffset, */ if (!(this.sHeader.startsWith("/grammalecte-fsa/") || this.sHeader.startsWith("/pyfsa/"))) { throw TypeError("# Error. Not a grammalecte-fsa binary dictionary. Header: " + this.sHeader); @@ -209,11 +209,11 @@ } } getInfo () { return ` Language: ${this.sLangName} Lang code: ${this.sLangCode} Dictionary name: ${this.sDicName}\n` + - ` Compression method: ${this.nCompressionMethod} Date: ${this.sDate} Stemming: ${this.cStemming}FX\n` + + ` Date: ${this.sDate} Stemming: ${this.cStemming}FX\n` + ` Arcs values: ${this.nArcVal} = ${this.nChar} characters, ${this.nAff} affixes, ${this.nTag} tags\n` + ` Dictionary: ${this.nEntry} entries, ${this.nNode} nodes, ${this.nArc} arcs\n` + ` Address size: ${this.nBytesNodeAddress} bytes, Arc size: ${this.nBytesArc} bytes\n`; } @@ -234,11 +234,10 @@ "dChar": helpers.mapToObject(this.dChar), "nNode": this.nNode, "nArc": this.nArc, "lArcVal": this.lArcVal, "nArcVal": this.nArcVal, - "nCompressionMethod": this.nCompressionMethod, "nBytesArc": this.nBytesArc, "nBytesNodeAddress": this.nBytesNodeAddress, "nBytesOffset": this.nBytesOffset, "sByDic": this.sByDic, // binary word graph "l2grams": this.l2grams Index: graphspell/dawg.py ================================================================== --- graphspell/dawg.py +++ graphspell/dawg.py @@ -356,11 +356,11 @@ if not zPattern or zPattern.search(self.lArcVal[nMorphVal]): yield sEntry + "\t" + self.lArcVal[nMorphVal] # BINARY CONVERSION - def _calculateBinary (self, nCompressionMethod=1): + def _calculateBinary (self): print(" > Write DAWG as an indexable binary dictionary") self.nBytesArc = ( (self.nArcVal.bit_length() + 2) // 8 ) + 1 # We add 2 bits. See DawgNode.convToBytes() self.nBytesOffset = 0 self._calcNumBytesNodeAddress() self._calcNodesAddress() @@ -385,10 +385,17 @@ for oNode in self.lMinimizedNodes: oNode.addr = iAddr iAddr += max(len(oNode.arcs), 1) * nBytesNode def _binaryToList (self): + """ + Convert binary string to binary list + BEFORE: Arc Address Arc Address + ||||||||| ||||||||| ||||||||| ||||||||| ||||||||| ||||||||| ||||||||| ||||||||| ||||||||| ||||||||| ||||||||| ||||||||| ... + + AFTER: list of integers: [ arc, address, arc, address, arc, address, ... arc, address ] + """ self.lByDic = [] nAcc = 0 byBuffer = b"" nDivisor = (self.nBytesArc + self.nBytesNodeAddress) / 2 for i in range(0, len(self.byDic)): @@ -400,13 +407,13 @@ self.lByDic.append(round(int.from_bytes(byBuffer, byteorder="big") / nDivisor)) byBuffer = b"" nAcc = -1 nAcc = nAcc + 1 - def getBinaryAsJSON (self, nCompressionMethod=1): + def getBinaryAsJSON (self): "return a JSON string containing all necessary data of the dictionary (compressed as a binary string)" - self._calculateBinary(nCompressionMethod) + self._calculateBinary() self._binaryToList() return { "sHeader": "/grammalecte-fsa/", "sLangCode": self.sLangCode, "sLangName": self.sLangName, @@ -422,11 +429,10 @@ "dChar": self.dChar, "nNode": self.nNode, "nArc": self.nArc, "nArcVal": self.nArcVal, "lArcVal": self.lArcVal, - "nCompressionMethod": nCompressionMethod, "nBytesArc": self.nBytesArc, "nBytesNodeAddress": self.nBytesNodeAddress, "nBytesOffset": self.nBytesOffset, # Mozilla’s JS parser don’t like file bigger than 4 Mb! # So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension. @@ -434,28 +440,24 @@ #"sByDic": self.byDic.hex(), "lByDic": self.lByDic, "l2grams": list(self.a2grams) } - def writeAsJSObject (self, spfDst, nCompressionMethod=1, bInJSModule=False): + def writeAsJSObject (self, spfDst): "write a file (JSON or JS module) with all the necessary data" if not spfDst.endswith(".json"): - spfDst += "."+str(nCompressionMethod)+".json" + spfDst += ".json" with open(spfDst, "w", encoding="utf-8", newline="\n") as hDst: - if bInJSModule: - hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ') - hDst.write( json.dumps(self.getBinaryAsJSON(nCompressionMethod), ensure_ascii=False) ) - if bInJSModule: - hDst.write(";\n\nexports.dictionary = dictionary;\n") + hDst.write( json.dumps(self.getBinaryAsJSON(), ensure_ascii=False) ) def _getDate (self): return time.strftime("%Y-%m-%d %H:%M:%S") - def _writeNodes (self, sPathFile, nCompressionMethod=1): + def _writeNodes (self, sPathFile): "for debugging only" print(" > Write nodes") - with open(sPathFile+".nodes."+str(nCompressionMethod)+".txt", 'w', encoding='utf-8', newline="\n") as hDst: + with open(sPathFile+".nodes.txt", 'w', encoding='utf-8', newline="\n") as hDst: hDst.write(self.oRoot.getTxtRepr(self.nBytesArc, self.lArcVal)+"\n") #hDst.write( ''.join( [ "%02X " % z for z in self.oRoot.convToBytes(self.nBytesArc, self.nBytesNodeAddress) ] ).strip() ) for oNode in self.lMinimizedNodes: hDst.write(oNode.getTxtRepr(self.nBytesArc, self.lArcVal)+"\n") Index: graphspell/ibdawg.py ================================================================== --- graphspell/ibdawg.py +++ graphspell/ibdawg.py @@ -172,11 +172,11 @@ print("# No module ") def getInfo (self): "return string about the IBDAWG" return " Language: {0.sLangName} Lang code: {0.sLangCode} Dictionary name: {0.sDicName}" \ - " Compression method: {0.nCompressionMethod:>2} Date: {0.sDate} Stemming: {0.cStemming}FX\n" \ + " Date: {0.sDate} Stemming: {0.cStemming}FX\n" \ " Arcs values: {0.nArcVal:>10,} = {0.nChar:>5,} characters, {0.nAff:>6,} affixes, {0.nTag:>6,} tags\n" \ " Dictionary: {0.nEntry:>12,} entries, {0.nNode:>11,} nodes, {0.nArc:>11,} arcs\n" \ " Address size: {0.nBytesNodeAddress:>1} bytes, Arc size: {0.nBytesArc:>1} bytes\n".format(self) def isValidToken (self, sToken):