Version ${version}
Site web
-- Grammalecte est un correcteur grammatical open source sous licence GPL 3 -
+Version ${version}
Site web
-- Grammalecte est un correcteur grammatical open source sous licence GPL 3 -
+Grammalecte remercie
Index: gc_lang/fr/webext/panel/main.js
==================================================================
--- gc_lang/fr/webext/panel/main.js
+++ gc_lang/fr/webext/panel/main.js
@@ -56,10 +56,13 @@
             else if (xElem.id.startsWith("ui_option_")) {
                 storeUIOptions();
             }
             else if (xElem.id.startsWith("link_")) {
                 browser.tabs.create({url: xElem.dataset.url});
+            }
+            else if (xElem.id == "conj_button") {
+                openConjugueurTab();
             }
         }
         else if (xElem.className.startsWith("select")) {
             showPage(xElem.dataset.page);
         }/*
         else if (xElem.tagName === "A") {
             openURL(xElem.getAttribute("href"));
@@ -137,10 +140,23 @@
 
 function showTestResult (sText) {
     document.getElementById("tests_result").textContent = sText;
 }
+
+function openConjugueurTab () {
+    if (bChrome) {
+        browser.tabs.create({
+            url: browser.extension.getURL("panel/conjugueur.html")
+        });
+        return;
+    }
+    let xConjTab = browser.tabs.create({
+        url: browser.extension.getURL("panel/conjugueur.html")
+    });
+    xConjTab.then(onCreated, onError);
+}
 
 /* UI options */

Index: graphspell/dawg.py
==================================================================
--- graphspell/dawg.py
+++ graphspell/dawg.py
@@ -25,11 +25,11 @@
     if os.path.isfile(spf):
         with open(spf, "r", encoding="utf-8") as hSrc:
             for sLine in hSrc:
                 sLine = sLine.strip()
                 if sLine and not sLine.startswith("#"):
-                    yield sLine
+                    yield sLine.split("\t")
     else:
         raise OSError("# Error. File not found or not loadable: " + spf)
 
 
@@ -39,11 +39,11 @@
     # We store suffix/affix codes and tags within the graph after the “real” word.
     # A word is a list of numbers [ c1, c2, c3 . . . cN, iAffix, iTags]
     # Each arc is an index in self.lArcVal, where are stored characters, suffix/affix codes for stemming and tags.
     # Important: As usual, the last node (after ‘iTags’) is tagged final, AND the node after ‘cN’ is ALSO tagged final.
 
-    def __init__ (self, spfSrc, cStemming, sLangCode, sLangName="", sDicName=""):
+    def __init__ (self, src, cStemming, sLangCode, sLangName="", sDicName=""):
         print("===== Direct Acyclic Word Graph - Minimal Acyclic Finite State Automaton =====")
         cStemming = cStemming.upper()
         if cStemming == "A":
             funcStemmingGen = st.defineAffixCode
         elif cStemming == "S":
@@ -56,14 +56,17 @@
         lEntry = []
         lChar = ['']; dChar = {}; nChar = 1; dCharOccur = {}
         lAff = []; dAff = {}; nAff = 0; dAffOccur = {}
         lTag = []; dTag = {}; nTag = 0; dTagOccur = {}
         nErr = 0
-
+
         # read lexicon
-        for sLine in readFile(spfSrc):
-            sFlex, sStem, sTag = sLine.split("\t")
+        if type(src) is str:
+            iterable = readFile(src)
+        else:
+            iterable = src
+        for sFlex, sStem, sTag in iterable:
             addWordToCharDict(sFlex)
             # chars
             for c in sFlex:
                 if c not in dChar:
                     dChar[c] = nChar
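
With this hunk the lexicon no longer has to come from a file: readFile() now yields ready-split entries, and __init__ accepts either a path or any iterable of [flexion, stem, tags] triplets. A minimal sketch of the in-memory form (the words and tag strings below are invented for illustration):

    lEntries = [
        ["chienne", "chien", "nom fem sg"],     # one item = one former tab-separated lexicon line
        ["chiennes", "chien", "nom fem pl"],
    ]
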
@@ -95,16 +98,12 @@
         # Dictionary of arc values occurrency, to sort arcs of each node
         dValOccur = dict( [ (dChar[c], dCharOccur[c]) for c in dChar ] \
                         + [ (dAff[aff]+nChar, dAffOccur[aff]) for aff in dAff ] \
                         + [ (dTag[tag]+nChar+nAff, dTagOccur[tag]) for tag in dTag ] )
-        #with open(spfSrc[:-8]+".valuesfreq.txt", 'w', encoding='utf-8') as hFreqDst:  # DEBUG
-        #    for iKey, nOcc in sorted(dValOccur.items(), key=lambda t: t[1], reverse=True):
-        #        hFreqDst.write("{}: {}\n".format(lVal[iKey], nOcc))
-        #    hFreqDst.close()
-        self.sFileName = spfSrc
+        self.sFileName = src  if type(src) is str  else "[None]"
         self.sLangCode = sLangCode
         self.sLangName = sLangName
         self.sDicName = sDicName
         self.nEntry = len(lWord)
         self.aPreviousEntry = []
@@ -308,11 +307,11 @@
             if not zPattern or zPattern.search(self.lArcVal[nMorphVal]):
                 yield sEntry + "\t" + self.lArcVal[nMorphVal]
 
     # BINARY CONVERSION
-    def createBinary (self, sPathFile, nCompressionMethod, bDebug=False):
+    def _calculateBinary (self, nCompressionMethod):
         print(" > Write DAWG as an indexable binary dictionary [method: %d]" % nCompressionMethod)
         if nCompressionMethod == 1:
             self.nBytesArc = ( (self.nArcVal.bit_length() + 2) // 8 ) + 1   # We add 2 bits. See DawgNode.convToBytes1()
             self.nBytesOffset = 0
             self._calcNumBytesNodeAddress()
@@ -332,14 +331,10 @@
             print(" # Error: unknown compression method")
         print(" Arc values (chars, affixes and tags): {} -> {} bytes".format( self.nArcVal, len("\t".join(self.lArcVal).encode("utf-8")) ))
         print(" Arc size: {} bytes, Address size: {} bytes -> {} * {} = {} bytes".format( self.nBytesArc, self.nBytesNodeAddress, \
                                                                                            self.nBytesArc+self.nBytesNodeAddress, self.nArc, \
                                                                                            (self.nBytesArc+self.nBytesNodeAddress)*self.nArc ))
-        self._writeBinary(sPathFile, nCompressionMethod)
-        self._writeAsJSObject(sPathFile, nCompressionMethod)
-        if bDebug:
-            self._writeNodes(sPathFile, nCompressionMethod)
 
     def _calcNumBytesNodeAddress (self):
         "how many bytes needed to store all nodes/arcs in the binary dictionary"
         self.nBytesNodeAddress = 1
         while ((self.nBytesArc + self.nBytesNodeAddress) * self.nArc) > (2 ** (self.nBytesNodeAddress * 8)):
@@ -387,13 +382,12 @@
                     nSize -= nDiff
                 if self.lSortedNodes[i].size != nSize:
                     self.lSortedNodes[i].size = nSize
                     bEnd = False
 
-    def _writeAsJSObject (self, spfDst, nCompressionMethod, bInJSModule=False, bBinaryDictAsHexString=True):
-        if not spfDst.endswith(".json"):
-            spfDst += "."+str(nCompressionMethod)+".json"
+    def getBinaryAsJSON (self, nCompressionMethod=1, bBinaryDictAsHexString=True):
+        self._calculateBinary(nCompressionMethod)
         byDic = b""
         if nCompressionMethod == 1:
             byDic = self.oRoot.convToBytes1(self.nBytesArc, self.nBytesNodeAddress)
             for oNode in self.lMinimizedNodes:
                 byDic += oNode.convToBytes1(self.nBytesArc, self.nBytesNodeAddress)
@@ -403,44 +397,48 @@
                 byDic += oNode.convToBytes2(self.nBytesArc, self.nBytesNodeAddress)
         elif nCompressionMethod == 3:
             byDic = self.oRoot.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset)
             for oNode in self.lSortedNodes:
                 byDic += oNode.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset)
+        return {
+            "sHeader": "/pyfsa/",
+            "sLangCode": self.sLangCode,
+            "sLangName": self.sLangName,
+            "sDicName": self.sDicName,
+            "sFileName": self.sFileName,
+            "sDate": self._getDate(),
+            "nEntry": self.nEntry,
+            "nChar": self.nChar,
+            "nAff": self.nAff,
+            "nTag": self.nTag,
+            "cStemming": self.cStemming,
+            "dChar": self.dChar,
+            "nNode": self.nNode,
+            "nArc": self.nArc,
+            "nArcVal": self.nArcVal,
+            "lArcVal": self.lArcVal,
+            "nCompressionMethod": nCompressionMethod,
+            "nBytesArc": self.nBytesArc,
+            "nBytesNodeAddress": self.nBytesNodeAddress,
+            "nBytesOffset": self.nBytesOffset,
+            # Mozilla’s JS parser don’t like file bigger than 4 Mb!
+            # So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension.
+            # https://github.com/mozilla/addons-linter/issues/1361
+            "sByDic": byDic.hex()  if bBinaryDictAsHexString  else [ e  for e in byDic ]
+        }
+
+    def writeAsJSObject (self, spfDst, nCompressionMethod, bInJSModule=False, bBinaryDictAsHexString=True):
+        if not spfDst.endswith(".json"):
+            spfDst += "."+str(nCompressionMethod)+".json"
         with open(spfDst, "w", encoding="utf-8", newline="\n") as hDst:
             if bInJSModule:
                 hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ')
-            hDst.write(json.dumps({
-                "sHeader": "/pyfsa/",
-                "sLangCode": self.sLangCode,
-                "sLangName": self.sLangName,
-                "sDicName": self.sDicName,
-                "sFileName": self.sFileName,
-                "sDate": self._getDate(),
-                "nEntry": self.nEntry,
-                "nChar": self.nChar,
-                "nAff": self.nAff,
-                "nTag": self.nTag,
-                "cStemming": self.cStemming,
-                "dChar": self.dChar,
-                "nNode": self.nNode,
-                "nArc": self.nArc,
-                "nArcVal": self.nArcVal,
-                "lArcVal": self.lArcVal,
-                "nCompressionMethod": nCompressionMethod,
-                "nBytesArc": self.nBytesArc,
-                "nBytesNodeAddress": self.nBytesNodeAddress,
-                "nBytesOffset": self.nBytesOffset,
-                # JavaScript is a pile of shit, so Mozilla’s JS parser don’t like file bigger than 4 Mb!
-                # So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension.
-                # https://github.com/mozilla/addons-linter/issues/1361
-                "sByDic": byDic.hex()  if bBinaryDictAsHexString  else [ e  for e in byDic ]
-            }, ensure_ascii=False))
+            hDst.write( json.dumps(self.getBinaryAsJSON(nCompressionMethod, bBinaryDictAsHexString), ensure_ascii=False) )
             if bInJSModule:
                 hDst.write(";\n\nexports.dictionary = dictionary;\n")
 
-    def _writeBinary (self, sPathFile, nCompressionMethod):
+    def writeBinary (self, sPathFile, nCompressionMethod, bDebug=False):
         """
         Format of the binary indexable dictionary:
         Each section is separated with 4 bytes of \0
 
         - Section Header:
@@ -471,10 +469,11 @@
         - Section Word Graph (nodes / arcs)
             * A list of nodes which are a list of arcs with an address of the next node.
              See DawgNode.convToBytes() for details.
         """
+        self._calculateBinary(nCompressionMethod)
         if not sPathFile.endswith(".bdic"):
             sPathFile += "."+str(nCompressionMethod)+".bdic"
         with open(sPathFile, 'wb') as hDst:
             # header
             hDst.write("/pyfsa/{}/".format(nCompressionMethod).encode("utf-8"))
@@ -499,11 +498,12 @@
                 hDst.write(oNode.convToBytes2(self.nBytesArc, self.nBytesNodeAddress))
             elif nCompressionMethod == 3:
                 hDst.write(self.oRoot.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset))
                 for oNode in self.lSortedNodes:
                     hDst.write(oNode.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset))
-            hDst.close()
+        if bDebug:
+            self._writeNodes(sPathFile, nCompressionMethod)
 
     def _getDate (self):
         return time.strftime("%Y.%m.%d, %H:%M")
 
     def _writeNodes (self, sPathFile, nCompressionMethod):
@@ -522,23 +522,10 @@
             if nCompressionMethod == 3:
                 hDst.write(self.oRoot.getTxtRepr3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset, self.lArcVal)+"\n")
                 #hDst.write( ''.join( [ "%02X " % z  for z in self.oRoot.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset) ] ).strip() )
                 for oNode in self.lSortedNodes:
                     hDst.write(oNode.getTxtRepr3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset, self.lArcVal)+"\n")
-            hDst.close()
-
-    def writeResults (self, sPathFile):
-        bFileExits = os.path.isfile("_lexicons.res.txt")
-        with open("_lexicons.res.txt", "a", encoding='utf-8', newline="\n") as hDst:
-            sFormat1 = "{:<12} {:>12} {:>5} {:>8} {:>8} {:>6} {:>8} {:>9} {:>9} {:>15} {:>12} {:>12}\n"
-            sFormat2 = "{:<12} {:>12,} {:>5,} {:>8,} {:>8} {:>6,} {:>8,} {:>9,} {:>9,} {:>15,} {:>12,} {:>12,}\n"
-            if not bFileExits:
-                hDst.write(sFormat1.format("Lexicon", "Entries", "Chars", "Affixes", "Stemming", "Tags", "Values", "Nodes", "Arcs", "Lexicon (Kb)", "Dict (Kb)", "LT Dict (Kb)"))
-            hDst.write(sFormat2.format(self.sLangName, self.nEntry, self.nChar, self.nAff, self.cStemming + "FX", self.nTag, self.nArcVal, \
-                                       self.nNode, self.nArc, os.path.getsize(self.sFileName), os.path.getsize(sPathFile), \
-                                       os.path.getsize("cfsa/dict/{}.dict".format(self.sLangName)) if os.path.isfile("cfsa/dict/{}.dict".format(self.sLangName)) else 0))
-            hDst.close()
 
 
 class DawgNode:
     NextId = 0
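
Taken together, the dawg.py changes separate binary generation (_calculateBinary) from serialization, so a DAWG can now be turned into a plain dict without touching the filesystem. A rough usage sketch, assuming the class is imported from graphspell.dawg and using an invented two-entry lexicon:

    from graphspell.dawg import DAWG

    # invented mini-lexicon: [flexion, stem, morphological tags]
    lEntries = [
        ["maison", "maison", "nom fem sg"],
        ["maisons", "maison", "nom fem pl"],
    ]

    oDAWG = DAWG(lEntries, "S", "fr", sLangName="Français", sDicName="exemple")
    dJSON = oDAWG.getBinaryAsJSON(1)          # in-memory dict, binary graph stored as a hex string
    oDAWG.writeBinary("exemple.bdic", 1)      # on-disk binary dictionary, as before
    oDAWG.writeAsJSObject("exemple.json", 1)  # JSON file, now built on top of getBinaryAsJSON()

The dict returned by getBinaryAsJSON() is exactly what the reworked IBDAWG below can consume directly.
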
Index: graphspell/ibdawg.py
==================================================================
--- graphspell/ibdawg.py
+++ graphspell/ibdawg.py
@@ -77,23 +77,26 @@
 
 
 class IBDAWG:
     """INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""
 
-    def __init__ (self, sfDict):
-        self.by = pkgutil.get_data(__package__, "_dictionaries/" + sfDict)
-        if not self.by:
-            raise OSError("# Error. File not found or not loadable: "+sfDict)
-
-        if sfDict.endswith(".bdic"):
-            self._initBinary()
-        elif sfDict.endswith(".json"):
-            self._initJSON()
+    def __init__ (self, source):
+        if type(source) is str:
+            self.by = pkgutil.get_data(__package__, "_dictionaries/" + source)
+            if not self.by:
+                raise OSError("# Error. File not found or not loadable: "+source)
+
+            if source.endswith(".bdic"):
+                self._initBinary()
+            elif source.endswith(".json"):
+                self._initJSON(json.loads(self.by.decode("utf-8")))   #json.loads(self.by) # In Python 3.6, can read directly binary strings
+            else:
+                raise OSError("# Error. Unknown file type: "+source)
         else:
-            raise OSError("# Error. Unknown file type: "+sfDict)
+            self._initJSON(source)
 
-        self.sFileName = sfDict
+        self.sFileName = source  if type(source) is str  else "[None]"
         self._arcMask = (2 ** ((self.nBytesArc * 8) - 3)) - 1
         self._finalNodeMask = 1 << ((self.nBytesArc * 8) - 1)
         self._lastArcMask = 1 << ((self.nBytesArc * 8) - 2)
         self._addrBitMask = 1 << ((self.nBytesArc * 8) - 3)  # version 2
@@ -167,14 +170,13 @@
         for i in range(1, self.nChar+1):
             self.dChar[self.lArcVal[i]] = i
         self.dCharVal = { v: k  for k, v in self.dChar.items() }
         self.nBytesOffset = 1 # version 3
 
-    def _initJSON (self):
+    def _initJSON (self, oJSON):
         "initialize with a JSON text file"
-        self.__dict__.update(json.loads(self.by.decode("utf-8")))
-        #self.__dict__.update(json.loads(self.by)) # In Python 3.6, can read directly binary strings
+        self.__dict__.update(oJSON)
         self.byDic = binascii.unhexlify(self.sByDic)
 
     def getInfo (self):
         return " Language: {0.sLangName} Lang code: {0.sLangCode} Dictionary name: {0.sDicName}" \
                " Compression method: {0.nCompressionMethod:>2} Date: {0.sDate} Stemming: {0.cStemming}FX\n" \

Index: graphspell/spellchecker.py
==================================================================
--- graphspell/spellchecker.py
+++ graphspell/spellchecker.py
@@ -30,20 +30,20 @@
         self.oMainDic = self._loadDictionary(sfMainDic, True)
         self.oExtendedDic = self._loadDictionary(sfExtendedDic)
         self.oPersonalDic = self._loadDictionary(sfPersonalDic)
         self.oTokenizer = None
 
-    def _loadDictionary (self, sfDictionary, bNecessary=False):
+    def _loadDictionary (self, source, bNecessary=False):
         "returns an IBDAWG object"
-        if not sfDictionary:
+        if not source:
             return None
         try:
-            return ibdawg.IBDAWG(sfDictionary)
+            return ibdawg.IBDAWG(source)
         except Exception as e:
             if bNecessary:
-                raise Exception(str(e), "Error: <" + sfDictionary + "> not loaded.")
-            print("Error: <" + sfDictionary + "> not loaded.")
+                raise Exception(str(e), "Error: <" + str(source) + "> not loaded.")
+            print("Error: <" + str(source) + "> not loaded.")
             traceback.print_exc()
             return None
 
     def loadTokenizer (self):
         self.oTokenizer = tokenizer.Tokenizer(self.sLangCode)
@@ -51,23 +51,23 @@
 
     def getTokenizer (self):
         if not self.oTokenizer:
             self.loadTokenizer()
         return self.oTokenizer
 
-    def setMainDictionary (self, sfDictionary):
+    def setMainDictionary (self, source):
         "returns True if the dictionary is loaded"
-        self.oMainDic = self._loadDictionary(sfDictionary)
+        self.oMainDic = self._loadDictionary(source)
         return bool(self.oMainDic)
 
-    def setExtendedDictionary (self, sfDictionary):
+    def setExtendedDictionary (self, source):
         "returns True if the dictionary is loaded"
-        self.oExtendedDic = self._loadDictionary(sfDictionary)
+        self.oExtendedDic = self._loadDictionary(source)
         return bool(self.oExtendedDic)
 
-    def setPersonalDictionary (self, sfDictionary):
+    def setPersonalDictionary (self, source):
         "returns True if the dictionary is loaded"
-        self.oPersonalDic = self._loadDictionary(sfDictionary)
+        self.oPersonalDic = self._loadDictionary(source)
         return bool(self.oPersonalDic)
 
     # parse text functions
 
     def parseParagraph (self, sText, bSpellSugg=False):
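
Since IBDAWG now dispatches on the type of its source, the dict produced by DAWG.getBinaryAsJSON() can be handed to the spell checker without an intermediate .json file. A hedged sketch: dJSON is the dict from the previous example, and the SpellChecker constructor arguments beyond the language code are assumed here:

    from graphspell.ibdawg import IBDAWG
    from graphspell.spellchecker import SpellChecker

    oDic = IBDAWG(dJSON)              # non-string source -> _initJSON(source)
    print(oDic.getInfo())

    oSC = SpellChecker("fr")          # assumed signature: language code first
    oSC.setPersonalDictionary(dJSON)  # the setters accept a file name or a dict
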
Index: lex_build.py
==================================================================
--- lex_build.py
+++ lex_build.py
@@ -12,11 +12,11 @@
 
 def build (spfSrc, sLangCode, sLangName, sfDict, bJSON=False, sDicName="", cStemmingMethod="S", nCompressMethod=1):
     "transform a text lexicon as a binary indexable dictionary"
     oDAWG = fsa.DAWG(spfSrc, cStemmingMethod, sLangCode, sLangName, sDicName)
     dir_util.mkpath("graphspell/_dictionaries")
     oDAWG.writeInfo("graphspell/_dictionaries/" + sfDict + ".info.txt")
-    oDAWG.createBinary("graphspell/_dictionaries/" + sfDict + ".bdic", int(nCompressMethod))
+    oDAWG.writeBinary("graphspell/_dictionaries/" + sfDict + ".bdic", int(nCompressMethod))
     if bJSON:
         dir_util.mkpath("graphspell-js/_dictionaries")
         oDic = IBDAWG(sfDict + ".bdic")
         oDic.writeAsJSObject("graphspell-js/_dictionaries/" + sfDict + ".json", bBinaryDictAsHexString=True)

Index: make.py
==================================================================
--- make.py
+++ make.py
@@ -84,10 +84,11 @@
     # Extension files
     hZip.writestr("META-INF/manifest.xml", helpers.fileFile("gc_core/py/oxt/manifest.xml", dVars))
     hZip.writestr("description.xml", helpers.fileFile("gc_core/py/oxt/description.xml", dVars))
     hZip.writestr("Linguistic.xcu", helpers.fileFile("gc_core/py/oxt/Linguistic.xcu", dVars))
     hZip.writestr("Grammalecte.py", helpers.fileFile("gc_core/py/oxt/Grammalecte.py", dVars))
+    hZip.writestr("pythonpath/helpers.py", helpers.fileFile("gc_core/py/oxt/helpers.py", dVars))
     for sf in dVars["extras"].split(","):
         hZip.writestr(sf.strip(), helpers.fileFile(spLang + '/' + sf.strip(), dVars))
     if "logo" in dVars.keys() and dVars["logo"].strip():
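
On the command-line side, lex_build.build() keeps its signature; only the call on the DAWG object changes from createBinary() to writeBinary(). A usage sketch with a placeholder lexicon path and dictionary name:

    import lex_build

    # writes graphspell/_dictionaries/fr.info.txt and fr.bdic,
    # plus graphspell-js/_dictionaries/fr.json when bJSON=True
    lex_build.build("lexicons/French.lex", "fr", "French", "fr", bJSON=True, sDicName="Exemple")
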