Index: gc_lang/fr/webext/panel/main.js
==================================================================
--- gc_lang/fr/webext/panel/main.js
+++ gc_lang/fr/webext/panel/main.js
@@ -56,10 +56,13 @@
else if (xElem.id.startsWith("ui_option_")) {
storeUIOptions();
}
else if (xElem.id.startsWith("link_")) {
browser.tabs.create({url: xElem.dataset.url});
+ }
+ else if (xElem.id == "conj_button") {
+ openConjugueurTab();
}
} else if (xElem.className.startsWith("select")) {
showPage(xElem.dataset.page);
}/* else if (xElem.tagName === "A") {
openURL(xElem.getAttribute("href"));
@@ -137,10 +140,23 @@
function showTestResult (sText) {
document.getElementById("tests_result").textContent = sText;
}
+
+function openConjugueurTab () {
+ if (bChrome) {
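+        // in Chrome, tabs.create() is callback-based and returns no Promise, so no .then() chaining here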
+ browser.tabs.create({
+ url: browser.extension.getURL("panel/conjugueur.html")
+ });
+ return;
+ }
+ let xConjTab = browser.tabs.create({
+ url: browser.extension.getURL("panel/conjugueur.html")
+ });
+ xConjTab.then(onCreated, onError);
+}
/*
UI options
*/
Index: graphspell/dawg.py
==================================================================
--- graphspell/dawg.py
+++ graphspell/dawg.py
@@ -25,11 +25,11 @@
if os.path.isfile(spf):
with open(spf, "r", encoding="utf-8") as hSrc:
for sLine in hSrc:
sLine = sLine.strip()
if sLine and not sLine.startswith("#"):
- yield sLine
+ yield sLine.split("\t")
else:
raise OSError("# Error. File not found or not loadable: " + spf)
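With this change, readFile() yields each entry already split on tabs, so consumers unpack (flexion, stem, tags) triples directly instead of splitting raw lines themselves. A minimal sketch of the new contract (the file name is hypothetical):

for sFlex, sStem, sTag in readFile("fr.lexicon.txt"):
    # each yielded item is now a list like ["chats", "chat", "nom mas pl"]
    print(sFlex, sStem, sTag)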
@@ -39,11 +39,11 @@
    # We store suffix/affix codes and tags within the graph, after the “real” word.
    # A word is a list of numbers [ c1, c2, c3 … cN, iAffix, iTags ].
    # Each arc is an index in self.lArcVal, where characters, suffix/affix codes for stemming and tags are stored.
    # Important: as usual, the last node (after ‘iTags’) is tagged final, AND the node after ‘cN’ is ALSO tagged final.
- def __init__ (self, spfSrc, cStemming, sLangCode, sLangName="", sDicName=""):
+ def __init__ (self, src, cStemming, sLangCode, sLangName="", sDicName=""):
print("===== Direct Acyclic Word Graph - Minimal Acyclic Finite State Automaton =====")
cStemming = cStemming.upper()
if cStemming == "A":
funcStemmingGen = st.defineAffixCode
elif cStemming == "S":
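To make the encoding described in the comment above concrete, an illustrative layout (all indices invented for the example):

# with dChar = {"c": 1, "h": 2, "a": 3, "t": 4, "s": 5} and nChar = 6,
# an affix code at dAff index 2 and a tag at dTag index 1 (nAff = 10),
# the entry “chats” is stored as the arc-value sequence
#     [1, 2, 3, 4, 5,  2 + 6,  1 + 6 + 10]
#      c  h  a  t  s   iAffix  iTags
# where the node reached after ‘s’ (cN) is final, and so is the node after iTags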
@@ -56,14 +56,17 @@
lEntry = []
lChar = ['']; dChar = {}; nChar = 1; dCharOccur = {}
lAff = []; dAff = {}; nAff = 0; dAffOccur = {}
lTag = []; dTag = {}; nTag = 0; dTagOccur = {}
nErr = 0
-
+
# read lexicon
- for sLine in readFile(spfSrc):
- sFlex, sStem, sTag = sLine.split("\t")
+ if type(src) is str:
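+            # src is a file path: read the lexicon file (one tab-separated entry per line)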
+ iterable = readFile(src)
+ else:
+ iterable = src
+ for sFlex, sStem, sTag in iterable:
addWordToCharDict(sFlex)
# chars
for c in sFlex:
if c not in dChar:
dChar[c] = nChar
@@ -95,16 +98,12 @@
        # Dictionary of arc value occurrences, used to sort the arcs of each node
dValOccur = dict( [ (dChar[c], dCharOccur[c]) for c in dChar ] \
+ [ (dAff[aff]+nChar, dAffOccur[aff]) for aff in dAff ] \
+ [ (dTag[tag]+nChar+nAff, dTagOccur[tag]) for tag in dTag ] )
- #with open(spfSrc[:-8]+".valuesfreq.txt", 'w', encoding='utf-8') as hFreqDst: # DEBUG
- # for iKey, nOcc in sorted(dValOccur.items(), key=lambda t: t[1], reverse=True):
- # hFreqDst.write("{}: {}\n".format(lVal[iKey], nOcc))
- # hFreqDst.close()
- self.sFileName = spfSrc
+ self.sFileName = src if type(src) is str else "[None]"
self.sLangCode = sLangCode
self.sLangName = sLangName
self.sDicName = sDicName
self.nEntry = len(lWord)
self.aPreviousEntry = []
@@ -308,11 +307,11 @@
if not zPattern or zPattern.search(self.lArcVal[nMorphVal]):
yield sEntry + "\t" + self.lArcVal[nMorphVal]
# BINARY CONVERSION
- def createBinary (self, sPathFile, nCompressionMethod, bDebug=False):
+ def _calculateBinary (self, nCompressionMethod):
print(" > Write DAWG as an indexable binary dictionary [method: %d]" % nCompressionMethod)
if nCompressionMethod == 1:
self.nBytesArc = ( (self.nArcVal.bit_length() + 2) // 8 ) + 1 # We add 2 bits. See DawgNode.convToBytes1()
self.nBytesOffset = 0
self._calcNumBytesNodeAddress()
@@ -332,14 +331,10 @@
print(" # Error: unknown compression method")
print(" Arc values (chars, affixes and tags): {} -> {} bytes".format( self.nArcVal, len("\t".join(self.lArcVal).encode("utf-8")) ))
print(" Arc size: {} bytes, Address size: {} bytes -> {} * {} = {} bytes".format( self.nBytesArc, self.nBytesNodeAddress, \
self.nBytesArc+self.nBytesNodeAddress, self.nArc, \
(self.nBytesArc+self.nBytesNodeAddress)*self.nArc ))
- self._writeBinary(sPathFile, nCompressionMethod)
- self._writeAsJSObject(sPathFile, nCompressionMethod)
- if bDebug:
- self._writeNodes(sPathFile, nCompressionMethod)
def _calcNumBytesNodeAddress (self):
"how many bytes needed to store all nodes/arcs in the binary dictionary"
self.nBytesNodeAddress = 1
while ((self.nBytesArc + self.nBytesNodeAddress) * self.nArc) > (2 ** (self.nBytesNodeAddress * 8)):
@@ -387,13 +382,12 @@
nSize -= nDiff
if self.lSortedNodes[i].size != nSize:
self.lSortedNodes[i].size = nSize
bEnd = False
- def _writeAsJSObject (self, spfDst, nCompressionMethod, bInJSModule=False, bBinaryDictAsHexString=True):
- if not spfDst.endswith(".json"):
- spfDst += "."+str(nCompressionMethod)+".json"
+ def getBinaryAsJSON (self, nCompressionMethod=1, bBinaryDictAsHexString=True):
+ self._calculateBinary(nCompressionMethod)
byDic = b""
if nCompressionMethod == 1:
byDic = self.oRoot.convToBytes1(self.nBytesArc, self.nBytesNodeAddress)
for oNode in self.lMinimizedNodes:
byDic += oNode.convToBytes1(self.nBytesArc, self.nBytesNodeAddress)
@@ -403,44 +397,48 @@
byDic += oNode.convToBytes2(self.nBytesArc, self.nBytesNodeAddress)
elif nCompressionMethod == 3:
byDic = self.oRoot.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset)
for oNode in self.lSortedNodes:
byDic += oNode.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset)
+ return {
+ "sHeader": "/pyfsa/",
+ "sLangCode": self.sLangCode,
+ "sLangName": self.sLangName,
+ "sDicName": self.sDicName,
+ "sFileName": self.sFileName,
+ "sDate": self._getDate(),
+ "nEntry": self.nEntry,
+ "nChar": self.nChar,
+ "nAff": self.nAff,
+ "nTag": self.nTag,
+ "cStemming": self.cStemming,
+ "dChar": self.dChar,
+ "nNode": self.nNode,
+ "nArc": self.nArc,
+ "nArcVal": self.nArcVal,
+ "lArcVal": self.lArcVal,
+ "nCompressionMethod": nCompressionMethod,
+ "nBytesArc": self.nBytesArc,
+ "nBytesNodeAddress": self.nBytesNodeAddress,
+ "nBytesOffset": self.nBytesOffset,
+        # Mozilla’s JS parser doesn’t like files bigger than 4 MB!
+        # So, if necessary, we use a hexadecimal string, which we convert later in the Firefox extension.
+ # https://github.com/mozilla/addons-linter/issues/1361
+ "sByDic": byDic.hex() if bBinaryDictAsHexString else [ e for e in byDic ]
+ }
+ def writeAsJSObject (self, spfDst, nCompressionMethod, bInJSModule=False, bBinaryDictAsHexString=True):
+ if not spfDst.endswith(".json"):
+ spfDst += "."+str(nCompressionMethod)+".json"
with open(spfDst, "w", encoding="utf-8", newline="\n") as hDst:
if bInJSModule:
hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ')
- hDst.write(json.dumps({
- "sHeader": "/pyfsa/",
- "sLangCode": self.sLangCode,
- "sLangName": self.sLangName,
- "sDicName": self.sDicName,
- "sFileName": self.sFileName,
- "sDate": self._getDate(),
- "nEntry": self.nEntry,
- "nChar": self.nChar,
- "nAff": self.nAff,
- "nTag": self.nTag,
- "cStemming": self.cStemming,
- "dChar": self.dChar,
- "nNode": self.nNode,
- "nArc": self.nArc,
- "nArcVal": self.nArcVal,
- "lArcVal": self.lArcVal,
- "nCompressionMethod": nCompressionMethod,
- "nBytesArc": self.nBytesArc,
- "nBytesNodeAddress": self.nBytesNodeAddress,
- "nBytesOffset": self.nBytesOffset,
- # JavaScript is a pile of shit, so Mozilla’s JS parser don’t like file bigger than 4 Mb!
- # So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension.
- # https://github.com/mozilla/addons-linter/issues/1361
- "sByDic": byDic.hex() if bBinaryDictAsHexString else [ e for e in byDic ]
- }, ensure_ascii=False))
+ hDst.write( json.dumps(self.getBinaryAsJSON(nCompressionMethod, bBinaryDictAsHexString), ensure_ascii=False) )
if bInJSModule:
hDst.write(";\n\nexports.dictionary = dictionary;\n")
- def _writeBinary (self, sPathFile, nCompressionMethod):
+ def writeBinary (self, sPathFile, nCompressionMethod, bDebug=False):
"""
Format of the binary indexable dictionary:
        Each section is separated by 4 bytes of \0
- Section Header:
@@ -471,10 +469,11 @@
- Section Word Graph (nodes / arcs)
* A list of nodes which are a list of arcs with an address of the next node.
See DawgNode.convToBytes() for details.
"""
+ self._calculateBinary(nCompressionMethod)
if not sPathFile.endswith(".bdic"):
sPathFile += "."+str(nCompressionMethod)+".bdic"
with open(sPathFile, 'wb') as hDst:
# header
hDst.write("/pyfsa/{}/".format(nCompressionMethod).encode("utf-8"))
@@ -499,11 +498,12 @@
hDst.write(oNode.convToBytes2(self.nBytesArc, self.nBytesNodeAddress))
elif nCompressionMethod == 3:
hDst.write(self.oRoot.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset))
for oNode in self.lSortedNodes:
hDst.write(oNode.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset))
- hDst.close()
+ if bDebug:
+ self._writeNodes(sPathFile, nCompressionMethod)
def _getDate (self):
return time.strftime("%Y.%m.%d, %H:%M")
def _writeNodes (self, sPathFile, nCompressionMethod):
@@ -522,23 +522,10 @@
if nCompressionMethod == 3:
hDst.write(self.oRoot.getTxtRepr3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset, self.lArcVal)+"\n")
#hDst.write( ''.join( [ "%02X " % z for z in self.oRoot.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset) ] ).strip() )
for oNode in self.lSortedNodes:
hDst.write(oNode.getTxtRepr3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset, self.lArcVal)+"\n")
- hDst.close()
-
- def writeResults (self, sPathFile):
- bFileExits = os.path.isfile("_lexicons.res.txt")
- with open("_lexicons.res.txt", "a", encoding='utf-8', newline="\n") as hDst:
- sFormat1 = "{:<12} {:>12} {:>5} {:>8} {:>8} {:>6} {:>8} {:>9} {:>9} {:>15} {:>12} {:>12}\n"
- sFormat2 = "{:<12} {:>12,} {:>5,} {:>8,} {:>8} {:>6,} {:>8,} {:>9,} {:>9,} {:>15,} {:>12,} {:>12,}\n"
- if not bFileExits:
- hDst.write(sFormat1.format("Lexicon", "Entries", "Chars", "Affixes", "Stemming", "Tags", "Values", "Nodes", "Arcs", "Lexicon (Kb)", "Dict (Kb)", "LT Dict (Kb)"))
- hDst.write(sFormat2.format(self.sLangName, self.nEntry, self.nChar, self.nAff, self.cStemming + "FX", self.nTag, self.nArcVal, \
- self.nNode, self.nArc, os.path.getsize(self.sFileName), os.path.getsize(sPathFile), \
- os.path.getsize("cfsa/dict/{}.dict".format(self.sLangName)) if os.path.isfile("cfsa/dict/{}.dict".format(self.sLangName)) else 0))
- hDst.close()
class DawgNode:
NextId = 0
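Taken together, the dawg.py changes separate computing the binary form from serializing it, and allow a DAWG to be built from memory as well as from a file. A sketch of the resulting API, using the fsa alias that lex_build.py below already uses (lexicon entries and file names are illustrative):

import graphspell.dawg as fsa

# from a tab-separated lexicon file, as before...
oDAWG = fsa.DAWG("fr.lexicon.txt", "S", "fr", "French")

# ...or from any in-memory iterable of (flexion, stem, tags) triples
lEntry = [("chat", "chat", "nom mas sg"), ("chats", "chat", "nom mas pl")]
oDAWG = fsa.DAWG(lEntry, "S", "fr", "French")

oDAWG.writeBinary("fr.bdic", 1)      # replaces the old createBinary()
dJSON = oDAWG.getBinaryAsJSON(1)     # the same data as a plain dict, no file involved
oDAWG.writeAsJSObject("fr.json", 1)  # thin public wrapper around getBinaryAsJSON()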
Index: graphspell/ibdawg.py
==================================================================
--- graphspell/ibdawg.py
+++ graphspell/ibdawg.py
@@ -77,23 +77,26 @@
class IBDAWG:
"""INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""
- def __init__ (self, sfDict):
- self.by = pkgutil.get_data(__package__, "_dictionaries/" + sfDict)
- if not self.by:
- raise OSError("# Error. File not found or not loadable: "+sfDict)
-
- if sfDict.endswith(".bdic"):
- self._initBinary()
- elif sfDict.endswith(".json"):
- self._initJSON()
+ def __init__ (self, source):
+ if type(source) is str:
+ self.by = pkgutil.get_data(__package__, "_dictionaries/" + source)
+ if not self.by:
+ raise OSError("# Error. File not found or not loadable: "+source)
+
+ if source.endswith(".bdic"):
+ self._initBinary()
+ elif source.endswith(".json"):
+                self._initJSON(json.loads(self.by.decode("utf-8")))  # in Python 3.6+, json.loads() accepts bytes directly: json.loads(self.by)
+ else:
+ raise OSError("# Error. Unknown file type: "+source)
else:
- raise OSError("# Error. Unknown file type: "+sfDict)
+ self._initJSON(source)
- self.sFileName = sfDict
+ self.sFileName = source if type(source) is str else "[None]"
self._arcMask = (2 ** ((self.nBytesArc * 8) - 3)) - 1
self._finalNodeMask = 1 << ((self.nBytesArc * 8) - 1)
self._lastArcMask = 1 << ((self.nBytesArc * 8) - 2)
self._addrBitMask = 1 << ((self.nBytesArc * 8) - 3) # version 2
@@ -167,14 +170,13 @@
for i in range(1, self.nChar+1):
self.dChar[self.lArcVal[i]] = i
self.dCharVal = { v: k for k, v in self.dChar.items() }
self.nBytesOffset = 1 # version 3
- def _initJSON (self):
+ def _initJSON (self, oJSON):
"initialize with a JSON text file"
- self.__dict__.update(json.loads(self.by.decode("utf-8")))
- #self.__dict__.update(json.loads(self.by)) # In Python 3.6, can read directly binary strings
+ self.__dict__.update(oJSON)
self.byDic = binascii.unhexlify(self.sByDic)
def getInfo (self):
return " Language: {0.sLangName} Lang code: {0.sLangCode} Dictionary name: {0.sDicName}" \
" Compression method: {0.nCompressionMethod:>2} Date: {0.sDate} Stemming: {0.cStemming}FX\n" \
Index: graphspell/spellchecker.py
==================================================================
--- graphspell/spellchecker.py
+++ graphspell/spellchecker.py
@@ -30,20 +30,20 @@
self.oMainDic = self._loadDictionary(sfMainDic, True)
self.oExtendedDic = self._loadDictionary(sfExtendedDic)
self.oPersonalDic = self._loadDictionary(sfPersonalDic)
self.oTokenizer = None
- def _loadDictionary (self, sfDictionary, bNecessary=False):
+ def _loadDictionary (self, source, bNecessary=False):
"returns an IBDAWG object"
- if not sfDictionary:
+ if not source:
return None
try:
- return ibdawg.IBDAWG(sfDictionary)
+ return ibdawg.IBDAWG(source)
except Exception as e:
if bNecessary:
- raise Exception(str(e), "Error: <" + sfDictionary + "> not loaded.")
- print("Error: <" + sfDictionary + "> not loaded.")
+ raise Exception(str(e), "Error: <" + str(source) + "> not loaded.")
+ print("Error: <" + str(source) + "> not loaded.")
traceback.print_exc()
return None
def loadTokenizer (self):
self.oTokenizer = tokenizer.Tokenizer(self.sLangCode)
@@ -51,23 +51,23 @@
def getTokenizer (self):
if not self.oTokenizer:
self.loadTokenizer()
return self.oTokenizer
- def setMainDictionary (self, sfDictionary):
+ def setMainDictionary (self, source):
"returns True if the dictionary is loaded"
- self.oMainDic = self._loadDictionary(sfDictionary)
+ self.oMainDic = self._loadDictionary(source)
return bool(self.oMainDic)
- def setExtendedDictionary (self, sfDictionary):
+ def setExtendedDictionary (self, source):
"returns True if the dictionary is loaded"
- self.oExtendedDic = self._loadDictionary(sfDictionary)
+ self.oExtendedDic = self._loadDictionary(source)
return bool(self.oExtendedDic)
- def setPersonalDictionary (self, sfDictionary):
+ def setPersonalDictionary (self, source):
"returns True if the dictionary is loaded"
- self.oPersonalDic = self._loadDictionary(sfDictionary)
+ self.oPersonalDic = self._loadDictionary(source)
return bool(self.oPersonalDic)
# parse text functions
def parseParagraph (self, sText, bSpellSugg=False):
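Since _loadDictionary() forwards its argument unchanged to IBDAWG, every set*Dictionary() method now accepts the same dual source. This is what makes in-memory personal dictionaries possible; a sketch (the entry and the SpellChecker constructor arguments are illustrative):

oSC = SpellChecker("fr", "fr.bdic")
oPersonal = fsa.DAWG([("Grammalecte", "Grammalecte", "nom propre")], "S", "fr")
oSC.setPersonalDictionary(oPersonal.getBinaryAsJSON(1))  # no file written anywhere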
Index: lex_build.py
==================================================================
--- lex_build.py
+++ lex_build.py
@@ -12,11 +12,11 @@
def build (spfSrc, sLangCode, sLangName, sfDict, bJSON=False, sDicName="", cStemmingMethod="S", nCompressMethod=1):
"transform a text lexicon as a binary indexable dictionary"
oDAWG = fsa.DAWG(spfSrc, cStemmingMethod, sLangCode, sLangName, sDicName)
dir_util.mkpath("graphspell/_dictionaries")
oDAWG.writeInfo("graphspell/_dictionaries/" + sfDict + ".info.txt")
- oDAWG.createBinary("graphspell/_dictionaries/" + sfDict + ".bdic", int(nCompressMethod))
+ oDAWG.writeBinary("graphspell/_dictionaries/" + sfDict + ".bdic", int(nCompressMethod))
if bJSON:
dir_util.mkpath("graphspell-js/_dictionaries")
oDic = IBDAWG(sfDict + ".bdic")
oDic.writeAsJSObject("graphspell-js/_dictionaries/" + sfDict + ".json", bBinaryDictAsHexString=True)
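A side note on the bJSON branch above: it still round-trips through the freshly written .bdic file. With the refactored API, the JSON could presumably be produced straight from the DAWG instead (a sketch, not what the patch does):

oDAWG.writeAsJSObject("graphspell-js/_dictionaries/" + sfDict + ".json",
                      int(nCompressMethod), bBinaryDictAsHexString=True)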
Index: make.py
==================================================================
--- make.py
+++ make.py
@@ -84,10 +84,11 @@
# Extension files
hZip.writestr("META-INF/manifest.xml", helpers.fileFile("gc_core/py/oxt/manifest.xml", dVars))
hZip.writestr("description.xml", helpers.fileFile("gc_core/py/oxt/description.xml", dVars))
hZip.writestr("Linguistic.xcu", helpers.fileFile("gc_core/py/oxt/Linguistic.xcu", dVars))
hZip.writestr("Grammalecte.py", helpers.fileFile("gc_core/py/oxt/Grammalecte.py", dVars))
+ hZip.writestr("pythonpath/helpers.py", helpers.fileFile("gc_core/py/oxt/helpers.py", dVars))
for sf in dVars["extras"].split(","):
hZip.writestr(sf.strip(), helpers.fileFile(spLang + '/' + sf.strip(), dVars))
if "logo" in dVars.keys() and dVars["logo"].strip():