| Comment: | [build][graphspell][lo] dictionary: drop support for binary file -> use JSON |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | build | lo | graphspell | dict2 |
| Files: | files | file ages | folders |
| SHA3-256: | 05fb167483021d19c671911fede3595b |
| User & Date: | olr on 2020-11-04 11:37:30 |
| Other Links: | branch diff | manifest | tags |
|
2020-11-04
| ||
| 12:02 | [graphspell] ibdawg: code cleaning, remove old code, useless compression versions check-in: 86250e8e6c user: olr tags: graphspell, dict2 | |
| 11:37 | [build][graphspell][lo] dictionary: drop support for binary file -> use JSON check-in: 05fb167483 user: olr tags: build, lo, graphspell, dict2 | |
|
2020-11-03
| ||
| 12:35 | [fr] ajustements check-in: 3cffdae3b0 user: olr tags: trunk, fr | |
Modified gc_lang/fr/build_data.py from [6e865955c0] to [3d9c0f4ca9].
| ︙ | ︙ | |||
48 49 50 51 52 53 54 |
raise OSError("# Error. File not found or not loadable: " + spf)
def loadDictionary ():
global oDict
if not oDict:
try:
| | | 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
raise OSError("# Error. File not found or not loadable: " + spf)
def loadDictionary ():
    "lazily instantiate the module-level IBDAWG dictionary from the JSON data (done once)"
    global oDict
    if not oDict:
        try:
            oDict = ibdawg.IBDAWG("fr-allvars.json")
        except Exception:
            # best-effort: a missing/corrupt dictionary must not abort the build,
            # but a bare `except:` would also swallow KeyboardInterrupt/SystemExit
            traceback.print_exc()
def makeDictionaries (sp, sVersion):
with cd(sp+"/dictionnaire"):
if platform.system() == "Windows":
|
| ︙ | ︙ |
Modified gc_lang/fr/oxt/ContextMenu/ContextMenu.py from [c17c3b29b1] to [f3255b533a].
| ︙ | ︙ | |||
131 132 133 134 135 136 137 |
if not oSpellChecker:
xCurCtx = uno.getComponentContext()
oGC = self.ctx.ServiceManager.createInstanceWithContext("org.openoffice.comp.pyuno.Lightproof.grammalecte", self.ctx)
if hasattr(oGC, "getSpellChecker"):
# https://bugs.documentfoundation.org/show_bug.cgi?id=97790
oSpellChecker = oGC.getSpellChecker()
else:
| | | 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
if not oSpellChecker:
xCurCtx = uno.getComponentContext()
oGC = self.ctx.ServiceManager.createInstanceWithContext("org.openoffice.comp.pyuno.Lightproof.grammalecte", self.ctx)
if hasattr(oGC, "getSpellChecker"):
# https://bugs.documentfoundation.org/show_bug.cgi?id=97790
oSpellChecker = oGC.getSpellChecker()
else:
oSpellChecker = SpellChecker("${lang}", "fr-allvars.json")
except:
traceback.print_exc()
def execute (self, args):
if not args:
return
try:
|
| ︙ | ︙ |
Modified gc_lang/fr/oxt/DictOptions/LexiconEditor.py from [828f4f365e] to [5ef5214006].
| ︙ | ︙ | |||
404 405 406 407 408 409 410 |
@_waitPointer
def importDictionary (self):
spfImported = ""
try:
xFilePicker = self.xSvMgr.createInstanceWithContext('com.sun.star.ui.dialogs.FilePicker', self.ctx) # other possibility: com.sun.star.ui.dialogs.SystemFilePicker
xFilePicker.initialize([uno.getConstantByName("com.sun.star.ui.dialogs.TemplateDescription.FILEOPEN_SIMPLE")]) # seems useless
| | | 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 |
@_waitPointer
def importDictionary (self):
spfImported = ""
try:
xFilePicker = self.xSvMgr.createInstanceWithContext('com.sun.star.ui.dialogs.FilePicker', self.ctx) # other possibility: com.sun.star.ui.dialogs.SystemFilePicker
xFilePicker.initialize([uno.getConstantByName("com.sun.star.ui.dialogs.TemplateDescription.FILEOPEN_SIMPLE")]) # seems useless
xFilePicker.appendFilter("Supported files", "*.json")
xFilePicker.setDefaultName("fr.__personal__.json") # useless, doesn’t work
xFilePicker.setDisplayDirectory("")
xFilePicker.setMultiSelectionMode(False)
nResult = xFilePicker.execute()
if nResult == 1:
# lFile = xFilePicker.getSelectedFiles()
lFile = xFilePicker.getFiles()
|
| ︙ | ︙ | |||
459 460 461 462 463 464 465 |
self.xDateDic.Label = self.dUI.get("void", "#err")
MessageBox(self.xDocument, self.dUI.get('save_message', "#err"), self.dUI.get('save_title', "#err"))
def exportDictionary (self):
try:
xFilePicker = self.xSvMgr.createInstanceWithContext('com.sun.star.ui.dialogs.FilePicker', self.ctx) # other possibility: com.sun.star.ui.dialogs.SystemFilePicker
xFilePicker.initialize([uno.getConstantByName("com.sun.star.ui.dialogs.TemplateDescription.FILESAVE_SIMPLE")]) # seems useless
| | | 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 |
self.xDateDic.Label = self.dUI.get("void", "#err")
MessageBox(self.xDocument, self.dUI.get('save_message', "#err"), self.dUI.get('save_title', "#err"))
def exportDictionary (self):
try:
xFilePicker = self.xSvMgr.createInstanceWithContext('com.sun.star.ui.dialogs.FilePicker', self.ctx) # other possibility: com.sun.star.ui.dialogs.SystemFilePicker
xFilePicker.initialize([uno.getConstantByName("com.sun.star.ui.dialogs.TemplateDescription.FILESAVE_SIMPLE")]) # seems useless
xFilePicker.appendFilter("Supported files", "*.json")
xFilePicker.setDefaultName("fr.__personal__.json") # useless, doesn’t work
xFilePicker.setDisplayDirectory("")
xFilePicker.setMultiSelectionMode(False)
nResult = xFilePicker.execute()
if nResult == 1:
# lFile = xFilePicker.getSelectedFiles()
lFile = xFilePicker.getFiles()
|
| ︙ | ︙ |
Modified gc_lang/fr/oxt/DictOptions/SearchWords.py from [764a885065] to [2c4ada79ef].
| ︙ | ︙ | |||
182 183 184 185 186 187 188 |
elif xActionEvent.ActionCommand == "Close":
self.xContainer.endExecute()
except:
traceback.print_exc()
def initSpellChecker (self):
if not self.oSpellChecker:
| | | 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
elif xActionEvent.ActionCommand == "Close":
self.xContainer.endExecute()
except:
traceback.print_exc()
def initSpellChecker (self):
    "create the Graphspell spell checker on first use (no-op when already initialized)"
    if self.oSpellChecker:
        return
    self.oSpellChecker = sc.SpellChecker("fr", "fr-allvars.json", "", self.oPersonalDicJSON)
@_waitPointer
def searchSimilar (self):
self.initSpellChecker()
sWord = self.xWord.Text.strip()
if sWord:
xGridDataModel = self.xGridModel.GridDataModel
|
| ︙ | ︙ |
Modified gc_lang/fr/oxt/Graphspell.py from [810dc52bd8] to [76770ac233].
| ︙ | ︙ | |||
65 66 67 68 69 70 71 |
sPersonalDicJSON = self.xOptionNode.getPropertyValue("personal_dic")
if sPersonalDicJSON:
try:
personal_dic = json.loads(sPersonalDicJSON)
except:
print("Graphspell: wrong personal_dic")
traceback.print_exc()
| | | 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
sPersonalDicJSON = self.xOptionNode.getPropertyValue("personal_dic")
if sPersonalDicJSON:
try:
personal_dic = json.loads(sPersonalDicJSON)
except:
print("Graphspell: wrong personal_dic")
traceback.print_exc()
self.oGraphspell = SpellChecker("fr", "fr-"+sMainDicName+".json", "", personal_dic)
self.loadHunspell()
# print("Graphspell: init done")
except:
print("Graphspell: init failed")
traceback.print_exc()
def loadHunspell (self):
|
| ︙ | ︙ |
Modified gc_lang/fr/setup.py from [8a0db0631b] to [955f5741fe].
| ︙ | ︙ | |||
89 90 91 92 93 94 95 |
# 'test': ['coverage'],
# },
# If there are data files included in your packages that need to be
# installed, specify them here. If using Python 2.6 or less, then these
# have to be included in MANIFEST.in as well.
package_data={
| | | 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
# 'test': ['coverage'],
# },
# If there are data files included in your packages that need to be
# installed, specify them here. If using Python 2.6 or less, then these
# have to be included in MANIFEST.in as well.
package_data={
'grammalecte': ['graphspell/_dictionaries/*.json', '*.txt']
},
# Although 'package_data' is the preferred approach, in some case you may
# need to place data files outside of your packages. See:
# http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files # noqa
# In this case, 'data_file' will be installed into '<sys.prefix>/my_data'
# data_files=[('my_data', ['data/data_file'])],
|
| ︙ | ︙ |
Modified graphspell/dawg.py from [b60434a390] to [c083d6a347].
| ︙ | ︙ | |||
474 475 476 477 478 479 480 |
# Mozilla’s JS parser don’t like file bigger than 4 Mb!
# So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension.
# https://github.com/mozilla/addons-linter/issues/1361
"sByDic": byDic.hex() if bBinaryDictAsHexString else [ e for e in byDic ],
"l2grams": list(self.a2grams)
}
| | | 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 |
# Mozilla’s JS parser don’t like file bigger than 4 Mb!
# So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension.
# https://github.com/mozilla/addons-linter/issues/1361
"sByDic": byDic.hex() if bBinaryDictAsHexString else [ e for e in byDic ],
"l2grams": list(self.a2grams)
}
def writeAsJSObject (self, spfDst, nCompressionMethod=1, bInJSModule=False, bBinaryDictAsHexString=True):
"write a file (JSON or JS module) with all the necessary data"
if not spfDst.endswith(".json"):
spfDst += "."+str(nCompressionMethod)+".json"
with open(spfDst, "w", encoding="utf-8", newline="\n") as hDst:
if bInJSModule:
hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ')
hDst.write( json.dumps(self.getBinaryAsJSON(nCompressionMethod, bBinaryDictAsHexString), ensure_ascii=False) )
|
| ︙ | ︙ |
Modified graphspell/spellchecker.py from [2bdbe76996] to [9b47d651ea].
| ︙ | ︙ | |||
12 13 14 15 16 17 18 |
import traceback
from . import ibdawg
from . import tokenizer
dDefaultDictionaries = {
| | | | 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
import traceback
from . import ibdawg
from . import tokenizer
dDefaultDictionaries = {
"fr": "fr-allvars.json",
"en": "en.json"
}
class SpellChecker ():
"SpellChecker: wrapper for the IBDAWG class"
def __init__ (self, sLangCode, sfMainDic="", sfCommunityDic="", sfPersonalDic=""):
|
| ︙ | ︙ |
Modified lex_build.py from [0d00b07703] to [1b2b5d0ea9].
1 2 3 4 5 6 7 8 9 10 11 12 13 | #!python3 """ Lexicon builder """ import argparse from distutils import dir_util import graphspell.dawg as fsa from graphspell.ibdawg import IBDAWG | | | | > | > | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
#!python3
"""
Lexicon builder
"""
import argparse
from distutils import dir_util
import graphspell.dawg as fsa
from graphspell.ibdawg import IBDAWG
def build (spfSrc, sLangCode, sLangName, sfDict, bJavaScript=False, sDicName="", sDescription="", sFilter="", cStemmingMethod="S", nCompressMethod=1):
    "compile the text lexicon <spfSrc> into an indexable JSON dictionary for Graphspell"
    oDAWG = fsa.DAWG(spfSrc, cStemmingMethod, sLangCode, sLangName, sDicName, sDescription, sFilter)
    sPyDir = "graphspell/_dictionaries"
    dir_util.mkpath(sPyDir)
    # NOTE: binary (.bdic) and .info.txt outputs were dropped in favour of JSON
    oDAWG.writeAsJSObject(sPyDir + "/" + sfDict + ".json")
    if bJavaScript:
        sJSDir = "graphspell-js/_dictionaries"
        dir_util.mkpath(sJSDir)
        oDAWG.writeAsJSObject(sJSDir + "/" + sfDict + ".json")
def main ():
"parse args from CLI"
xParser = argparse.ArgumentParser()
xParser.add_argument("src_lexicon", type=str, help="path and file name of the source lexicon")
xParser.add_argument("lang_code", type=str, help="language code")
|
| ︙ | ︙ |
Modified make.py from [f59af684eb] to [a76be310e9].
| ︙ | ︙ | |||
313 314 315 316 317 318 319 |
dVars["dic_personal_filename_js"] = ""
lDict = [ ("main", s) for s in dVars['dic_filenames'].split(",") ]
if bCommunityDict:
lDict.append(("community", dVars['dic_community_filename']))
if bPersonalDict:
lDict.append(("personal", dVars['dic_personal_filename']))
for sType, sFileName in lDict:
| | | | | 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 |
dVars["dic_personal_filename_js"] = ""
lDict = [ ("main", s) for s in dVars['dic_filenames'].split(",") ]
if bCommunityDict:
lDict.append(("community", dVars['dic_community_filename']))
if bPersonalDict:
lDict.append(("personal", dVars['dic_personal_filename']))
for sType, sFileName in lDict:
spfPyDic = f"graphspell/_dictionaries/{sFileName}.json"
spfJSDic = f"graphspell-js/_dictionaries/{sFileName}.json"
if not os.path.isfile(spfPyDic) or (bJavaScript and not os.path.isfile(spfJSDic)):
buildDictionary(dVars, sType, bJavaScript)
print(" +", spfPyDic)
file_util.copy_file(spfPyDic, "grammalecte/graphspell/_dictionaries")
dVars['dic_'+sType+'_filename_py'] = sFileName + '.json'
if bJavaScript:
print(" +", spfJSDic)
file_util.copy_file(spfJSDic, "grammalecte-js/graphspell/_dictionaries")
dVars['dic_'+sType+'_filename_js'] = sFileName + '.json'
dVars['dic_main_filename_py'] = dVars['dic_default_filename_py'] + ".json"
dVars['dic_main_filename_js'] = dVars['dic_default_filename_js'] + ".json"
def buildDictionary (dVars, sType, bJavaScript=False):
"build binary dictionary for Graphspell from lexicons"
if sType == "main":
spfLexSrc = dVars['lexicon_src']
|
| ︙ | ︙ |
Modified reader.py from [66f5eb17ae] to [e2706fc6a2].
1 2 3 4 5 6 7 8 9 | #!python3 # Just a file for one-shot scripts import os import sys import re import graphspell.ibdawg as ibdawg | | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
#!python3
# Just a file for one-shot scripts
import os
import sys
import re
import graphspell.ibdawg as ibdawg
oDict = ibdawg.IBDAWG("fr-allvars.json")
def readFile (spf):
    "generator: yield the lines of file <spf>; print an error (and yield nothing) if it is missing"
    if not os.path.isfile(spf):
        print("# Error: file not found.")
        return
    with open(spf, "r", encoding="utf-8") as hSrc:
        yield from hSrc
# --------------------------------------------------------------------------------------------------
def listUnknownWords (spf):
    "write every word of file <spf> unknown to the global oDict into <spf>.res.txt, one per line"
    with open(spf+".res.txt", "w", encoding="utf-8") as hDst:
        # bugfix: was readFile(spfSrc) — <spfSrc> is undefined here, the parameter is <spf>
        for sLine in readFile(spf):
            sLine = sLine.strip()
            if sLine:
                for sWord in sLine.split():
                    if not oDict.isValid(sWord):
                        hDst.write(sWord+"\n")
# --------------------------------------------------------------------------------------------------
def createLexStatFile (spf, dStat):
dWord = {}
for i, sLine in enumerate(readFile(spf)):
|
| ︙ | ︙ |