| Comment: | [build][graphspell][lo] dictionary: drop support for binary file -> use JSON | 
|---|---|
| SHA3-256: | 05fb167483021d19c671911fede3595b | 
| User & Date: | olr on 2020-11-04 11:37:30 | 
Timeline context:

- 2020-11-04 12:02: [graphspell] ibdawg: code cleaning, remove old code, useless compression versions (check-in: 86250e8e6c, user: olr, tags: graphspell, dict2)
- 2020-11-04 11:37: [build][graphspell][lo] dictionary: drop support for binary file -> use JSON (check-in: 05fb167483, user: olr, tags: build, lo, graphspell, dict2)
- 2020-11-03 12:35: [fr] ajustements (check-in: 3cffdae3b0, user: olr, tags: trunk, fr)
Modified gc_lang/fr/build_data.py from [6e865955c0] to [3d9c0f4ca9].
Changed hunk (new version, lines 48-62):

```python
        raise OSError("# Error. File not found or not loadable: " + spf)


def loadDictionary ():
    global oDict
    if not oDict:
        try:
            oDict = ibdawg.IBDAWG("fr-allvars.json")
        except:
            traceback.print_exc()


def makeDictionaries (sp, sVersion):
    with cd(sp+"/dictionnaire"):
        if platform.system() == "Windows":
```
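Since the binary `.bdic` loader is dropped, consumers open the JSON dictionary directly. A minimal sketch based only on the calls visible in this check-in; the test word is just an illustration, and it assumes the built dictionary is available under `graphspell/_dictionaries/`:

```python
# Minimal sketch: load the JSON dictionary the same way build_data.py now does
# and query it. "maison" is only an illustrative test word.
import traceback

import graphspell.ibdawg as ibdawg

try:
    oDict = ibdawg.IBDAWG("fr-allvars.json")   # JSON dictionary, no .bdic fallback anymore
    print(oDict.isValid("maison"))             # True if the word is in the lexicon
except Exception:
    traceback.print_exc()
```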
Modified gc_lang/fr/oxt/ContextMenu/ContextMenu.py from [c17c3b29b1] to [f3255b533a].
Changed hunk (new version, lines 131-145):

```python
            if not oSpellChecker:
                xCurCtx = uno.getComponentContext()
                oGC = self.ctx.ServiceManager.createInstanceWithContext("org.openoffice.comp.pyuno.Lightproof.grammalecte", self.ctx)
                if hasattr(oGC, "getSpellChecker"):
                    # https://bugs.documentfoundation.org/show_bug.cgi?id=97790
                    oSpellChecker = oGC.getSpellChecker()
                else:
                    oSpellChecker = SpellChecker("${lang}", "fr-allvars.json")
        except:
            traceback.print_exc()

    def execute (self, args):
        if not args:
            return
        try:
```
Modified gc_lang/fr/oxt/DictOptions/LexiconEditor.py from [828f4f365e] to [5ef5214006].
First changed hunk (new version, lines 404-418):

```python
    @_waitPointer
    def importDictionary (self):
        spfImported = ""
        try:
            xFilePicker = self.xSvMgr.createInstanceWithContext('com.sun.star.ui.dialogs.FilePicker', self.ctx)  # other possibility: com.sun.star.ui.dialogs.SystemFilePicker
            xFilePicker.initialize([uno.getConstantByName("com.sun.star.ui.dialogs.TemplateDescription.FILEOPEN_SIMPLE")]) # seems useless
            xFilePicker.appendFilter("Supported files", "*.json")
            xFilePicker.setDefaultName("fr.__personal__.json") # useless, doesn’t work
            xFilePicker.setDisplayDirectory("")
            xFilePicker.setMultiSelectionMode(False)
            nResult = xFilePicker.execute()
            if nResult == 1:
                # lFile = xFilePicker.getSelectedFiles()
                lFile = xFilePicker.getFiles()
```

Second changed hunk (new version, lines 459-473):

```python
            self.xDateDic.Label = self.dUI.get("void", "#err")
        MessageBox(self.xDocument, self.dUI.get('save_message', "#err"), self.dUI.get('save_title', "#err"))

    def exportDictionary (self):
        try:
            xFilePicker = self.xSvMgr.createInstanceWithContext('com.sun.star.ui.dialogs.FilePicker', self.ctx)  # other possibility: com.sun.star.ui.dialogs.SystemFilePicker
            xFilePicker.initialize([uno.getConstantByName("com.sun.star.ui.dialogs.TemplateDescription.FILESAVE_SIMPLE")]) # seems useless
            xFilePicker.appendFilter("Supported files", "*.json")
            xFilePicker.setDefaultName("fr.__personal__.json") # useless, doesn’t work
            xFilePicker.setDisplayDirectory("")
            xFilePicker.setMultiSelectionMode(False)
            nResult = xFilePicker.execute()
            if nResult == 1:
                # lFile = xFilePicker.getSelectedFiles()
                lFile = xFilePicker.getFiles()
```
Modified gc_lang/fr/oxt/DictOptions/SearchWords.py from [764a885065] to [2c4ada79ef].
Changed hunk (new version, lines 182-196):

```python
            elif xActionEvent.ActionCommand == "Close":
                self.xContainer.endExecute()
        except:
            traceback.print_exc()

    def initSpellChecker (self):
        if not self.oSpellChecker:
            self.oSpellChecker = sc.SpellChecker("fr", "fr-allvars.json", "", self.oPersonalDicJSON)

    @_waitPointer
    def searchSimilar (self):
        self.initSpellChecker()
        sWord = self.xWord.Text.strip()
        if sWord:
            xGridDataModel = self.xGridModel.GridDataModel
```
Modified gc_lang/fr/oxt/Graphspell.py from [810dc52bd8] to [76770ac233].
Changed hunk (new version, lines 65-79):

```python
                sPersonalDicJSON = self.xOptionNode.getPropertyValue("personal_dic")
                if sPersonalDicJSON:
                    try:
                        personal_dic = json.loads(sPersonalDicJSON)
                    except:
                        print("Graphspell: wrong personal_dic")
                        traceback.print_exc()
            self.oGraphspell = SpellChecker("fr", "fr-"+sMainDicName+".json", "", personal_dic)
            self.loadHunspell()
            # print("Graphspell: init done")
        except:
            print("Graphspell: init failed")
            traceback.print_exc()

    def loadHunspell (self):
```
Modified gc_lang/fr/setup.py from [8a0db0631b] to [955f5741fe].
Changed hunk (new version, lines 89-103):

```python
    #     'test': ['coverage'],
    # },

    # If there are data files included in your packages that need to be
    # installed, specify them here.  If using Python 2.6 or less, then these
    # have to be included in MANIFEST.in as well.
    package_data={
        'grammalecte': ['graphspell/_dictionaries/*.json', '*.txt']
    },

    # Although 'package_data' is the preferred approach, in some case you may
    # need to place data files outside of your packages. See:
    # http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files # noqa
    # In this case, 'data_file' will be installed into '<sys.prefix>/my_data'
    # data_files=[('my_data', ['data/data_file'])],
```
Modified graphspell/dawg.py from [b60434a390] to [c083d6a347].
Changed hunk (new version, lines 474-488):

```python
            # Mozilla’s JS parser don’t like file bigger than 4 Mb!
            # So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension.
            # https://github.com/mozilla/addons-linter/issues/1361
            "sByDic": byDic.hex()  if bBinaryDictAsHexString  else [ e  for e in byDic ],
            "l2grams": list(self.a2grams)
        }

    def writeAsJSObject (self, spfDst, nCompressionMethod=1, bInJSModule=False, bBinaryDictAsHexString=True):
        "write a file (JSON or JS module) with all the necessary data"
        if not spfDst.endswith(".json"):
            spfDst += "."+str(nCompressionMethod)+".json"
        with open(spfDst, "w", encoding="utf-8", newline="\n") as hDst:
            if bInJSModule:
                hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ')
            hDst.write( json.dumps(self.getBinaryAsJSON(nCompressionMethod, bBinaryDictAsHexString), ensure_ascii=False) )
```
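The comment in `getBinaryAsJSON` explains why `sByDic` may be stored as a hex string rather than a list of integers: Mozilla's add-on linter rejects very large generated files. A standalone sketch of the two encodings and their round trips, using a stand-in byte array rather than a real DAWG:

```python
# Sketch of the two JSON-friendly encodings used for "sByDic" (standard library only).
import json

byDic = bytes([0x01, 0x7F, 0xC3, 0xA9])      # stand-in for the real DAWG byte array

as_hex  = byDic.hex()                        # "017fc3a9": compact hex string
as_list = [e for e in byDic]                 # [1, 127, 195, 169]: plain list of ints

payload_hex  = json.dumps({"sByDic": as_hex})
payload_list = json.dumps({"sByDic": as_list})

# Both forms decode back to the same bytes on the consumer side.
assert bytes.fromhex(json.loads(payload_hex)["sByDic"]) == byDic
assert bytes(json.loads(payload_list)["sByDic"]) == byDic
```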
Modified graphspell/spellchecker.py from [2bdbe76996] to [9b47d651ea].
Changed hunk (new version, lines 12-27):

```python
import traceback

from . import ibdawg
from . import tokenizer


dDefaultDictionaries = {
    "fr": "fr-allvars.json",
    "en": "en.json"
}


class SpellChecker ():
    "SpellChecker: wrapper for the IBDAWG class"

    def __init__ (self, sLangCode, sfMainDic="", sfCommunityDic="", sfPersonalDic=""):
```
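With the table above, the per-language defaults now point to JSON files. A hedged sketch of what that means for callers, assuming (as the table suggests but this hunk does not show) that the constructor falls back to `dDefaultDictionaries` when `sfMainDic` is empty, and that the built dictionaries are present:

```python
# Sketch only: the default main dictionary for "fr" is now a JSON file.
from graphspell.spellchecker import SpellChecker, dDefaultDictionaries

print(dDefaultDictionaries["fr"])                # "fr-allvars.json"

oSC1 = SpellChecker("fr")                        # assumed to fall back to fr-allvars.json
oSC2 = SpellChecker("fr", "fr-allvars.json")     # same dictionary, named explicitly
```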
Modified lex_build.py from [0d00b07703] to [1b2b5d0ea9].
Changed hunk (new version, lines 1-32):

```python
#!python3

"""
Lexicon builder
"""

import argparse
from distutils import dir_util

import graphspell.dawg as fsa
from graphspell.ibdawg import IBDAWG


def build (spfSrc, sLangCode, sLangName, sfDict, bJavaScript=False, sDicName="", sDescription="", sFilter="", cStemmingMethod="S", nCompressMethod=1):
    "transform a text lexicon as a binary indexable dictionary"
    oDAWG = fsa.DAWG(spfSrc, cStemmingMethod, sLangCode, sLangName, sDicName, sDescription, sFilter)
    dir_util.mkpath("graphspell/_dictionaries")
    #oDAWG.writeInfo("graphspell/_dictionaries/" + sfDict + ".info.txt")
    #oDAWG.writeBinary("graphspell/_dictionaries/" + sfDict + ".bdic", int(nCompressMethod))
    oDAWG.writeAsJSObject("graphspell/_dictionaries/" + sfDict + ".json")
    if bJavaScript:
        dir_util.mkpath("graphspell-js/_dictionaries")
        oDAWG.writeAsJSObject("graphspell-js/_dictionaries/" + sfDict + ".json")
        #oDic = IBDAWG(sfDict + ".bdic")
        #oDic.writeAsJSObject("graphspell-js/_dictionaries/" + sfDict + ".json", bBinaryDictAsHexString=True)


def main ():
    "parse args from CLI"
    xParser = argparse.ArgumentParser()
    xParser.add_argument("src_lexicon", type=str, help="path and file name of the source lexicon")
    xParser.add_argument("lang_code", type=str, help="language code")
```
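With `writeBinary` commented out, `build()` now emits only JSON. A sketch of calling it directly; the source lexicon path and the output name `fr-custom` are hypothetical, and only the `.json` destination pattern comes from the code above:

```python
# Hypothetical invocation of lex_build.build(); paths and names are illustrative.
import lex_build

lex_build.build(
    "lexicons/French.lex",     # spfSrc: source lexicon (hypothetical path)
    "fr",                      # sLangCode
    "French",                  # sLangName
    "fr-custom",               # sfDict -> graphspell/_dictionaries/fr-custom.json
    bJavaScript=True           # also writes graphspell-js/_dictionaries/fr-custom.json
)
```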
Modified make.py from [f59af684eb] to [a76be310e9].
Changed hunk (new version, lines 313-338):

```python
    dVars["dic_personal_filename_js"] = ""
    lDict = [ ("main", s)  for s in dVars['dic_filenames'].split(",") ]
    if bCommunityDict:
        lDict.append(("community", dVars['dic_community_filename']))
    if bPersonalDict:
        lDict.append(("personal", dVars['dic_personal_filename']))
    for sType, sFileName in lDict:
        spfPyDic = f"graphspell/_dictionaries/{sFileName}.json"
        spfJSDic = f"graphspell-js/_dictionaries/{sFileName}.json"
        if not os.path.isfile(spfPyDic) or (bJavaScript and not os.path.isfile(spfJSDic)):
            buildDictionary(dVars, sType, bJavaScript)
        print("  +", spfPyDic)
        file_util.copy_file(spfPyDic, "grammalecte/graphspell/_dictionaries")
        dVars['dic_'+sType+'_filename_py'] = sFileName + '.json'
        if bJavaScript:
            print("  +", spfJSDic)
            file_util.copy_file(spfJSDic, "grammalecte-js/graphspell/_dictionaries")
            dVars['dic_'+sType+'_filename_js'] = sFileName + '.json'
    dVars['dic_main_filename_py'] = dVars['dic_default_filename_py'] + ".json"
    dVars['dic_main_filename_js'] = dVars['dic_default_filename_js'] + ".json"


def buildDictionary (dVars, sType, bJavaScript=False):
    "build binary dictionary for Graphspell from lexicons"
    if sType == "main":
        spfLexSrc = dVars['lexicon_src']
```
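The loop above now derives every dictionary path from the same `<name>.json` pattern. A small sketch of that naming convention in isolation, with hypothetical `dVars` values:

```python
# Naming sketch: each configured dictionary name maps to one JSON file per target.
dVars = {"dic_filenames": "fr-allvars,fr-classic"}   # hypothetical build variables

for sType, sFileName in [("main", s) for s in dVars["dic_filenames"].split(",")]:
    spfPyDic = f"graphspell/_dictionaries/{sFileName}.json"
    spfJSDic = f"graphspell-js/_dictionaries/{sFileName}.json"
    print(sType, spfPyDic, spfJSDic)
# main graphspell/_dictionaries/fr-allvars.json graphspell-js/_dictionaries/fr-allvars.json
# main graphspell/_dictionaries/fr-classic.json graphspell-js/_dictionaries/fr-classic.json
```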
Modified reader.py from [66f5eb17ae] to [e2706fc6a2].
Changed hunk (new version, lines 1-36):

```python
#!python3

# Just a file for one-shot scripts

import os
import sys
import re

import graphspell.ibdawg as ibdawg

oDict = ibdawg.IBDAWG("fr-allvars.json")


def readFile (spf):
    if os.path.isfile(spf):
        with open(spf, "r", encoding="utf-8") as hSrc:
            for sLine in hSrc:
                yield sLine
    else:
        print("# Error: file not found.")

# --------------------------------------------------------------------------------------------------

def listUnknownWords (spf):
    with open(spf+".res.txt", "w", encoding="utf-8") as hDst:
        for sLine in readFile(spfSrc):
            sLine = sLine.strip()
            if sLine:
                for sWord in sLine.split():
                    if not oDict.isValid(sWord):
                        hDst.write(sWord+"\n")

# --------------------------------------------------------------------------------------------------

def createLexStatFile (spf, dStat):
    dWord = {}
    for i, sLine in enumerate(readFile(spf)):
```