Comment: | [build][graphspell][lo] dictionary: drop support for binary file -> use JSON |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | build | lo | graphspell | dict2 |
Files: | files | file ages | folders |
SHA3-256: |
05fb167483021d19c671911fede3595b |
User & Date: | olr on 2020-11-04 11:37:30 |
Other Links: | branch diff | manifest | tags |
2020-11-04
| ||
12:02 | [graphspell] ibdawg: code cleaning, remove old code, useless compression versions check-in: 86250e8e6c user: olr tags: dict2, graphspell | |
11:37 | [build][graphspell][lo] dictionary: drop support for binary file -> use JSON check-in: 05fb167483 user: olr tags: build, dict2, graphspell, lo | |
2020-11-03
| ||
12:35 | [fr] ajustements check-in: 3cffdae3b0 user: olr tags: fr, trunk | |
Modified gc_lang/fr/build_data.py from [6e865955c0] to [3d9c0f4ca9].
︙ | ︙ | |||
48 49 50 51 52 53 54 | raise OSError("# Error. File not found or not loadable: " + spf) def loadDictionary (): global oDict if not oDict: try: | | | 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 | raise OSError("# Error. File not found or not loadable: " + spf) def loadDictionary (): global oDict if not oDict: try: oDict = ibdawg.IBDAWG("fr-allvars.json") except: traceback.print_exc() def makeDictionaries (sp, sVersion): with cd(sp+"/dictionnaire"): if platform.system() == "Windows": |
︙ | ︙ |
Modified gc_lang/fr/oxt/ContextMenu/ContextMenu.py from [c17c3b29b1] to [f3255b533a].
︙ | ︙ | |||
131 132 133 134 135 136 137 | if not oSpellChecker: xCurCtx = uno.getComponentContext() oGC = self.ctx.ServiceManager.createInstanceWithContext("org.openoffice.comp.pyuno.Lightproof.grammalecte", self.ctx) if hasattr(oGC, "getSpellChecker"): # https://bugs.documentfoundation.org/show_bug.cgi?id=97790 oSpellChecker = oGC.getSpellChecker() else: | | | 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 | if not oSpellChecker: xCurCtx = uno.getComponentContext() oGC = self.ctx.ServiceManager.createInstanceWithContext("org.openoffice.comp.pyuno.Lightproof.grammalecte", self.ctx) if hasattr(oGC, "getSpellChecker"): # https://bugs.documentfoundation.org/show_bug.cgi?id=97790 oSpellChecker = oGC.getSpellChecker() else: oSpellChecker = SpellChecker("${lang}", "fr-allvars.json") except: traceback.print_exc() def execute (self, args): if not args: return try: |
︙ | ︙ |
Modified gc_lang/fr/oxt/DictOptions/LexiconEditor.py from [828f4f365e] to [5ef5214006].
︙ | ︙ | |||
404 405 406 407 408 409 410 | @_waitPointer def importDictionary (self): spfImported = "" try: xFilePicker = self.xSvMgr.createInstanceWithContext('com.sun.star.ui.dialogs.FilePicker', self.ctx) # other possibility: com.sun.star.ui.dialogs.SystemFilePicker xFilePicker.initialize([uno.getConstantByName("com.sun.star.ui.dialogs.TemplateDescription.FILEOPEN_SIMPLE")]) # seems useless | | | 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 | @_waitPointer def importDictionary (self): spfImported = "" try: xFilePicker = self.xSvMgr.createInstanceWithContext('com.sun.star.ui.dialogs.FilePicker', self.ctx) # other possibility: com.sun.star.ui.dialogs.SystemFilePicker xFilePicker.initialize([uno.getConstantByName("com.sun.star.ui.dialogs.TemplateDescription.FILEOPEN_SIMPLE")]) # seems useless xFilePicker.appendFilter("Supported files", "*.json") xFilePicker.setDefaultName("fr.__personal__.json") # useless, doesn’t work xFilePicker.setDisplayDirectory("") xFilePicker.setMultiSelectionMode(False) nResult = xFilePicker.execute() if nResult == 1: # lFile = xFilePicker.getSelectedFiles() lFile = xFilePicker.getFiles() |
︙ | ︙ | |||
459 460 461 462 463 464 465 | self.xDateDic.Label = self.dUI.get("void", "#err") MessageBox(self.xDocument, self.dUI.get('save_message', "#err"), self.dUI.get('save_title', "#err")) def exportDictionary (self): try: xFilePicker = self.xSvMgr.createInstanceWithContext('com.sun.star.ui.dialogs.FilePicker', self.ctx) # other possibility: com.sun.star.ui.dialogs.SystemFilePicker xFilePicker.initialize([uno.getConstantByName("com.sun.star.ui.dialogs.TemplateDescription.FILESAVE_SIMPLE")]) # seems useless | | | 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 | self.xDateDic.Label = self.dUI.get("void", "#err") MessageBox(self.xDocument, self.dUI.get('save_message', "#err"), self.dUI.get('save_title', "#err")) def exportDictionary (self): try: xFilePicker = self.xSvMgr.createInstanceWithContext('com.sun.star.ui.dialogs.FilePicker', self.ctx) # other possibility: com.sun.star.ui.dialogs.SystemFilePicker xFilePicker.initialize([uno.getConstantByName("com.sun.star.ui.dialogs.TemplateDescription.FILESAVE_SIMPLE")]) # seems useless xFilePicker.appendFilter("Supported files", "*.json") xFilePicker.setDefaultName("fr.__personal__.json") # useless, doesn’t work xFilePicker.setDisplayDirectory("") xFilePicker.setMultiSelectionMode(False) nResult = xFilePicker.execute() if nResult == 1: # lFile = xFilePicker.getSelectedFiles() lFile = xFilePicker.getFiles() |
︙ | ︙ |
Modified gc_lang/fr/oxt/DictOptions/SearchWords.py from [764a885065] to [2c4ada79ef].
︙ | ︙ | |||
182 183 184 185 186 187 188 | elif xActionEvent.ActionCommand == "Close": self.xContainer.endExecute() except: traceback.print_exc() def initSpellChecker (self): if not self.oSpellChecker: | | | 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 | elif xActionEvent.ActionCommand == "Close": self.xContainer.endExecute() except: traceback.print_exc() def initSpellChecker (self): if not self.oSpellChecker: self.oSpellChecker = sc.SpellChecker("fr", "fr-allvars.json", "", self.oPersonalDicJSON) @_waitPointer def searchSimilar (self): self.initSpellChecker() sWord = self.xWord.Text.strip() if sWord: xGridDataModel = self.xGridModel.GridDataModel |
︙ | ︙ |
Modified gc_lang/fr/oxt/Graphspell.py from [810dc52bd8] to [76770ac233].
︙ | ︙ | |||
65 66 67 68 69 70 71 | sPersonalDicJSON = self.xOptionNode.getPropertyValue("personal_dic") if sPersonalDicJSON: try: personal_dic = json.loads(sPersonalDicJSON) except: print("Graphspell: wrong personal_dic") traceback.print_exc() | | | 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | sPersonalDicJSON = self.xOptionNode.getPropertyValue("personal_dic") if sPersonalDicJSON: try: personal_dic = json.loads(sPersonalDicJSON) except: print("Graphspell: wrong personal_dic") traceback.print_exc() self.oGraphspell = SpellChecker("fr", "fr-"+sMainDicName+".json", "", personal_dic) self.loadHunspell() # print("Graphspell: init done") except: print("Graphspell: init failed") traceback.print_exc() def loadHunspell (self): |
︙ | ︙ |
Modified gc_lang/fr/setup.py from [8a0db0631b] to [955f5741fe].
︙ | ︙ | |||
89 90 91 92 93 94 95 | # 'test': ['coverage'], # }, # If there are data files included in your packages that need to be # installed, specify them here. If using Python 2.6 or less, then these # have to be included in MANIFEST.in as well. package_data={ | | | 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 | # 'test': ['coverage'], # }, # If there are data files included in your packages that need to be # installed, specify them here. If using Python 2.6 or less, then these # have to be included in MANIFEST.in as well. package_data={ 'grammalecte': ['graphspell/_dictionaries/*.json', '*.txt'] }, # Although 'package_data' is the preferred approach, in some case you may # need to place data files outside of your packages. See: # http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files # noqa # In this case, 'data_file' will be installed into '<sys.prefix>/my_data' # data_files=[('my_data', ['data/data_file'])], |
︙ | ︙ |
Modified graphspell/dawg.py from [b60434a390] to [c083d6a347].
︙ | ︙ | |||
474 475 476 477 478 479 480 | # Mozilla’s JS parser don’t like file bigger than 4 Mb! # So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension. # https://github.com/mozilla/addons-linter/issues/1361 "sByDic": byDic.hex() if bBinaryDictAsHexString else [ e for e in byDic ], "l2grams": list(self.a2grams) } | | | 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 | # Mozilla’s JS parser don’t like file bigger than 4 Mb! # So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension. # https://github.com/mozilla/addons-linter/issues/1361 "sByDic": byDic.hex() if bBinaryDictAsHexString else [ e for e in byDic ], "l2grams": list(self.a2grams) } def writeAsJSObject (self, spfDst, nCompressionMethod=1, bInJSModule=False, bBinaryDictAsHexString=True): "write a file (JSON or JS module) with all the necessary data" if not spfDst.endswith(".json"): spfDst += "."+str(nCompressionMethod)+".json" with open(spfDst, "w", encoding="utf-8", newline="\n") as hDst: if bInJSModule: hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ') hDst.write( json.dumps(self.getBinaryAsJSON(nCompressionMethod, bBinaryDictAsHexString), ensure_ascii=False) ) |
︙ | ︙ |
Modified graphspell/spellchecker.py from [2bdbe76996] to [9b47d651ea].
︙ | ︙ | |||
12 13 14 15 16 17 18 | import traceback from . import ibdawg from . import tokenizer dDefaultDictionaries = { | | | | 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 | import traceback from . import ibdawg from . import tokenizer dDefaultDictionaries = { "fr": "fr-allvars.json", "en": "en.json" } class SpellChecker (): "SpellChecker: wrapper for the IBDAWG class" def __init__ (self, sLangCode, sfMainDic="", sfCommunityDic="", sfPersonalDic=""): |
︙ | ︙ |
Modified lex_build.py from [0d00b07703] to [1b2b5d0ea9].
1 2 3 4 5 6 7 8 9 10 11 12 13 | #!python3 """ Lexicon builder """ import argparse from distutils import dir_util import graphspell.dawg as fsa from graphspell.ibdawg import IBDAWG | | | | > | > | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 | #!python3 """ Lexicon builder """ import argparse from distutils import dir_util import graphspell.dawg as fsa from graphspell.ibdawg import IBDAWG def build (spfSrc, sLangCode, sLangName, sfDict, bJavaScript=False, sDicName="", sDescription="", sFilter="", cStemmingMethod="S", nCompressMethod=1): "transform a text lexicon as a binary indexable dictionary" oDAWG = fsa.DAWG(spfSrc, cStemmingMethod, sLangCode, sLangName, sDicName, sDescription, sFilter) dir_util.mkpath("graphspell/_dictionaries") #oDAWG.writeInfo("graphspell/_dictionaries/" + sfDict + ".info.txt") #oDAWG.writeBinary("graphspell/_dictionaries/" + sfDict + ".bdic", int(nCompressMethod)) oDAWG.writeAsJSObject("graphspell/_dictionaries/" + sfDict + ".json") if bJavaScript: dir_util.mkpath("graphspell-js/_dictionaries") oDAWG.writeAsJSObject("graphspell-js/_dictionaries/" + sfDict + ".json") #oDic = IBDAWG(sfDict + ".bdic") #oDic.writeAsJSObject("graphspell-js/_dictionaries/" + sfDict + ".json", bBinaryDictAsHexString=True) def main (): "parse args from CLI" xParser = argparse.ArgumentParser() xParser.add_argument("src_lexicon", type=str, help="path and file name of the source lexicon") xParser.add_argument("lang_code", type=str, help="language code") |
︙ | ︙ |
Modified make.py from [f59af684eb] to [a76be310e9].
︙ | ︙ | |||
313 314 315 316 317 318 319 | dVars["dic_personal_filename_js"] = "" lDict = [ ("main", s) for s in dVars['dic_filenames'].split(",") ] if bCommunityDict: lDict.append(("community", dVars['dic_community_filename'])) if bPersonalDict: lDict.append(("personal", dVars['dic_personal_filename'])) for sType, sFileName in lDict: | | | | | 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 | dVars["dic_personal_filename_js"] = "" lDict = [ ("main", s) for s in dVars['dic_filenames'].split(",") ] if bCommunityDict: lDict.append(("community", dVars['dic_community_filename'])) if bPersonalDict: lDict.append(("personal", dVars['dic_personal_filename'])) for sType, sFileName in lDict: spfPyDic = f"graphspell/_dictionaries/{sFileName}.json" spfJSDic = f"graphspell-js/_dictionaries/{sFileName}.json" if not os.path.isfile(spfPyDic) or (bJavaScript and not os.path.isfile(spfJSDic)): buildDictionary(dVars, sType, bJavaScript) print(" +", spfPyDic) file_util.copy_file(spfPyDic, "grammalecte/graphspell/_dictionaries") dVars['dic_'+sType+'_filename_py'] = sFileName + '.json' if bJavaScript: print(" +", spfJSDic) file_util.copy_file(spfJSDic, "grammalecte-js/graphspell/_dictionaries") dVars['dic_'+sType+'_filename_js'] = sFileName + '.json' dVars['dic_main_filename_py'] = dVars['dic_default_filename_py'] + ".json" dVars['dic_main_filename_js'] = dVars['dic_default_filename_js'] + ".json" def buildDictionary (dVars, sType, bJavaScript=False): "build binary dictionary for Graphspell from lexicons" if sType == "main": spfLexSrc = dVars['lexicon_src'] |
︙ | ︙ |
Modified reader.py from [66f5eb17ae] to [e2706fc6a2].
1 2 3 4 5 6 7 8 9 | #!python3 # Just a file for one-shot scripts import os import sys import re import graphspell.ibdawg as ibdawg | | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 | #!python3 # Just a file for one-shot scripts import os import sys import re import graphspell.ibdawg as ibdawg oDict = ibdawg.IBDAWG("fr-allvars.json") def readFile (spf): if os.path.isfile(spf): with open(spf, "r", encoding="utf-8") as hSrc: for sLine in hSrc: yield sLine else: print("# Error: file not found.") # -------------------------------------------------------------------------------------------------- def listUnknownWords (spf): with open(spf+".res.txt", "w", encoding="utf-8") as hDst: for sLine in readFile(spfSrc): sLine = sLine.strip() if sLine: for sWord in sLine.split(): if not oDict.isValid(sWord): hDst.write(sWord+"\n") # -------------------------------------------------------------------------------------------------- def createLexStatFile (spf, dStat): dWord = {} for i, sLine in enumerate(readFile(spf)): |
︙ | ︙ |