Changes In Branch dict2 Through [866ec22f7d] Excluding Merge-Ins
This is equivalent to a diff from 3cffdae3b0 to 866ec22f7d
2020-11-05
| ||
12:14 | [fr] ajustements check-in: 86d8777431 user: olr tags: trunk, fr | |
2020-11-04
| ||
12:37 | [graphspell][py] remove duplicate method check-in: a3980c3ca4 user: olr tags: graphspell, dict2 | |
12:21 | [graphspell][py] ibdawg: remove binary dict support check-in: 866ec22f7d user: olr tags: graphspell, dict2 | |
12:02 | [graphspell] ibdawg: code cleaning, remove old code, useless compression versions check-in: 86250e8e6c user: olr tags: graphspell, dict2 | |
11:37 | [build][graphspell][lo] dictionary: drop support for binary file -> use JSON check-in: 05fb167483 user: olr tags: build, lo, graphspell, dict2 | |
2020-11-03
| ||
12:35 | [fr] ajustements check-in: 3cffdae3b0 user: olr tags: trunk, fr | |
2020-11-02
| ||
15:07 | [lo] launch lexical editor from context menu check-in: 616a21e49f user: olr tags: trunk, lo | |
Modified gc_lang/fr/build_data.py from [6e865955c0] to [3d9c0f4ca9].
︙ | ︙ | |||
48 49 50 51 52 53 54 | raise OSError("# Error. File not found or not loadable: " + spf) def loadDictionary (): global oDict if not oDict: try: | | | 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 | raise OSError("# Error. File not found or not loadable: " + spf) def loadDictionary (): global oDict if not oDict: try: oDict = ibdawg.IBDAWG("fr-allvars.json") except: traceback.print_exc() def makeDictionaries (sp, sVersion): with cd(sp+"/dictionnaire"): if platform.system() == "Windows": |
︙ | ︙ |
Modified gc_lang/fr/oxt/ContextMenu/ContextMenu.py from [c17c3b29b1] to [f3255b533a].
︙ | ︙ | |||
131 132 133 134 135 136 137 | if not oSpellChecker: xCurCtx = uno.getComponentContext() oGC = self.ctx.ServiceManager.createInstanceWithContext("org.openoffice.comp.pyuno.Lightproof.grammalecte", self.ctx) if hasattr(oGC, "getSpellChecker"): # https://bugs.documentfoundation.org/show_bug.cgi?id=97790 oSpellChecker = oGC.getSpellChecker() else: | | | 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 | if not oSpellChecker: xCurCtx = uno.getComponentContext() oGC = self.ctx.ServiceManager.createInstanceWithContext("org.openoffice.comp.pyuno.Lightproof.grammalecte", self.ctx) if hasattr(oGC, "getSpellChecker"): # https://bugs.documentfoundation.org/show_bug.cgi?id=97790 oSpellChecker = oGC.getSpellChecker() else: oSpellChecker = SpellChecker("${lang}", "fr-allvars.json") except: traceback.print_exc() def execute (self, args): if not args: return try: |
︙ | ︙ |
Modified gc_lang/fr/oxt/DictOptions/LexiconEditor.py from [828f4f365e] to [5ef5214006].
︙ | ︙ | |||
404 405 406 407 408 409 410 | @_waitPointer def importDictionary (self): spfImported = "" try: xFilePicker = self.xSvMgr.createInstanceWithContext('com.sun.star.ui.dialogs.FilePicker', self.ctx) # other possibility: com.sun.star.ui.dialogs.SystemFilePicker xFilePicker.initialize([uno.getConstantByName("com.sun.star.ui.dialogs.TemplateDescription.FILEOPEN_SIMPLE")]) # seems useless | | | 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 | @_waitPointer def importDictionary (self): spfImported = "" try: xFilePicker = self.xSvMgr.createInstanceWithContext('com.sun.star.ui.dialogs.FilePicker', self.ctx) # other possibility: com.sun.star.ui.dialogs.SystemFilePicker xFilePicker.initialize([uno.getConstantByName("com.sun.star.ui.dialogs.TemplateDescription.FILEOPEN_SIMPLE")]) # seems useless xFilePicker.appendFilter("Supported files", "*.json") xFilePicker.setDefaultName("fr.__personal__.json") # useless, doesn’t work xFilePicker.setDisplayDirectory("") xFilePicker.setMultiSelectionMode(False) nResult = xFilePicker.execute() if nResult == 1: # lFile = xFilePicker.getSelectedFiles() lFile = xFilePicker.getFiles() |
︙ | ︙ | |||
459 460 461 462 463 464 465 | self.xDateDic.Label = self.dUI.get("void", "#err") MessageBox(self.xDocument, self.dUI.get('save_message', "#err"), self.dUI.get('save_title', "#err")) def exportDictionary (self): try: xFilePicker = self.xSvMgr.createInstanceWithContext('com.sun.star.ui.dialogs.FilePicker', self.ctx) # other possibility: com.sun.star.ui.dialogs.SystemFilePicker xFilePicker.initialize([uno.getConstantByName("com.sun.star.ui.dialogs.TemplateDescription.FILESAVE_SIMPLE")]) # seems useless | | | 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 | self.xDateDic.Label = self.dUI.get("void", "#err") MessageBox(self.xDocument, self.dUI.get('save_message', "#err"), self.dUI.get('save_title', "#err")) def exportDictionary (self): try: xFilePicker = self.xSvMgr.createInstanceWithContext('com.sun.star.ui.dialogs.FilePicker', self.ctx) # other possibility: com.sun.star.ui.dialogs.SystemFilePicker xFilePicker.initialize([uno.getConstantByName("com.sun.star.ui.dialogs.TemplateDescription.FILESAVE_SIMPLE")]) # seems useless xFilePicker.appendFilter("Supported files", "*.json") xFilePicker.setDefaultName("fr.__personal__.json") # useless, doesn’t work xFilePicker.setDisplayDirectory("") xFilePicker.setMultiSelectionMode(False) nResult = xFilePicker.execute() if nResult == 1: # lFile = xFilePicker.getSelectedFiles() lFile = xFilePicker.getFiles() |
︙ | ︙ |
Modified gc_lang/fr/oxt/DictOptions/SearchWords.py from [764a885065] to [2c4ada79ef].
︙ | ︙ | |||
182 183 184 185 186 187 188 | elif xActionEvent.ActionCommand == "Close": self.xContainer.endExecute() except: traceback.print_exc() def initSpellChecker (self): if not self.oSpellChecker: | | | 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 | elif xActionEvent.ActionCommand == "Close": self.xContainer.endExecute() except: traceback.print_exc() def initSpellChecker (self): if not self.oSpellChecker: self.oSpellChecker = sc.SpellChecker("fr", "fr-allvars.json", "", self.oPersonalDicJSON) @_waitPointer def searchSimilar (self): self.initSpellChecker() sWord = self.xWord.Text.strip() if sWord: xGridDataModel = self.xGridModel.GridDataModel |
︙ | ︙ |
Modified gc_lang/fr/oxt/Graphspell.py from [810dc52bd8] to [76770ac233].
︙ | ︙ | |||
65 66 67 68 69 70 71 | sPersonalDicJSON = self.xOptionNode.getPropertyValue("personal_dic") if sPersonalDicJSON: try: personal_dic = json.loads(sPersonalDicJSON) except: print("Graphspell: wrong personal_dic") traceback.print_exc() | | | 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | sPersonalDicJSON = self.xOptionNode.getPropertyValue("personal_dic") if sPersonalDicJSON: try: personal_dic = json.loads(sPersonalDicJSON) except: print("Graphspell: wrong personal_dic") traceback.print_exc() self.oGraphspell = SpellChecker("fr", "fr-"+sMainDicName+".json", "", personal_dic) self.loadHunspell() # print("Graphspell: init done") except: print("Graphspell: init failed") traceback.print_exc() def loadHunspell (self): |
︙ | ︙ |
Modified gc_lang/fr/setup.py from [8a0db0631b] to [955f5741fe].
︙ | ︙ | |||
89 90 91 92 93 94 95 | # 'test': ['coverage'], # }, # If there are data files included in your packages that need to be # installed, specify them here. If using Python 2.6 or less, then these # have to be included in MANIFEST.in as well. package_data={ | | | 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 | # 'test': ['coverage'], # }, # If there are data files included in your packages that need to be # installed, specify them here. If using Python 2.6 or less, then these # have to be included in MANIFEST.in as well. package_data={ 'grammalecte': ['graphspell/_dictionaries/*.json', '*.txt'] }, # Although 'package_data' is the preferred approach, in some case you may # need to place data files outside of your packages. See: # http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files # noqa # In this case, 'data_file' will be installed into '<sys.prefix>/my_data' # data_files=[('my_data', ['data/data_file'])], |
︙ | ︙ |
Modified graphspell-js/dawg.js from [525275df92] to [bb21108d9e].
︙ | ︙ | |||
340 341 342 343 344 345 346 | } } } } } // BINARY CONVERSION | | | < | | | | < < < | | < < | | | < | 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 | } } } } } // BINARY CONVERSION createBinaryJSON (nCompressionMethod=1) { console.log("Write DAWG as an indexable binary dictionary"); this.nBytesArc = Math.floor( (this.nArcVal.toString(2).length + 2) / 8 ) + 1; // We add 2 bits. See DawgNode.convToBytes() this.nBytesOffset = 0; this._calcNumBytesNodeAddress(); this._calcNodesAddress(); console.log("Arc values (chars, affixes and tags): " + this.nArcVal); console.log("Arc size: "+this.nBytesArc+" bytes, Address size: "+this.nBytesNodeAddress+" bytes"); console.log("-> " + this.nBytesArc+this.nBytesNodeAddress + " * " + this.nArc + " = " + (this.nBytesArc+this.nBytesNodeAddress)*this.nArc + " bytes"); return this._createJSON(nCompressionMethod); } _calcNumBytesNodeAddress () { // how many bytes needed to store all nodes/arcs in the binary dictionary this.nBytesNodeAddress = 1; while (((this.nBytesArc + this.nBytesNodeAddress) * this.nArc) > (2 ** (this.nBytesNodeAddress * 8))) { this.nBytesNodeAddress += 1; } } _calcNodesAddress () { let nBytesNode = this.nBytesArc + this.nBytesNodeAddress; let iAddr = this.oRoot.arcs.size * nBytesNode; for (let oNode of this.dMinimizedNodes.values()) { oNode.addr = iAddr; iAddr += Math.max(oNode.arcs.size, 1) * nBytesNode; } } _createJSON (nCompressionMethod=1) { let sByDic = this.oRoot.convToBytes(this.nBytesArc, this.nBytesNodeAddress); for (let oNode of this.dMinimizedNodes.values()) { sByDic += oNode.convToBytes(this.nBytesArc, this.nBytesNodeAddress); } let oJSON = { "sHeader": "/grammalecte-fsa/", "sLangCode": this.sLangCode, "sLangName": this.sLangName, "sDicName": this.sDicName, "sDescription": this.sDescription, |
︙ | ︙ | |||
492 493 494 495 496 497 498 | for (let oNode of this.arcs.values()) { oNode.display(nTab+1, lArcVal, bRecur); } } } // VERSION 1 ===================================================================================================== | | | 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 | for (let oNode of this.arcs.values()) { oNode.display(nTab+1, lArcVal, bRecur); } } } // VERSION 1 ===================================================================================================== convToBytes (nBytesArc, nBytesNodeAddress) { /* Node scheme: - Arc length is defined by nBytesArc - Address length is defined by nBytesNodeAddress | Arc | Address of next node | | | | |
︙ | ︙ |
Modified graphspell/dawg.py from [b60434a390] to [8c6420c0dc].
︙ | ︙ | |||
354 355 356 357 358 359 360 | sEntry = sWord + "\t" + self.funcStemming(sWord, self.lArcVal[nVal]) for nMorphVal, _ in oNextNode.arcs.items(): if not zPattern or zPattern.search(self.lArcVal[nMorphVal]): yield sEntry + "\t" + self.lArcVal[nMorphVal] # BINARY CONVERSION | | | < | | | | < < < < < < < < < < < < < | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | | | < < < < < < < < | 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 | sEntry = sWord + "\t" + self.funcStemming(sWord, self.lArcVal[nVal]) for nMorphVal, _ in oNextNode.arcs.items(): if not zPattern or zPattern.search(self.lArcVal[nMorphVal]): yield sEntry + "\t" + self.lArcVal[nMorphVal] # BINARY CONVERSION def _calculateBinary (self, nCompressionMethod=1): print(" > Write DAWG as an indexable binary dictionary") self.nBytesArc = ( (self.nArcVal.bit_length() + 2) // 8 ) + 1 # We add 2 bits. See DawgNode.convToBytes() self.nBytesOffset = 0 self._calcNumBytesNodeAddress() self._calcNodesAddress() print(" Arc values (chars, affixes and tags): {} -> {} bytes".format( self.nArcVal, len("\t".join(self.lArcVal).encode("utf-8")) )) print(" Arc size: {} bytes, Address size: {} bytes -> {} * {} = {} bytes".format( self.nBytesArc, self.nBytesNodeAddress, \ self.nBytesArc+self.nBytesNodeAddress, self.nArc, \ (self.nBytesArc+self.nBytesNodeAddress)*self.nArc )) def _calcNumBytesNodeAddress (self): "how many bytes needed to store all nodes/arcs in the binary dictionary" self.nBytesNodeAddress = 1 while ((self.nBytesArc + self.nBytesNodeAddress) * self.nArc) > (2 ** (self.nBytesNodeAddress * 8)): self.nBytesNodeAddress += 1 def _calcNodesAddress (self): nBytesNode = self.nBytesArc + self.nBytesNodeAddress iAddr = len(self.oRoot.arcs) * nBytesNode for oNode in self.lMinimizedNodes: oNode.addr = iAddr iAddr += max(len(oNode.arcs), 1) * nBytesNode def getBinaryAsJSON (self, nCompressionMethod=1, bBinaryDictAsHexString=True): "return a JSON string containing all necessary data of the dictionary (compressed as a binary string)" self._calculateBinary(nCompressionMethod) byDic = b"" byDic = self.oRoot.convToBytes(self.nBytesArc, self.nBytesNodeAddress) for oNode in self.lMinimizedNodes: byDic += oNode.convToBytes(self.nBytesArc, self.nBytesNodeAddress) return { "sHeader": "/grammalecte-fsa/", "sLangCode": self.sLangCode, "sLangName": self.sLangName, "sDicName": self.sDicName, "sDescription": self.sDescription, "sFileName": self.sFileName, |
︙ | ︙ | |||
474 475 476 477 478 479 480 | # Mozilla’s JS parser don’t like file bigger than 4 Mb! # So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension. # https://github.com/mozilla/addons-linter/issues/1361 "sByDic": byDic.hex() if bBinaryDictAsHexString else [ e for e in byDic ], "l2grams": list(self.a2grams) } | | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | < | | | | < < < < < < < < < | 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 | # Mozilla’s JS parser don’t like file bigger than 4 Mb! # So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension. # https://github.com/mozilla/addons-linter/issues/1361 "sByDic": byDic.hex() if bBinaryDictAsHexString else [ e for e in byDic ], "l2grams": list(self.a2grams) } def writeAsJSObject (self, spfDst, nCompressionMethod=1, bInJSModule=False, bBinaryDictAsHexString=True): "write a file (JSON or JS module) with all the necessary data" if not spfDst.endswith(".json"): spfDst += "."+str(nCompressionMethod)+".json" with open(spfDst, "w", encoding="utf-8", newline="\n") as hDst: if bInJSModule: hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ') hDst.write( json.dumps(self.getBinaryAsJSON(nCompressionMethod, bBinaryDictAsHexString), ensure_ascii=False) ) if bInJSModule: hDst.write(";\n\nexports.dictionary = dictionary;\n") def _getDate (self): return time.strftime("%Y-%m-%d %H:%M:%S") def _writeNodes (self, sPathFile, nCompressionMethod=1): "for debugging only" print(" > Write nodes") with open(sPathFile+".nodes."+str(nCompressionMethod)+".txt", 'w', encoding='utf-8', newline="\n") as hDst: hDst.write(self.oRoot.getTxtRepr(self.nBytesArc, self.lArcVal)+"\n") #hDst.write( ''.join( [ "%02X " % z for z in self.oRoot.convToBytes(self.nBytesArc, self.nBytesNodeAddress) ] ).strip() ) for oNode in self.lMinimizedNodes: hDst.write(oNode.getTxtRepr(self.nBytesArc, self.lArcVal)+"\n") class DawgNode: """Node of the word graph""" NextId = 0 |
︙ | ︙ | |||
644 645 646 647 648 649 650 | self.arcs = collections.OrderedDict(sorted(self.arcs.items(), key=lambda t: dValOccur.get(t[0], 0), reverse=True)) def sortArcs2 (self, dValOccur, lArcVal): "sort arcs of each node depending on the previous char" self.arcs = collections.OrderedDict(sorted(self.arcs.items(), key=lambda t: dValOccur.get(lArcVal[t[0]], 0), reverse=True)) # VERSION 1 ===================================================================================================== | | | 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 | self.arcs = collections.OrderedDict(sorted(self.arcs.items(), key=lambda t: dValOccur.get(t[0], 0), reverse=True)) def sortArcs2 (self, dValOccur, lArcVal): "sort arcs of each node depending on the previous char" self.arcs = collections.OrderedDict(sorted(self.arcs.items(), key=lambda t: dValOccur.get(lArcVal[t[0]], 0), reverse=True)) # VERSION 1 ===================================================================================================== def convToBytes (self, nBytesArc, nBytesNodeAddress): """ Convert to bytes (method 1). Node scheme: - Arc length is defined by nBytesArc - Address length is defined by nBytesNodeAddress |
︙ | ︙ | |||
686 687 688 689 690 691 692 | val = val | nFinalNodeMask if i == nArc: val = val | nFinalArcMask by += val.to_bytes(nBytesArc, byteorder='big') by += self.arcs[arc].addr.to_bytes(nBytesNodeAddress, byteorder='big') return by | | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 | val = val | nFinalNodeMask if i == nArc: val = val | nFinalArcMask by += val.to_bytes(nBytesArc, byteorder='big') by += self.arcs[arc].addr.to_bytes(nBytesNodeAddress, byteorder='big') return by def getTxtRepr (self, nBytesArc, lVal): "return representation as string of node (method 1)" nArc = len(self.arcs) nFinalNodeMask = 1 << ((nBytesArc*8)-1) nFinalArcMask = 1 << ((nBytesArc*8)-2) s = "i{:_>10} -- #{:_>10}\n".format(self.i, self.addr) if not nArc: s += " {:<20} {:0>16} i{:_>10} #{:_>10}\n".format("", bin(nFinalNodeMask | nFinalArcMask)[2:], "0", "0") return s for i, arc in enumerate(self.arcs, 1): val = arc if i == 1 and self.final: val = val | nFinalNodeMask if i == nArc: val = val | nFinalArcMask s += " {:<20} {:0>16} i{:_>10} #{:_>10}\n".format(lVal[arc], bin(val)[2:], self.arcs[arc].i, self.arcs[arc].addr) return s # Another attempt to sort node arcs _dCharOrder = { # key: previous char, value: dictionary of chars {c: nValue} "": {} |
︙ | ︙ |
Modified graphspell/ibdawg.py from [2cf8f6a51c] to [0da0287637].
︙ | ︙ | |||
110 111 112 113 114 115 116 | class IBDAWG: """INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH""" def __init__ (self, source): if isinstance(source, str): | | | | < < < | | < | | > | > > > | 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 | class IBDAWG: """INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH""" def __init__ (self, source): if isinstance(source, str): by = pkgutil.get_data(__package__, "_dictionaries/" + source) if not by: raise OSError("# Error. File not found or not loadable: "+source) self.sFileName = source oData = json.loads(by.decode("utf-8")) #json.loads(by) # In Python 3.6, can read directly binary strings else: self.sFileName = "[None]" oData = source self.sByDic = "" # init to prevent pylint whining self.__dict__.update(oData) self.byDic = binascii.unhexlify(self.sByDic) self.dCharVal = { v: k for k, v in self.dChar.items() } self.a2grams = set(getattr(self, 'l2grams')) if hasattr(self, 'l2grams') else None # Performance trick: # Instead of converting bytes to integers each times we parse the binary dictionary, # we do it once, then parse the array nAcc = 0 byBuffer = b"" self.lByDic = [] |
︙ | ︙ | |||
166 167 168 169 170 171 172 | # lexicographer module ? self.lexicographer = None try: self.lexicographer = importlib.import_module(".lexgraph_"+self.sLangCode, "grammalecte.graphspell") except ImportError: print("# No module <graphspell.lexgraph_"+self.sLangCode+".py>") | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 166 167 168 169 170 171 172 173 174 175 176 177 178 179 | # lexicographer module ? self.lexicographer = None try: self.lexicographer = importlib.import_module(".lexgraph_"+self.sLangCode, "grammalecte.graphspell") except ImportError: print("# No module <graphspell.lexgraph_"+self.sLangCode+".py>") def getInfo (self): "return string about the IBDAWG" return " Language: {0.sLangName} Lang code: {0.sLangCode} Dictionary name: {0.sDicName}" \ " Compression method: {0.nCompressionMethod:>2} Date: {0.sDate} Stemming: {0.cStemming}FX\n" \ " Arcs values: {0.nArcVal:>10,} = {0.nChar:>5,} characters, {0.nAff:>6,} affixes, {0.nTag:>6,} tags\n" \ " Dictionary: {0.nEntry:>12,} entries, {0.nNode:>11,} nodes, {0.nArc:>11,} arcs\n" \ " Address size: {0.nBytesNodeAddress:>1} bytes, Arc size: {0.nBytesArc:>1} bytes\n".format(self) |
︙ | ︙ |
Modified graphspell/spellchecker.py from [2bdbe76996] to [9b47d651ea].
︙ | ︙ | |||
12 13 14 15 16 17 18 | import traceback from . import ibdawg from . import tokenizer dDefaultDictionaries = { | | | | 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 | import traceback from . import ibdawg from . import tokenizer dDefaultDictionaries = { "fr": "fr-allvars.json", "en": "en.json" } class SpellChecker (): "SpellChecker: wrapper for the IBDAWG class" def __init__ (self, sLangCode, sfMainDic="", sfCommunityDic="", sfPersonalDic=""): |
︙ | ︙ |
Modified lex_build.py from [0d00b07703] to [1b2b5d0ea9].
1 2 3 4 5 6 7 8 9 10 11 12 13 | #!python3 """ Lexicon builder """ import argparse from distutils import dir_util import graphspell.dawg as fsa from graphspell.ibdawg import IBDAWG | | | | > | > | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 | #!python3 """ Lexicon builder """ import argparse from distutils import dir_util import graphspell.dawg as fsa from graphspell.ibdawg import IBDAWG def build (spfSrc, sLangCode, sLangName, sfDict, bJavaScript=False, sDicName="", sDescription="", sFilter="", cStemmingMethod="S", nCompressMethod=1): "transform a text lexicon as a binary indexable dictionary" oDAWG = fsa.DAWG(spfSrc, cStemmingMethod, sLangCode, sLangName, sDicName, sDescription, sFilter) dir_util.mkpath("graphspell/_dictionaries") #oDAWG.writeInfo("graphspell/_dictionaries/" + sfDict + ".info.txt") #oDAWG.writeBinary("graphspell/_dictionaries/" + sfDict + ".bdic", int(nCompressMethod)) oDAWG.writeAsJSObject("graphspell/_dictionaries/" + sfDict + ".json") if bJavaScript: dir_util.mkpath("graphspell-js/_dictionaries") oDAWG.writeAsJSObject("graphspell-js/_dictionaries/" + sfDict + ".json") #oDic = IBDAWG(sfDict + ".bdic") #oDic.writeAsJSObject("graphspell-js/_dictionaries/" + sfDict + ".json", bBinaryDictAsHexString=True) def main (): "parse args from CLI" xParser = argparse.ArgumentParser() xParser.add_argument("src_lexicon", type=str, help="path and file name of the source lexicon") xParser.add_argument("lang_code", type=str, help="language code") |
︙ | ︙ |
Modified make.py from [f59af684eb] to [a76be310e9].
︙ | ︙ | |||
313 314 315 316 317 318 319 | dVars["dic_personal_filename_js"] = "" lDict = [ ("main", s) for s in dVars['dic_filenames'].split(",") ] if bCommunityDict: lDict.append(("community", dVars['dic_community_filename'])) if bPersonalDict: lDict.append(("personal", dVars['dic_personal_filename'])) for sType, sFileName in lDict: | | | | | 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 | dVars["dic_personal_filename_js"] = "" lDict = [ ("main", s) for s in dVars['dic_filenames'].split(",") ] if bCommunityDict: lDict.append(("community", dVars['dic_community_filename'])) if bPersonalDict: lDict.append(("personal", dVars['dic_personal_filename'])) for sType, sFileName in lDict: spfPyDic = f"graphspell/_dictionaries/{sFileName}.json" spfJSDic = f"graphspell-js/_dictionaries/{sFileName}.json" if not os.path.isfile(spfPyDic) or (bJavaScript and not os.path.isfile(spfJSDic)): buildDictionary(dVars, sType, bJavaScript) print(" +", spfPyDic) file_util.copy_file(spfPyDic, "grammalecte/graphspell/_dictionaries") dVars['dic_'+sType+'_filename_py'] = sFileName + '.json' if bJavaScript: print(" +", spfJSDic) file_util.copy_file(spfJSDic, "grammalecte-js/graphspell/_dictionaries") dVars['dic_'+sType+'_filename_js'] = sFileName + '.json' dVars['dic_main_filename_py'] = dVars['dic_default_filename_py'] + ".json" dVars['dic_main_filename_js'] = dVars['dic_default_filename_js'] + ".json" def buildDictionary (dVars, sType, bJavaScript=False): "build binary dictionary for Graphspell from lexicons" if sType == "main": spfLexSrc = dVars['lexicon_src'] |
︙ | ︙ |
Modified reader.py from [66f5eb17ae] to [e2706fc6a2].
1 2 3 4 5 6 7 8 9 | #!python3 # Just a file for one-shot scripts import os import sys import re import graphspell.ibdawg as ibdawg | | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 | #!python3 # Just a file for one-shot scripts import os import sys import re import graphspell.ibdawg as ibdawg oDict = ibdawg.IBDAWG("fr-allvars.json") def readFile (spf): if os.path.isfile(spf): with open(spf, "r", encoding="utf-8") as hSrc: for sLine in hSrc: yield sLine else: print("# Error: file not found.") # -------------------------------------------------------------------------------------------------- def listUnknownWords (spf): with open(spf+".res.txt", "w", encoding="utf-8") as hDst: for sLine in readFile(spfSrc): sLine = sLine.strip() if sLine: for sWord in sLine.split(): if not oDict.isValid(sWord): hDst.write(sWord+"\n") # -------------------------------------------------------------------------------------------------- def createLexStatFile (spf, dStat): dWord = {} for i, sLine in enumerate(readFile(spf)): |
︙ | ︙ |