Overview
Comment: | [graphspell][bug] dawg: conversion to binary string |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | graphspell |
Files: | files | file ages | folders |
SHA3-256: |
a25340628a90b0f26d0690ba8834babb |
User & Date: | olr on 2018-02-07 13:57:15 |
Other Links: | manifest | tags |
Context
2018-02-07
| ||
13:58 | [fx] update: lexicon editor check-in: 84a72ba4e1 user: olr tags: trunk, fx | |
13:57 | [graphspell][bug] dawg: conversion to binary string check-in: a25340628a user: olr tags: trunk, graphspell | |
2018-02-06
| ||
16:08 | [fx] update: lexicon editor check-in: 898b80959d user: olr tags: trunk, fx | |
Changes
Modified graphspell-js/dawg.js from [af3f047637] to [01de2ded5b].
︙ | ︙ | |||
143 144 145 146 147 148 149 | } } this.finish(); this.countNodes(); this.countArcs(); this.sortNodeArcs(dValOccur); this.displayInfo(); | | | 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 | } } this.finish(); this.countNodes(); this.countArcs(); this.sortNodeArcs(dValOccur); this.displayInfo(); //this.writeInfo(); //this.oRoot.display(0, this.lArcVal, true); } // BUILD DAWG insert (aEntry) { if (aEntry < this.aPreviousEntry) { throw "Error: Words must be inserted in alphabetical order."; |
︙ | ︙ | |||
292 293 294 295 296 297 298 | for (let s of this.lArcVal) { console.log(i + ": " + s); i++; } } // BINARY CONVERSION | | | | 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 | for (let s of this.lArcVal) { console.log(i + ": " + s); i++; } } // BINARY CONVERSION createBinary (nMethod) { console.log("Write DAWG as an indexable binary dictionary [method: "+nMethod+"]"); if (nMethod == 1) { this.nBytesArc = Math.floor( (this.nArcVal.toString(2).length + 2) / 8 ) + 1; // We add 2 bits. See DawgNode.convToBytes1() this._calcNumBytesNodeAddress() this._calcNodesAddress1() } else { console.log("Error: unknown compression method"); } console.log("Arc values (chars, affixes and tags): " + this.nArcVal); console.log("Arc size: "+this.nBytesArc+" bytes, Address size: "+this.nBytesNodeAddress+" bytes"); |
︙ | ︙ | |||
355 356 357 358 359 360 361 | - Section Values: * a list of strings encoded in binary from utf-8, each value separated with a tabulation - Section Word Graph (nodes / arcs) * A list of nodes which are a list of arcs with an address of the next node. See DawgNode.convToBytes() for details. */ | < < < < < < < | > | 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 | - Section Values: * a list of strings encoded in binary from utf-8, each value separated with a tabulation - Section Word Graph (nodes / arcs) * A list of nodes which are a list of arcs with an address of the next node. See DawgNode.convToBytes() for details. */ let sByDic = ""; if (nMethod == 1) { sByDic = this.oRoot.convToBytes1(this.nBytesArc, this.nBytesNodeAddress); for (let oNode of this.dMinimizedNodes.values()) { sByDic += oNode.convToBytes1(this.nBytesArc, this.nBytesNodeAddress); } } let oJSON = { "sName": this.sName, "nVersion": this.nMethod, "sHeader": this.sHeader, "lArcVal": this.lArcVal, "nArcVal": this.nArcVal, "byDic": sByDic, "sLang": this.sLang, "nChar": this.nChar, "nBytesArc": this.nBytesArc, "nBytesNodeAddress": this.nBytesNodeAddress, "nEntries": this.nEntry, "nNode": this.nNode, "nArc": this.nArc, "nAff": this.nAff, "cStemming": this.cStemming, "nTag": this.nTag, "dChar": this.dChar, "_arcMask": this._arcMask, "_finalNodeMask": this._finalNodeMask, "_lastArcMask": this._lastArcMask, "_addrBitMask": this._addrBitMask, "nBytesOffset": this.nBytesOffset }; return oJSON; } } const oNodeCounter = { nNextId: 0, |
︙ | ︙ | |||
493 494 495 496 497 498 499 | | \___ if 1, last arc of this node \_____ if 1, this node is final (only on the first arc) */ let nArc = this.arcs.size; let nFinalNodeMask = 1 << ((nBytesArc*8)-1); let nFinalArcMask = 1 << ((nBytesArc*8)-2); if (this.arcs.size == 0) { | | | | | | | | | | | | | < < < | > | > | 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 | | \___ if 1, last arc of this node \_____ if 1, this node is final (only on the first arc) */ let nArc = this.arcs.size; let nFinalNodeMask = 1 << ((nBytesArc*8)-1); let nFinalArcMask = 1 << ((nBytesArc*8)-2); if (this.arcs.size == 0) { let nVal = nFinalNodeMask | nFinalArcMask; let sBinary = this.convValueToHexString(nVal, nBytesArc); sBinary += this.convValueToHexString(0, nBytesNodeAddress); return sBinary; } let sBinary = ""; let i = 1; for (let arc of this.arcs.keys()) { let nVal = arc; if (i == 1 && this.final) { nVal = nVal | nFinalNodeMask; } if (i == nArc) { nVal = nVal | nFinalArcMask; } i++; sBinary += this.convValueToHexString(nVal, nBytesArc); sBinary += this.convValueToHexString(this.arcs.get(arc).addr, nBytesNodeAddress); } return sBinary; } convValueToHexString (nVal, nByte) { // nVal: value to convert, nByte: number of bytes let sHexVal = nVal.toString(16); // conversion to hexadecimal string //console.log(`value: ${nVal} in ${nByte} bytes`); if (sHexVal.length < (nByte*2)) { return "0".repeat((nByte*2) - sHexVal.length) + sHexVal; } else if (sHexVal.length == (nByte*2)) { return sHexVal } else { throw "Conversion to byte string: value bigger than allowed."; } } } // Another attempt to sort node arcs const _dCharOrder = new Map([ ["", new Map()] ]); // key: previous char, value: dictionary of chars {c: nValue} |
︙ | ︙ |