Overview
Comment: | [graphspell][bug] ibdawg: fix confusion between <char value> and <tag value> |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | graphspell |
Files: | files | file ages | folders |
SHA3-256: |
ad412c44e3401d8a8913292ed07970f0 |
User & Date: | olr on 2018-02-11 19:19:14 |
Other Links: | manifest | tags |
Context
2018-02-12
| ||
09:03 | [graphspell][py] new binary file structure check-in: 556c26d78a user: olr tags: trunk, graphspell | |
2018-02-11
| ||
19:19 | [graphspell][bug] ibdawg: fix confusion between <char value> and <tag value> check-in: ad412c44e3 user: olr tags: trunk, graphspell | |
18:26 | [graphspell][py] fix import check-in: 84c2ade56e user: olr tags: trunk, graphspell | |
Changes
Modified graphspell-js/ibdawg.js from [4ebdc2968e] to [61152510c9].
︙ | ︙ | |||
348 349 350 351 352 353 354 | } } } * _getCharArcs (iAddr) { // generator: yield all chars and addresses from node at address <iAddr> for (let [nVal, jAddr] of this._getArcs(iAddr)) { | | | 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 | } } } * _getCharArcs (iAddr) { // generator: yield all chars and addresses from node at address <iAddr> for (let [nVal, jAddr] of this._getArcs(iAddr)) { if (nVal <= this.nChar) { yield [this.dCharVal.get(nVal), jAddr]; } } } * _getSimilarCharArcs (cChar, iAddr) { // generator: yield similar char of <cChar> and address of the following node |
︙ | ︙ | |||
370 371 372 373 374 375 376 | } } _getTails (iAddr, sTail="", n=2) { // return a list of suffixes ending at a distance of <n> from <iAddr> let aTails = new Set(); for (let [nVal, jAddr] of this._getArcs(iAddr)) { | | | 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 | } } _getTails (iAddr, sTail="", n=2) { // return a list of suffixes ending at a distance of <n> from <iAddr> let aTails = new Set(); for (let [nVal, jAddr] of this._getArcs(iAddr)) { if (nVal <= this.nChar) { if (this._convBytesToInteger(this.byDic.slice(jAddr, jAddr+this.nBytesArc)) & this._finalNodeMask) { aTails.add(sTail + this.dCharVal.get(nVal)); } if (n && aTails.size == 0) { aTails.gl_update(this._getTails(jAddr, sTail+this.dCharVal.get(nVal), n-1)); } } |
︙ | ︙ | |||
439 440 441 442 443 444 445 | if (this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask) { let l = []; let nRawArc = 0; while (!(nRawArc & this._lastArcMask)) { let iEndArcAddr = iAddr + this.nBytesArc; nRawArc = this._convBytesToInteger(this.byDic.slice(iAddr, iEndArcAddr)); let nArc = nRawArc & this._arcMask; | | | 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 | if (this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask) { let l = []; let nRawArc = 0; while (!(nRawArc & this._lastArcMask)) { let iEndArcAddr = iAddr + this.nBytesArc; nRawArc = this._convBytesToInteger(this.byDic.slice(iAddr, iEndArcAddr)); let nArc = nRawArc & this._arcMask; if (nArc > this.nChar) { // This value is not a char, this is a stemming code let sStem = ">" + this.funcStemming(sWord, this.lArcVal[nArc]); // Now , we go to the next node and retrieve all following arcs values, all of them are tags let iAddr2 = this._convBytesToInteger(this.byDic.slice(iEndArcAddr, iEndArcAddr+this.nBytesNodeAddress)); let nRawArc2 = 0; while (!(nRawArc2 & this._lastArcMask)) { let iEndArcAddr2 = iAddr2 + this.nBytesArc; |
︙ | ︙ | |||
478 479 480 481 482 483 484 | if (this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask) { let l = []; let nRawArc = 0; while (!(nRawArc & this._lastArcMask)) { let iEndArcAddr = iAddr + this.nBytesArc; nRawArc = this._convBytesToInteger(this.byDic.slice(iAddr, iEndArcAddr)); let nArc = nRawArc & this._arcMask; | | | 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 | if (this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask) { let l = []; let nRawArc = 0; while (!(nRawArc & this._lastArcMask)) { let iEndArcAddr = iAddr + this.nBytesArc; nRawArc = this._convBytesToInteger(this.byDic.slice(iAddr, iEndArcAddr)); let nArc = nRawArc & this._arcMask; if (nArc > this.nChar) { // This value is not a char, this is a stemming code l.push(this.funcStemming(sWord, this.lArcVal[nArc])); } iAddr = iEndArcAddr + this.nBytesNodeAddress; } return l; } |
︙ | ︙ |
Modified graphspell/ibdawg.py from [56b76111df] to [69a2b665f2].
︙ | ︙ | |||
349 350 351 352 353 354 355 | for cChar, jAddr in self._getCharArcsWithPriority(iAddr, oSuggResult.sWord[nDeep:nDeep+1]): self._suggest2(oSuggResult, nDeep+1, jAddr, sNewWord+cChar) return def _getCharArcs (self, iAddr): "generator: yield all chars and addresses from node at address <iAddr>" for nVal, jAddr in self._getArcs(iAddr): | | | 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 | for cChar, jAddr in self._getCharArcsWithPriority(iAddr, oSuggResult.sWord[nDeep:nDeep+1]): self._suggest2(oSuggResult, nDeep+1, jAddr, sNewWord+cChar) return def _getCharArcs (self, iAddr): "generator: yield all chars and addresses from node at address <iAddr>" for nVal, jAddr in self._getArcs(iAddr): if nVal <= self.nChar: yield (self.dCharVal[nVal], jAddr) def _getSimilarCharArcs (self, cChar, iAddr): "generator: yield similar char of <cChar> and address of the following node" for c in cp.d1to1.get(cChar, [cChar]): if c in self.dChar: jAddr = self._lookupArcNode(self.dChar[c], iAddr) |
︙ | ︙ | |||
371 372 373 374 375 376 377 | lTuple.sort(key=lambda t: 0 if t[0] in cp.d1to1.get(cChar, cChar) else 1) yield from lTuple def _getTails (self, iAddr, sTail="", n=2): "return a list of suffixes ending at a distance of <n> from <iAddr>" aTails = set() for nVal, jAddr in self._getArcs(iAddr): | | | 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 | lTuple.sort(key=lambda t: 0 if t[0] in cp.d1to1.get(cChar, cChar) else 1) yield from lTuple def _getTails (self, iAddr, sTail="", n=2): "return a list of suffixes ending at a distance of <n> from <iAddr>" aTails = set() for nVal, jAddr in self._getArcs(iAddr): if nVal <= self.nChar: if int.from_bytes(self.byDic[jAddr:jAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask: aTails.add(sTail + self.dCharVal[nVal]) if n and not aTails: aTails.update(self._getTails(jAddr, sTail+self.dCharVal[nVal], n-1)) return aTails def drawPath (self, sWord, iAddr=0): |
︙ | ︙ | |||
439 440 441 442 443 444 445 | if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask): l = [] nRawArc = 0 while not (nRawArc & self._lastArcMask): iEndArcAddr = iAddr + self.nBytesArc nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') nArc = nRawArc & self._arcMask | | | 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 | if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask): l = [] nRawArc = 0 while not (nRawArc & self._lastArcMask): iEndArcAddr = iAddr + self.nBytesArc nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') nArc = nRawArc & self._arcMask if nArc > self.nChar: # This value is not a char, this is a stemming code sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc]) # Now , we go to the next node and retrieve all following arcs values, all of them are tags iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') nRawArc2 = 0 while not (nRawArc2 & self._lastArcMask): iEndArcAddr2 = iAddr2 + self.nBytesArc |
︙ | ︙ | |||
470 471 472 473 474 475 476 | if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask): l = [] nRawArc = 0 while not (nRawArc & self._lastArcMask): iEndArcAddr = iAddr + self.nBytesArc nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') nArc = nRawArc & self._arcMask | | | 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 | if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask): l = [] nRawArc = 0 while not (nRawArc & self._lastArcMask): iEndArcAddr = iAddr + self.nBytesArc nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') nArc = nRawArc & self._arcMask if nArc > self.nChar: # This value is not a char, this is a stemming code l.append(self.funcStemming(sWord, self.lArcVal[nArc])) iAddr = iEndArcAddr+self.nBytesNodeAddress return l return [] def _lookupArcNode1 (self, nVal, iAddr): |
︙ | ︙ | |||
537 538 539 540 541 542 543 | if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask): l = [] nRawArc = 0 while not (nRawArc & self._lastArcMask): iEndArcAddr = iAddr + self.nBytesArc nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') nArc = nRawArc & self._arcMask | | | 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 | if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask): l = [] nRawArc = 0 while not (nRawArc & self._lastArcMask): iEndArcAddr = iAddr + self.nBytesArc nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') nArc = nRawArc & self._arcMask if nArc > self.nChar: # This value is not a char, this is a stemming code sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc]) # Now , we go to the next node and retrieve all following arcs values, all of them are tags if not (nRawArc & self._addrBitMask): iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') else: # we go to the end of the node |
︙ | ︙ | |||
575 576 577 578 579 580 581 | if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask): l = [] nRawArc = 0 while not (nRawArc & self._lastArcMask): iEndArcAddr = iAddr + self.nBytesArc nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') nArc = nRawArc & self._arcMask | | | 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 | if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask): l = [] nRawArc = 0 while not (nRawArc & self._lastArcMask): iEndArcAddr = iAddr + self.nBytesArc nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') nArc = nRawArc & self._arcMask if nArc > self.nChar: # This value is not a char, this is a stemming code l.append(self.funcStemming(sWord, self.lArcVal[nArc])) # Now , we go to the next node if not (nRawArc & self._addrBitMask): iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') else: # we go to the end of the node |
︙ | ︙ | |||
653 654 655 656 657 658 659 | l = [] nRawArc = 0 iAddrNode = iAddr while not (nRawArc & self._lastArcMask): iEndArcAddr = iAddr + self.nBytesArc nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') nArc = nRawArc & self._arcMask | | | 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 | l = [] nRawArc = 0 iAddrNode = iAddr while not (nRawArc & self._lastArcMask): iEndArcAddr = iAddr + self.nBytesArc nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') nArc = nRawArc & self._arcMask if nArc > self.nChar: # This value is not a char, this is a stemming code sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc]) # Now , we go to the next node and retrieve all following arcs values, all of them are tags if not (nRawArc & self._addrBitMask): iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') else: iAddr2 = iAddrNode + int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesOffset], byteorder='big') |
︙ | ︙ | |||
688 689 690 691 692 693 694 | l = [] nRawArc = 0 iAddrNode = iAddr while not (nRawArc & self._lastArcMask): iEndArcAddr = iAddr + self.nBytesArc nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') nArc = nRawArc & self._arcMask | | | 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 | l = [] nRawArc = 0 iAddrNode = iAddr while not (nRawArc & self._lastArcMask): iEndArcAddr = iAddr + self.nBytesArc nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') nArc = nRawArc & self._arcMask if nArc > self.nChar: # This value is not a char, this is a stemming code l.append(self.funcStemming(sWord, self.lArcVal[nArc])) iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr+self.nBytesOffset return l return [] def _lookupArcNode3 (self, nVal, iAddr): |
︙ | ︙ |