Overview
| Comment: | [graphspell][bug] ibdawg: fix confusion between <char value> and <tag value> |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk | graphspell |
| Files: | files | file ages | folders |
| SHA3-256: |
ad412c44e3401d8a8913292ed07970f0 |
| User & Date: | olr on 2018-02-11 19:19:14 |
| Other Links: | manifest | tags |
Context
|
2018-02-12
| ||
| 09:03 | [graphspell][py] new binary file structure check-in: 556c26d78a user: olr tags: trunk, graphspell | |
|
2018-02-11
| ||
| 19:19 | [graphspell][bug] ibdawg: fix confusion between <char value> and <tag value> check-in: ad412c44e3 user: olr tags: trunk, graphspell | |
| 18:26 | [graphspell][py] fix import check-in: 84c2ade56e user: olr tags: trunk, graphspell | |
Changes
Modified graphspell-js/ibdawg.js from [4ebdc2968e] to [61152510c9].
| ︙ | ︙ | |||
348 349 350 351 352 353 354 |
}
}
}
* _getCharArcs (iAddr) {
// generator: yield all chars and addresses from node at address <iAddr>
for (let [nVal, jAddr] of this._getArcs(iAddr)) {
| | | 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 |
}
}
}
* _getCharArcs (iAddr) {
// generator: yield all chars and addresses from node at address <iAddr>
for (let [nVal, jAddr] of this._getArcs(iAddr)) {
if (nVal <= this.nChar) {
yield [this.dCharVal.get(nVal), jAddr];
}
}
}
* _getSimilarCharArcs (cChar, iAddr) {
// generator: yield similar char of <cChar> and address of the following node
|
| ︙ | ︙ | |||
370 371 372 373 374 375 376 |
}
}
_getTails (iAddr, sTail="", n=2) {
// return a list of suffixes ending at a distance of <n> from <iAddr>
let aTails = new Set();
for (let [nVal, jAddr] of this._getArcs(iAddr)) {
| | | 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 |
}
}
_getTails (iAddr, sTail="", n=2) {
// return a list of suffixes ending at a distance of <n> from <iAddr>
let aTails = new Set();
for (let [nVal, jAddr] of this._getArcs(iAddr)) {
if (nVal <= this.nChar) {
if (this._convBytesToInteger(this.byDic.slice(jAddr, jAddr+this.nBytesArc)) & this._finalNodeMask) {
aTails.add(sTail + this.dCharVal.get(nVal));
}
if (n && aTails.size == 0) {
aTails.gl_update(this._getTails(jAddr, sTail+this.dCharVal.get(nVal), n-1));
}
}
|
| ︙ | ︙ | |||
439 440 441 442 443 444 445 |
if (this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask) {
let l = [];
let nRawArc = 0;
while (!(nRawArc & this._lastArcMask)) {
let iEndArcAddr = iAddr + this.nBytesArc;
nRawArc = this._convBytesToInteger(this.byDic.slice(iAddr, iEndArcAddr));
let nArc = nRawArc & this._arcMask;
| | | 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 |
if (this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask) {
let l = [];
let nRawArc = 0;
while (!(nRawArc & this._lastArcMask)) {
let iEndArcAddr = iAddr + this.nBytesArc;
nRawArc = this._convBytesToInteger(this.byDic.slice(iAddr, iEndArcAddr));
let nArc = nRawArc & this._arcMask;
if (nArc > this.nChar) {
// This value is not a char, this is a stemming code
let sStem = ">" + this.funcStemming(sWord, this.lArcVal[nArc]);
// Now , we go to the next node and retrieve all following arcs values, all of them are tags
let iAddr2 = this._convBytesToInteger(this.byDic.slice(iEndArcAddr, iEndArcAddr+this.nBytesNodeAddress));
let nRawArc2 = 0;
while (!(nRawArc2 & this._lastArcMask)) {
let iEndArcAddr2 = iAddr2 + this.nBytesArc;
|
| ︙ | ︙ | |||
478 479 480 481 482 483 484 |
if (this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask) {
let l = [];
let nRawArc = 0;
while (!(nRawArc & this._lastArcMask)) {
let iEndArcAddr = iAddr + this.nBytesArc;
nRawArc = this._convBytesToInteger(this.byDic.slice(iAddr, iEndArcAddr));
let nArc = nRawArc & this._arcMask;
| | | 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 |
if (this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask) {
let l = [];
let nRawArc = 0;
while (!(nRawArc & this._lastArcMask)) {
let iEndArcAddr = iAddr + this.nBytesArc;
nRawArc = this._convBytesToInteger(this.byDic.slice(iAddr, iEndArcAddr));
let nArc = nRawArc & this._arcMask;
if (nArc > this.nChar) {
// This value is not a char, this is a stemming code
l.push(this.funcStemming(sWord, this.lArcVal[nArc]));
}
iAddr = iEndArcAddr + this.nBytesNodeAddress;
}
return l;
}
|
| ︙ | ︙ |
Modified graphspell/ibdawg.py from [56b76111df] to [69a2b665f2].
| ︙ | ︙ | |||
349 350 351 352 353 354 355 |
for cChar, jAddr in self._getCharArcsWithPriority(iAddr, oSuggResult.sWord[nDeep:nDeep+1]):
self._suggest2(oSuggResult, nDeep+1, jAddr, sNewWord+cChar)
return
def _getCharArcs (self, iAddr):
"generator: yield all chars and addresses from node at address <iAddr>"
for nVal, jAddr in self._getArcs(iAddr):
| | | 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 |
for cChar, jAddr in self._getCharArcsWithPriority(iAddr, oSuggResult.sWord[nDeep:nDeep+1]):
self._suggest2(oSuggResult, nDeep+1, jAddr, sNewWord+cChar)
return
def _getCharArcs (self, iAddr):
    """Generator: yield (char, address) for each character arc of the node at <iAddr>.

    Arcs are read from self._getArcs(iAddr) as (value, next node address) pairs.
    Only values <= self.nChar are yielded, translated to their character through
    self.dCharVal; larger values are not character codes and are skipped.
    """
    for nVal, jAddr in self._getArcs(iAddr):
        if nVal <= self.nChar:
            yield (self.dCharVal[nVal], jAddr)
def _getSimilarCharArcs (self, cChar, iAddr):
"generator: yield similar char of <cChar> and address of the following node"
for c in cp.d1to1.get(cChar, [cChar]):
if c in self.dChar:
jAddr = self._lookupArcNode(self.dChar[c], iAddr)
|
| ︙ | ︙ | |||
371 372 373 374 375 376 377 |
lTuple.sort(key=lambda t: 0 if t[0] in cp.d1to1.get(cChar, cChar) else 1)
yield from lTuple
def _getTails (self, iAddr, sTail="", n=2):
"return a list of suffixes ending at a distance of <n> from <iAddr>"
aTails = set()
for nVal, jAddr in self._getArcs(iAddr):
| | | 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 |
lTuple.sort(key=lambda t: 0 if t[0] in cp.d1to1.get(cChar, cChar) else 1)
yield from lTuple
def _getTails (self, iAddr, sTail="", n=2):
    """Return the set of suffixes ending at a distance of <n> from <iAddr>.

    iAddr -- address of the node whose outgoing arcs are explored
    sTail -- prefix prepended to every suffix collected (grows during recursion)
    n     -- remaining depth; when it reaches 0 no further recursion is done

    Only character arcs (value <= self.nChar) are followed. A suffix is added
    when the first arc read at the target address carries the final-node flag.
    The search only goes deeper while nothing has been collected yet at the
    current level.
    """
    aTails = set()
    for nVal, jAddr in self._getArcs(iAddr):
        if nVal > self.nChar:
            # not a character arc: ignore it
            continue
        if int.from_bytes(self.byDic[jAddr:jAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
            aTails.add(sTail + self.dCharVal[nVal])
        if n and not aTails:
            # nothing found so far and depth left: look one node further
            aTails.update(self._getTails(jAddr, sTail+self.dCharVal[nVal], n-1))
    return aTails
def drawPath (self, sWord, iAddr=0):
|
| ︙ | ︙ | |||
439 440 441 442 443 444 445 |
if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask):
l = []
nRawArc = 0
while not (nRawArc & self._lastArcMask):
iEndArcAddr = iAddr + self.nBytesArc
nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
nArc = nRawArc & self._arcMask
| | | 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 |
if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask):
l = []
nRawArc = 0
while not (nRawArc & self._lastArcMask):
iEndArcAddr = iAddr + self.nBytesArc
nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
nArc = nRawArc & self._arcMask
if nArc > self.nChar:
# This value is not a char, this is a stemming code
sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc])
# Now , we go to the next node and retrieve all following arcs values, all of them are tags
iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
nRawArc2 = 0
while not (nRawArc2 & self._lastArcMask):
iEndArcAddr2 = iAddr2 + self.nBytesArc
|
| ︙ | ︙ | |||
470 471 472 473 474 475 476 |
if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask):
l = []
nRawArc = 0
while not (nRawArc & self._lastArcMask):
iEndArcAddr = iAddr + self.nBytesArc
nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
nArc = nRawArc & self._arcMask
| | | 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 |
if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask):
l = []
nRawArc = 0
while not (nRawArc & self._lastArcMask):
iEndArcAddr = iAddr + self.nBytesArc
nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
nArc = nRawArc & self._arcMask
if nArc > self.nChar:
# This value is not a char, this is a stemming code
l.append(self.funcStemming(sWord, self.lArcVal[nArc]))
iAddr = iEndArcAddr+self.nBytesNodeAddress
return l
return []
def _lookupArcNode1 (self, nVal, iAddr):
|
| ︙ | ︙ | |||
537 538 539 540 541 542 543 |
if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask):
l = []
nRawArc = 0
while not (nRawArc & self._lastArcMask):
iEndArcAddr = iAddr + self.nBytesArc
nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
nArc = nRawArc & self._arcMask
| | | 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 |
if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask):
l = []
nRawArc = 0
while not (nRawArc & self._lastArcMask):
iEndArcAddr = iAddr + self.nBytesArc
nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
nArc = nRawArc & self._arcMask
if nArc > self.nChar:
# This value is not a char, this is a stemming code
sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc])
# Now , we go to the next node and retrieve all following arcs values, all of them are tags
if not (nRawArc & self._addrBitMask):
iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
else:
# we go to the end of the node
|
| ︙ | ︙ | |||
575 576 577 578 579 580 581 |
if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask):
l = []
nRawArc = 0
while not (nRawArc & self._lastArcMask):
iEndArcAddr = iAddr + self.nBytesArc
nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
nArc = nRawArc & self._arcMask
| | | 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 |
if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask):
l = []
nRawArc = 0
while not (nRawArc & self._lastArcMask):
iEndArcAddr = iAddr + self.nBytesArc
nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
nArc = nRawArc & self._arcMask
if nArc > self.nChar:
# This value is not a char, this is a stemming code
l.append(self.funcStemming(sWord, self.lArcVal[nArc]))
# Now , we go to the next node
if not (nRawArc & self._addrBitMask):
iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
else:
# we go to the end of the node
|
| ︙ | ︙ | |||
653 654 655 656 657 658 659 |
l = []
nRawArc = 0
iAddrNode = iAddr
while not (nRawArc & self._lastArcMask):
iEndArcAddr = iAddr + self.nBytesArc
nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
nArc = nRawArc & self._arcMask
| | | 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 |
l = []
nRawArc = 0
iAddrNode = iAddr
while not (nRawArc & self._lastArcMask):
iEndArcAddr = iAddr + self.nBytesArc
nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
nArc = nRawArc & self._arcMask
if nArc > self.nChar:
# This value is not a char, this is a stemming code
sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc])
# Now , we go to the next node and retrieve all following arcs values, all of them are tags
if not (nRawArc & self._addrBitMask):
iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
else:
iAddr2 = iAddrNode + int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesOffset], byteorder='big')
|
| ︙ | ︙ | |||
688 689 690 691 692 693 694 |
l = []
nRawArc = 0
iAddrNode = iAddr
while not (nRawArc & self._lastArcMask):
iEndArcAddr = iAddr + self.nBytesArc
nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
nArc = nRawArc & self._arcMask
| | | 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 |
l = []
nRawArc = 0
iAddrNode = iAddr
while not (nRawArc & self._lastArcMask):
iEndArcAddr = iAddr + self.nBytesArc
nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
nArc = nRawArc & self._arcMask
if nArc > self.nChar:
# This value is not a char, this is a stemming code
l.append(self.funcStemming(sWord, self.lArcVal[nArc]))
iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr+self.nBytesOffset
return l
return []
def _lookupArcNode3 (self, nVal, iAddr):
|
| ︙ | ︙ |