Index: graphspell-js/ibdawg.js ================================================================== --- graphspell-js/ibdawg.js +++ graphspell-js/ibdawg.js @@ -350,11 +350,11 @@ } * _getCharArcs (iAddr) { // generator: yield all chars and addresses from node at address for (let [nVal, jAddr] of this._getArcs(iAddr)) { - if (nVal < this.nChar) { + if (nVal <= this.nChar) { yield [this.dCharVal.get(nVal), jAddr]; } } } @@ -372,11 +372,11 @@ _getTails (iAddr, sTail="", n=2) { // return a list of suffixes ending at a distance of from let aTails = new Set(); for (let [nVal, jAddr] of this._getArcs(iAddr)) { - if (nVal < this.nChar) { + if (nVal <= this.nChar) { if (this._convBytesToInteger(this.byDic.slice(jAddr, jAddr+this.nBytesArc)) & this._finalNodeMask) { aTails.add(sTail + this.dCharVal.get(nVal)); } if (n && aTails.size == 0) { aTails.gl_update(this._getTails(jAddr, sTail+this.dCharVal.get(nVal), n-1)); @@ -441,11 +441,11 @@ let nRawArc = 0; while (!(nRawArc & this._lastArcMask)) { let iEndArcAddr = iAddr + this.nBytesArc; nRawArc = this._convBytesToInteger(this.byDic.slice(iAddr, iEndArcAddr)); let nArc = nRawArc & this._arcMask; - if (nArc >= this.nChar) { + if (nArc > this.nChar) { // This value is not a char, this is a stemming code let sStem = ">" + this.funcStemming(sWord, this.lArcVal[nArc]); // Now , we go to the next node and retrieve all following arcs values, all of them are tags let iAddr2 = this._convBytesToInteger(this.byDic.slice(iEndArcAddr, iEndArcAddr+this.nBytesNodeAddress)); let nRawArc2 = 0; @@ -480,11 +480,11 @@ let nRawArc = 0; while (!(nRawArc & this._lastArcMask)) { let iEndArcAddr = iAddr + this.nBytesArc; nRawArc = this._convBytesToInteger(this.byDic.slice(iAddr, iEndArcAddr)); let nArc = nRawArc & this._arcMask; - if (nArc >= this.nChar) { + if (nArc > this.nChar) { // This value is not a char, this is a stemming code l.push(this.funcStemming(sWord, this.lArcVal[nArc])); } iAddr = iEndArcAddr + this.nBytesNodeAddress; } Index: graphspell/ibdawg.py ================================================================== --- graphspell/ibdawg.py +++ graphspell/ibdawg.py @@ -351,11 +351,11 @@ return def _getCharArcs (self, iAddr): "generator: yield all chars and addresses from node at address " for nVal, jAddr in self._getArcs(iAddr): - if nVal < self.nChar: + if nVal <= self.nChar: yield (self.dCharVal[nVal], jAddr) def _getSimilarCharArcs (self, cChar, iAddr): "generator: yield similar char of and address of the following node" for c in cp.d1to1.get(cChar, [cChar]): @@ -373,11 +373,11 @@ def _getTails (self, iAddr, sTail="", n=2): "return a list of suffixes ending at a distance of from " aTails = set() for nVal, jAddr in self._getArcs(iAddr): - if nVal < self.nChar: + if nVal <= self.nChar: if int.from_bytes(self.byDic[jAddr:jAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask: aTails.add(sTail + self.dCharVal[nVal]) if n and not aTails: aTails.update(self._getTails(jAddr, sTail+self.dCharVal[nVal], n-1)) return aTails @@ -441,11 +441,11 @@ nRawArc = 0 while not (nRawArc & self._lastArcMask): iEndArcAddr = iAddr + self.nBytesArc nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') nArc = nRawArc & self._arcMask - if nArc >= self.nChar: + if nArc > self.nChar: # This value is not a char, this is a stemming code sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc]) # Now , we go to the next node and retrieve all following arcs values, all of them are tags iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') nRawArc2 = 0 @@ -472,11 +472,11 @@ nRawArc = 0 while not (nRawArc & self._lastArcMask): iEndArcAddr = iAddr + self.nBytesArc nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') nArc = nRawArc & self._arcMask - if nArc >= self.nChar: + if nArc > self.nChar: # This value is not a char, this is a stemming code l.append(self.funcStemming(sWord, self.lArcVal[nArc])) iAddr = iEndArcAddr+self.nBytesNodeAddress return l return [] @@ -539,11 +539,11 @@ nRawArc = 0 while not (nRawArc & self._lastArcMask): iEndArcAddr = iAddr + self.nBytesArc nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') nArc = nRawArc & self._arcMask - if nArc >= self.nChar: + if nArc > self.nChar: # This value is not a char, this is a stemming code sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc]) # Now , we go to the next node and retrieve all following arcs values, all of them are tags if not (nRawArc & self._addrBitMask): iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') @@ -577,11 +577,11 @@ nRawArc = 0 while not (nRawArc & self._lastArcMask): iEndArcAddr = iAddr + self.nBytesArc nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') nArc = nRawArc & self._arcMask - if nArc >= self.nChar: + if nArc > self.nChar: # This value is not a char, this is a stemming code l.append(self.funcStemming(sWord, self.lArcVal[nArc])) # Now , we go to the next node if not (nRawArc & self._addrBitMask): iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') @@ -655,11 +655,11 @@ iAddrNode = iAddr while not (nRawArc & self._lastArcMask): iEndArcAddr = iAddr + self.nBytesArc nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') nArc = nRawArc & self._arcMask - if nArc >= self.nChar: + if nArc > self.nChar: # This value is not a char, this is a stemming code sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc]) # Now , we go to the next node and retrieve all following arcs values, all of them are tags if not (nRawArc & self._addrBitMask): iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') @@ -690,11 +690,11 @@ iAddrNode = iAddr while not (nRawArc & self._lastArcMask): iEndArcAddr = iAddr + self.nBytesArc nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') nArc = nRawArc & self._arcMask - if nArc >= self.nChar: + if nArc > self.nChar: # This value is not a char, this is a stemming code l.append(self.funcStemming(sWord, self.lArcVal[nArc])) iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr+self.nBytesOffset return l return []