485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
|
sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc])
# Now , we go to the next node and retrieve all following arcs values, all of them are tags
iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
nRawArc2 = 0
while not (nRawArc2 & self._lastArcMask):
iEndArcAddr2 = iAddr2 + self.nBytesArc
nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big')
l.append(sStem + " " + self.lArcVal[nRawArc2 & self._arcMask])
iAddr2 = iEndArcAddr2+self.nBytesNodeAddress
iAddr = iEndArcAddr+self.nBytesNodeAddress
return l
return []
def _stem1 (self, sWord):
"returns stems list of <sWord>"
|
|
|
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
|
sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc])
# Now , we go to the next node and retrieve all following arcs values, all of them are tags
iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
nRawArc2 = 0
while not (nRawArc2 & self._lastArcMask):
iEndArcAddr2 = iAddr2 + self.nBytesArc
nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big')
l.append(sStem + "/" + self.lArcVal[nRawArc2 & self._arcMask])
iAddr2 = iEndArcAddr2+self.nBytesNodeAddress
iAddr = iEndArcAddr+self.nBytesNodeAddress
return l
return []
def _stem1 (self, sWord):
"returns stems list of <sWord>"
|
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
|
while not (nRawArc & self._lastArcMask):
nRawArc = int.from_bytes(self.byDic[iAddr2:iAddr2+self.nBytesArc], byteorder='big')
iAddr2 += self.nBytesArc + self.nBytesNodeAddress
nRawArc2 = 0
while not (nRawArc2 & self._lastArcMask):
iEndArcAddr2 = iAddr2 + self.nBytesArc
nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big')
l.append(sStem + " " + self.lArcVal[nRawArc2 & self._arcMask])
iAddr2 = iEndArcAddr2+self.nBytesNodeAddress if not (nRawArc2 & self._addrBitMask) else iEndArcAddr2
iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr
return l
return []
def _stem2 (self, sWord):
"returns stems list of <sWord>"
|
|
|
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
|
while not (nRawArc & self._lastArcMask):
nRawArc = int.from_bytes(self.byDic[iAddr2:iAddr2+self.nBytesArc], byteorder='big')
iAddr2 += self.nBytesArc + self.nBytesNodeAddress
nRawArc2 = 0
while not (nRawArc2 & self._lastArcMask):
iEndArcAddr2 = iAddr2 + self.nBytesArc
nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big')
l.append(sStem + "/" + self.lArcVal[nRawArc2 & self._arcMask])
iAddr2 = iEndArcAddr2+self.nBytesNodeAddress if not (nRawArc2 & self._addrBitMask) else iEndArcAddr2
iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr
return l
return []
def _stem2 (self, sWord):
"returns stems list of <sWord>"
|
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
|
iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
else:
iAddr2 = iAddrNode + int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesOffset], byteorder='big')
nRawArc2 = 0
while not (nRawArc2 & self._lastArcMask):
iEndArcAddr2 = iAddr2 + self.nBytesArc
nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big')
l.append(sStem + " " + self.lArcVal[nRawArc2 & self._arcMask])
iAddr2 = iEndArcAddr2+self.nBytesNodeAddress if not (nRawArc2 & self._addrBitMask) else iEndArcAddr2+self.nBytesOffset
iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr+self.nBytesOffset
return l
return []
def _stem3 (self, sWord):
"returns stems list of <sWord>"
|
|
|
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
|
iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
else:
iAddr2 = iAddrNode + int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesOffset], byteorder='big')
nRawArc2 = 0
while not (nRawArc2 & self._lastArcMask):
iEndArcAddr2 = iAddr2 + self.nBytesArc
nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big')
l.append(sStem + "/" + self.lArcVal[nRawArc2 & self._arcMask])
iAddr2 = iEndArcAddr2+self.nBytesNodeAddress if not (nRawArc2 & self._addrBitMask) else iEndArcAddr2+self.nBytesOffset
iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr+self.nBytesOffset
return l
return []
def _stem3 (self, sWord):
"returns stems list of <sWord>"
|