Overview
| Comment: | [graphspell][py] trim useless spaces |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk | graphspell |
| Files: | files | file ages | folders |
| SHA3-256: | 016c0dd46acf03c0f3f5c07f0538ee9d |
| User & Date: | olr on 2018-07-05 12:43:57 |
| Other Links: | manifest | tags |
Context
|
2018-07-05
| ||
| 12:44 | [graphspell][js] trim useless spaces check-in: 2f18864bbc user: olr tags: trunk, graphspell | |
| 12:43 | [graphspell][py] trim useless spaces check-in: 016c0dd46a user: olr tags: trunk, graphspell | |
|
2018-07-01
| ||
| 20:13 | [fx] update webext number (necessary for Mozilla website) check-in: f2ef884d79 user: olr tags: trunk, fx | |
Changes
Modified graphspell/ibdawg.py from [a255097656] to [87f324b718].
| ︙ | ︙ | |||
145 146 147 148 149 150 151 |
raise TypeError("# Error. Not a grammalecte-fsa binary dictionary. Header: {}".format(self.by[0:9]))
if not(self.by[17:18] == b"1" or self.by[17:18] == b"2" or self.by[17:18] == b"3"):
raise ValueError("# Error. Unknown dictionary version: {}".format(self.by[17:18]))
try:
header, info, values, bdic = self.by.split(b"\0\0\0\0", 3)
except Exception:
raise Exception
| | | 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
raise TypeError("# Error. Not a grammalecte-fsa binary dictionary. Header: {}".format(self.by[0:9]))
if not(self.by[17:18] == b"1" or self.by[17:18] == b"2" or self.by[17:18] == b"3"):
raise ValueError("# Error. Unknown dictionary version: {}".format(self.by[17:18]))
try:
header, info, values, bdic = self.by.split(b"\0\0\0\0", 3)
except Exception:
raise Exception
self.nCompressionMethod = int(self.by[17:18].decode("utf-8"))
self.sHeader = header.decode("utf-8")
self.lArcVal = values.decode("utf-8").split("\t")
self.nArcVal = len(self.lArcVal)
self.byDic = bdic
l = info.decode("utf-8").split("//")
|
| ︙ | ︙ | |||
477 478 479 480 481 482 483 |
l = []
nRawArc = 0
while not (nRawArc & self._lastArcMask):
iEndArcAddr = iAddr + self.nBytesArc
nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
nArc = nRawArc & self._arcMask
if nArc > self.nChar:
| | | 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 |
l = []
nRawArc = 0
while not (nRawArc & self._lastArcMask):
iEndArcAddr = iAddr + self.nBytesArc
nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
nArc = nRawArc & self._arcMask
if nArc > self.nChar:
# This value is not a char, this is a stemming code
sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc])
# Now , we go to the next node and retrieve all following arcs values, all of them are tags
iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
nRawArc2 = 0
while not (nRawArc2 & self._lastArcMask):
iEndArcAddr2 = iAddr2 + self.nBytesArc
nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big')
|
| ︙ | ︙ | |||
508 509 510 511 512 513 514 |
l = []
nRawArc = 0
while not (nRawArc & self._lastArcMask):
iEndArcAddr = iAddr + self.nBytesArc
nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
nArc = nRawArc & self._arcMask
if nArc > self.nChar:
| | | | 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 |
l = []
nRawArc = 0
while not (nRawArc & self._lastArcMask):
iEndArcAddr = iAddr + self.nBytesArc
nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
nArc = nRawArc & self._arcMask
if nArc > self.nChar:
# This value is not a char, this is a stemming code
l.append(self.funcStemming(sWord, self.lArcVal[nArc]))
iAddr = iEndArcAddr+self.nBytesNodeAddress
return l
return []
def _lookupArcNode1 (self, nVal, iAddr):
    """Look for the arc <nVal> in the node stored at <iAddr>.

    The node is scanned arc by arc. Each arc is read as <self.nBytesArc>
    big-endian bytes and is followed by <self.nBytesNodeAddress> bytes
    holding the address of the node this arc leads to.

    Args:
        nVal: arc value searched for (compared with the raw arc masked by
            <self._arcMask>).
        iAddr: offset in <self.byDic> of the first arc of the node.

    Returns:
        The address of the next node if <nVal> is an arc of this node,
        else None.
    """
    nDicSize = len(self.byDic)
    while True:
        iEndArcAddr = iAddr + self.nBytesArc
        if iEndArcAddr > nDicSize:
            # Truncated or corrupted data: slicing past the end yields b"",
            # which decodes to 0 and never carries the last-arc flag, so the
            # scan would never stop. Report the arc as not found instead.
            return None
        nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
        if nVal == (nRawArc & self._arcMask):
            # the value we are looking for:
            # we return the address of the next node
            return int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
        # value not found on this arc
        if nRawArc & self._lastArcMask:
            # this was the last arc of the node
            return None
        # skip the node address that follows the arc and try the next arc
        iAddr = iEndArcAddr + self.nBytesNodeAddress
|
| ︙ | ︙ | |||
575 576 577 578 579 580 581 |
l = []
nRawArc = 0
while not (nRawArc & self._lastArcMask):
iEndArcAddr = iAddr + self.nBytesArc
nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
nArc = nRawArc & self._arcMask
if nArc > self.nChar:
| | | 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 |
l = []
nRawArc = 0
while not (nRawArc & self._lastArcMask):
iEndArcAddr = iAddr + self.nBytesArc
nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
nArc = nRawArc & self._arcMask
if nArc > self.nChar:
# This value is not a char, this is a stemming code
sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc])
# Now , we go to the next node and retrieve all following arcs values, all of them are tags
if not (nRawArc & self._addrBitMask):
iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
else:
# we go to the end of the node
iAddr2 = iEndArcAddr
|
| ︙ | ︙ | |||
613 614 615 616 617 618 619 |
l = []
nRawArc = 0
while not (nRawArc & self._lastArcMask):
iEndArcAddr = iAddr + self.nBytesArc
nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
nArc = nRawArc & self._arcMask
if nArc > self.nChar:
| | | | 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 |
l = []
nRawArc = 0
while not (nRawArc & self._lastArcMask):
iEndArcAddr = iAddr + self.nBytesArc
nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
nArc = nRawArc & self._arcMask
if nArc > self.nChar:
# This value is not a char, this is a stemming code
l.append(self.funcStemming(sWord, self.lArcVal[nArc]))
# Now , we go to the next node
if not (nRawArc & self._addrBitMask):
iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
else:
# we go to the end of the node
iAddr2 = iEndArcAddr
while not (nRawArc & self._lastArcMask):
nRawArc = int.from_bytes(self.byDic[iAddr2:iAddr2+self.nBytesArc], byteorder='big')
iAddr2 += self.nBytesArc + self.nBytesNodeAddress
iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr
return l
return []
def _lookupArcNode2 (self, nVal, iAddr):
"looks if <nVal> is an arc at the node at <iAddr>, if yes, returns address of next node else None"
while True:
iEndArcAddr = iAddr+self.nBytesArc
nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
if nVal == (nRawArc & self._arcMask):
# the value we are looking for
if not (nRawArc & self._addrBitMask):
# we return the address of the next node
return int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
else:
# we go to the end of the node
iAddr = iEndArcAddr
while not (nRawArc & self._lastArcMask):
|
| ︙ | ︙ | |||
691 692 693 694 695 696 697 |
nRawArc = 0
iAddrNode = iAddr
while not (nRawArc & self._lastArcMask):
iEndArcAddr = iAddr + self.nBytesArc
nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
nArc = nRawArc & self._arcMask
if nArc > self.nChar:
| | | 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 |
nRawArc = 0
iAddrNode = iAddr
while not (nRawArc & self._lastArcMask):
iEndArcAddr = iAddr + self.nBytesArc
nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
nArc = nRawArc & self._arcMask
if nArc > self.nChar:
# This value is not a char, this is a stemming code
sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc])
# Now , we go to the next node and retrieve all following arcs values, all of them are tags
if not (nRawArc & self._addrBitMask):
iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
else:
iAddr2 = iAddrNode + int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesOffset], byteorder='big')
nRawArc2 = 0
|
| ︙ | ︙ | |||
726 727 728 729 730 731 732 |
nRawArc = 0
iAddrNode = iAddr
while not (nRawArc & self._lastArcMask):
iEndArcAddr = iAddr + self.nBytesArc
nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
nArc = nRawArc & self._arcMask
if nArc > self.nChar:
| | | | 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 |
nRawArc = 0
iAddrNode = iAddr
while not (nRawArc & self._lastArcMask):
iEndArcAddr = iAddr + self.nBytesArc
nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
nArc = nRawArc & self._arcMask
if nArc > self.nChar:
# This value is not a char, this is a stemming code
l.append(self.funcStemming(sWord, self.lArcVal[nArc]))
iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr+self.nBytesOffset
return l
return []
def _lookupArcNode3 (self, nVal, iAddr):
"looks if <nVal> is an arc at the node at <iAddr>, if yes, returns address of next node else None"
iAddrNode = iAddr
while True:
iEndArcAddr = iAddr+self.nBytesArc
nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
if nVal == (nRawArc & self._arcMask):
# the value we are looking for
if not (nRawArc & self._addrBitMask):
return int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
else:
return iAddrNode + int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesOffset], byteorder='big')
else:
# value not found
if (nRawArc & self._lastArcMask):
|
| ︙ | ︙ |