Overview
| Comment: | [graphspell][py] ibdawg: code cleaning, remove version 2 and 3, never used |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | graphspell | bdic_opt |
| Files: | files | file ages | folders |
| SHA3-256: | 4fa9631623da51ba6404c5b847e9912a |
| User & Date: | olr on 2020-09-12 12:45:25 |
| Other Links: | branch diff | manifest | tags |
Context
|
2020-09-12
| ||
| 12:54 | [graphspell] code cleaning check-in: 9df0f3e6b2 user: olr tags: graphspell, bdic_opt | |
| 12:45 | [graphspell][py] ibdawg: code cleaning, remove version 2 and 3, never used check-in: 4fa9631623 user: olr tags: graphspell, bdic_opt | |
| 12:32 | [graphspell][js] ibdawg: code cleaning, remove version 2 and 3, never used check-in: b3f2f1d72a user: olr tags: graphspell, bdic_opt | |
Changes
Modified graphspell/ibdawg.py from [0fe5cbd03f] to [258f41f141].
| ︙ | ︙ | |||
149 150 151 152 153 154 155 |
if self.cStemming == "S":
self.funcStemming = st.changeWordWithSuffixCode
elif self.cStemming == "A":
self.funcStemming = st.changeWordWithAffixCode
else:
self.funcStemming = st.noStemming
| < < < < < < < < < < < < < < < < < < < < < < | 149 150 151 152 153 154 155 156 157 158 159 160 161 162 |
if self.cStemming == "S":
self.funcStemming = st.changeWordWithSuffixCode
elif self.cStemming == "A":
self.funcStemming = st.changeWordWithAffixCode
else:
self.funcStemming = st.noStemming
self.bAcronymValid = False
self.bNumAtLastValid = False
# lexicographer module ?
self.lexicographer = None
try:
self.lexicographer = importlib.import_module(".lexgraph_"+self.sLangCode, "grammalecte.graphspell")
|
| ︙ | ︙ | |||
323 324 325 326 327 328 329 |
return bool(self.byDic[iAddr] & self._finalNodeMask)
def getMorph (self, sWord):
"retrieves morphologies list, different casing allowed"
if not sWord:
return []
sWord = st.spellingNormalization(sWord)
| | | | | 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 |
return bool(self.byDic[iAddr] & self._finalNodeMask)
def getMorph (self, sWord):
"retrieves morphologies list, different casing allowed"
# Collects the results of self._morph() for <sWord> as given and for re-cased variants of it.
# NOTE(review): leading indentation is not visible in this view, so the exact nesting
# of the two casing tests below cannot be confirmed from here.
# nothing to look up for an empty (falsy) word
if not sWord:
return []
# the word is first passed through st.spellingNormalization(); every lookup below uses that result
sWord = st.spellingNormalization(sWord)
l = self._morph(sWord)
# first character is uppercase: also add the morphologies of the fully lowercased word
# (the slice [0:1] never raises, unlike an index, even on an empty string)
if sWord[0:1].isupper():
l.extend(self._morph(sWord.lower()))
# whole word is uppercase and longer than one character: also add the capitalized form
if sWord.isupper() and len(sWord) > 1:
l.extend(self._morph(sWord.capitalize()))
return l
#@timethis
def suggest (self, sWord, nSuggLimit=10, bSplitTrailingNumbers=False):
"returns a set of suggestions for <sWord>"
sWord = sWord.rstrip(".") # useful for LibreOffice
sWord = st.spellingNormalization(sWord)
|
| ︙ | ︙ | |||
487 488 489 490 491 492 493 |
if sFlexPattern:
zFlexPattern = re.compile(sFlexPattern)
if sTagsPattern:
zTagsPattern = re.compile(sTagsPattern)
except re.error:
print("# Error in regex pattern")
traceback.print_exc()
| | < < < < | | | | | | 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 |
if sFlexPattern:
zFlexPattern = re.compile(sFlexPattern)
if sTagsPattern:
zTagsPattern = re.compile(sTagsPattern)
except re.error:
print("# Error in regex pattern")
traceback.print_exc()
yield from self._select(zFlexPattern, zTagsPattern, 0, "")
def _select (self, zFlexPattern, zTagsPattern, iAddr, sWord):
    """recursive generator: walks the arcs below the node at <iAddr>, <sWord> being the characters read so far

    Yields [word, stem, tags] lists for every entry whose word matches <zFlexPattern>
    and whose tags match <zTagsPattern>; a pattern set to None (falsy) filters nothing.
    """
    for nArc, iNext in self._getArcs(iAddr):
        if nArc <= self.nChar:
            # simple character: append it to the word and keep descending
            yield from self._select(zFlexPattern, zTagsPattern, iNext, sWord + self.lArcVal[nArc])
            continue
        # not a character: the arc value is handed to the stemming function as its code
        if zFlexPattern and not zFlexPattern.search(sWord):
            continue
        sStem = self.funcStemming(sWord, self.lArcVal[nArc])
        # the arcs of the following node hold the tags of this entry
        for nTag, _ in self._getArcs(iNext):
            sTags = self.lArcVal[nTag]
            if zTagsPattern and not zTagsPattern.search(sTags):
                continue
            yield [sWord, sStem, sTags]
def _morph (self, sWord):
"returns morphologies of <sWord>"
iAddr = 0
for c in sWord:
if c not in self.dChar:
return []
iAddr = self._lookupArcNode(self.dChar[c], iAddr)
if iAddr is None:
|
| ︙ | ︙ | |||
537 538 539 540 541 542 543 |
nRawArc2 = self.byDic[iAddr2]
l.append(sStem + "/" + self.lArcVal[nRawArc2 & self._arcMask])
iAddr2 = iEndArcAddr2 + 1
iAddr = iEndArcAddr + 1
return l
return []
| | | 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 |
nRawArc2 = self.byDic[iAddr2]
l.append(sStem + "/" + self.lArcVal[nRawArc2 & self._arcMask])
iAddr2 = iEndArcAddr2 + 1
iAddr = iEndArcAddr + 1
return l
return []
def _stem (self, sWord):
"returns stems list of <sWord>"
iAddr = 0
for c in sWord:
if c not in self.dChar:
return []
iAddr = self._lookupArcNode(self.dChar[c], iAddr)
if iAddr is None:
|
| ︙ | ︙ | |||
560 561 562 563 564 565 566 |
if nArc > self.nChar:
# This value is not a char, this is a stemming code
l.append(self.funcStemming(sWord, self.lArcVal[nArc]))
iAddr = iEndArcAddr + 1
return l
return []
| | | | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 |
if nArc > self.nChar:
# This value is not a char, this is a stemming code
l.append(self.funcStemming(sWord, self.lArcVal[nArc]))
iAddr = iEndArcAddr + 1
return l
return []
def _lookupArcNode (self, nVal, iAddr):
    "scans the arcs of the node at <iAddr> for the value <nVal>: returns the address of the next node, or None if no arc carries it"
    # an arc takes two consecutive cells of <byDic>: the raw arc, then the address it points to
    iPos = iAddr
    nRaw = self.byDic[iPos]
    while nVal != (nRaw & self._arcMask):
        if nRaw & self._lastArcMask:
            # last arc of the node reached without a match
            return None
        iPos += 2
        nRaw = self.byDic[iPos]
    # match: the cell right after the arc holds the address of the next node
    return self.byDic[iPos + 1]
def _getArcs (self, iAddr):
    "generator: yields each arc of the node at <iAddr> as a tuple (arc value, address stored after the arc)"
    # an arc takes two consecutive cells of <byDic>: the raw arc, then the address it points to
    iPos = iAddr
    while True:
        nRaw = self.byDic[iPos]
        yield (nRaw & self._arcMask, self.byDic[iPos + 1])
        if nRaw & self._lastArcMask:
            # this was the last arc of the node
            return
        iPos += 2
def _writeNodes (self, spfDest):
    """for debugging only: dumps the binary dictionary as text into the file <spfDest>

    One line is written per arc (arc label, raw arc in binary, address stored after the arc),
    and a header line is written at the start of each node.
    """
    print(" > Write binary nodes")
    # 'utf-8' must be passed as <encoding>: the third positional parameter of open() is
    # <buffering>, which only accepts an integer (a string there raises TypeError).
    with open(spfDest, 'w', encoding='utf-8', newline="\n") as hDst:
        iAddr = 0
        hDst.write("i{:_>10} -- #{:_>10}\n".format("0", iAddr))
        while iAddr < len(self.byDic):
            # an arc takes two consecutive cells of <byDic>: the raw arc, then an address
            iEndArcAddr = iAddr + 1
            nRawArc = self.byDic[iAddr]
            nArc = nRawArc & self._arcMask
            hDst.write(" {:<20} {:0>16} i{:>10} #{:_>10}\n".format(self.lArcVal[nArc], bin(nRawArc)[2:], "?", self.byDic[iEndArcAddr]))
            iAddr = iEndArcAddr + 1
            # after the last arc of a node, a new node starts (unless the data is exhausted)
            if (nRawArc & self._lastArcMask) and iAddr < len(self.byDic):
                hDst.write("\ni{:_>10} -- #{:_>10}\n".format("?", iAddr))
    # no explicit close(): the with-statement closes the file
|