Overview
| Comment: | [graphspell] end of lemma is now a slash instead of a space |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | graphspell | rg |
| Files: | files | file ages | folders |
| SHA3-256: |
c5c926760b88e94d5a1ea9b32aad40e7 |
| User & Date: | olr on 2018-06-06 09:54:07 |
| Other Links: | branch diff | manifest | tags |
Context
|
2018-06-06
| ||
| 10:35 | [fr] end of lemma is now a slash instead of a space check-in: 3bd7a19c94 user: olr tags: fr, rg | |
| 09:54 | [graphspell] end of lemma is now a slash instead of a space check-in: c5c926760b user: olr tags: graphspell, rg | |
| 09:30 | [core] darg: merge morph and morphex functions check-in: 4134a01a49 user: olr tags: core, rg | |
Changes
Modified graphspell-js/ibdawg.js from [241ce099fe] to [068f06a16d].
| ︙ | ︙ | |||
510 511 512 513 514 515 516 |
let sStem = ">" + this.funcStemming(sWord, this.lArcVal[nArc]);
// Now , we go to the next node and retrieve all following arcs values, all of them are tags
let iAddr2 = this._convBytesToInteger(this.byDic.slice(iEndArcAddr, iEndArcAddr+this.nBytesNodeAddress));
let nRawArc2 = 0;
while (!(nRawArc2 & this._lastArcMask)) {
let iEndArcAddr2 = iAddr2 + this.nBytesArc;
nRawArc2 = this._convBytesToInteger(this.byDic.slice(iAddr2, iEndArcAddr2));
| | | 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 |
let sStem = ">" + this.funcStemming(sWord, this.lArcVal[nArc]);
// Now , we go to the next node and retrieve all following arcs values, all of them are tags
let iAddr2 = this._convBytesToInteger(this.byDic.slice(iEndArcAddr, iEndArcAddr+this.nBytesNodeAddress));
let nRawArc2 = 0;
while (!(nRawArc2 & this._lastArcMask)) {
let iEndArcAddr2 = iAddr2 + this.nBytesArc;
nRawArc2 = this._convBytesToInteger(this.byDic.slice(iAddr2, iEndArcAddr2));
l.push(sStem + "/" + this.lArcVal[nRawArc2 & this._arcMask]);
iAddr2 = iEndArcAddr2+this.nBytesNodeAddress;
}
}
iAddr = iEndArcAddr + this.nBytesNodeAddress;
}
return l;
}
|
| ︙ | ︙ |
Modified graphspell-js/spellchecker.js from [a6bdb52bd3] to [5b9ccbbb56].
| ︙ | ︙ | |||
236 237 238 239 240 241 242 |
lMorph.push(...this.oCommunityDic.getMorph(sWord));
}
if (this.bPersonalDic) {
lMorph.push(...this.oPersonalDic.getMorph(sWord));
}
if (this.bStorage) {
this._dMorphologies.set(sWord, lMorph);
| | | | 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 |
lMorph.push(...this.oCommunityDic.getMorph(sWord));
}
if (this.bPersonalDic) {
lMorph.push(...this.oPersonalDic.getMorph(sWord));
}
if (this.bStorage) {
this._dMorphologies.set(sWord, lMorph);
this._dLemmas.set(sWord, Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf("/")); }))));
//console.log(sWord, this._dLemmas.get(sWord));
}
return lMorph;
}
getLemma (sWord) {
// retrieves lemmas
if (this.bStorage) {
if (!this._dLemmas.has(sWord)) {
this.getMorph(sWord);
}
return this._dLemmas.get(sWord);
}
return Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf("/")); })));
}
* suggest (sWord, nSuggLimit=10) {
// generator: returns 1, 2 or 3 lists of suggestions
yield this.oMainDic.suggest(sWord, nSuggLimit);
if (this.bExtendedDic) {
yield this.oExtendedDic.suggest(sWord, nSuggLimit);
|
| ︙ | ︙ |
Modified graphspell/ibdawg.py from [a255097656] to [71ae57c736].
| ︙ | ︙ | |||
485 486 487 488 489 490 491 |
sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc])
# Now , we go to the next node and retrieve all following arcs values, all of them are tags
iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
nRawArc2 = 0
while not (nRawArc2 & self._lastArcMask):
iEndArcAddr2 = iAddr2 + self.nBytesArc
nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big')
| | | 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 |
sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc])
# Now , we go to the next node and retrieve all following arcs values, all of them are tags
iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
nRawArc2 = 0
while not (nRawArc2 & self._lastArcMask):
iEndArcAddr2 = iAddr2 + self.nBytesArc
nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big')
l.append(sStem + "/" + self.lArcVal[nRawArc2 & self._arcMask])
iAddr2 = iEndArcAddr2+self.nBytesNodeAddress
iAddr = iEndArcAddr+self.nBytesNodeAddress
return l
return []
def _stem1 (self, sWord):
"returns stems list of <sWord>"
|
| ︙ | ︙ | |||
590 591 592 593 594 595 596 |
while not (nRawArc & self._lastArcMask):
nRawArc = int.from_bytes(self.byDic[iAddr2:iAddr2+self.nBytesArc], byteorder='big')
iAddr2 += self.nBytesArc + self.nBytesNodeAddress
nRawArc2 = 0
while not (nRawArc2 & self._lastArcMask):
iEndArcAddr2 = iAddr2 + self.nBytesArc
nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big')
| | | 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 |
while not (nRawArc & self._lastArcMask):
nRawArc = int.from_bytes(self.byDic[iAddr2:iAddr2+self.nBytesArc], byteorder='big')
iAddr2 += self.nBytesArc + self.nBytesNodeAddress
nRawArc2 = 0
while not (nRawArc2 & self._lastArcMask):
iEndArcAddr2 = iAddr2 + self.nBytesArc
nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big')
l.append(sStem + "/" + self.lArcVal[nRawArc2 & self._arcMask])
iAddr2 = iEndArcAddr2+self.nBytesNodeAddress if not (nRawArc2 & self._addrBitMask) else iEndArcAddr2
iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr
return l
return []
def _stem2 (self, sWord):
"returns stems list of <sWord>"
|
| ︙ | ︙ | |||
702 703 704 705 706 707 708 |
iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
else:
iAddr2 = iAddrNode + int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesOffset], byteorder='big')
nRawArc2 = 0
while not (nRawArc2 & self._lastArcMask):
iEndArcAddr2 = iAddr2 + self.nBytesArc
nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big')
| | | 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 |
iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
else:
iAddr2 = iAddrNode + int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesOffset], byteorder='big')
nRawArc2 = 0
while not (nRawArc2 & self._lastArcMask):
iEndArcAddr2 = iAddr2 + self.nBytesArc
nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big')
l.append(sStem + "/" + self.lArcVal[nRawArc2 & self._arcMask])
iAddr2 = iEndArcAddr2+self.nBytesNodeAddress if not (nRawArc2 & self._addrBitMask) else iEndArcAddr2+self.nBytesOffset
iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr+self.nBytesOffset
return l
return []
def _stem3 (self, sWord):
"returns stems list of <sWord>"
|
| ︙ | ︙ |
Modified graphspell/spellchecker.py from [70326fed78] to [e03172e122].
| ︙ | ︙ | |||
209 210 211 212 213 214 215 |
lMorph.extend(self.oExtendedDic.getMorph(sWord))
if self.bCommunityDic:
lMorph.extend(self.oCommunityDic.getMorph(sWord))
if self.bPersonalDic:
lMorph.extend(self.oPersonalDic.getMorph(sWord))
if self.bStorage:
self._dMorphologies[sWord] = lMorph
| | | | 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 |
lMorph.extend(self.oExtendedDic.getMorph(sWord))
if self.bCommunityDic:
lMorph.extend(self.oCommunityDic.getMorph(sWord))
if self.bPersonalDic:
lMorph.extend(self.oPersonalDic.getMorph(sWord))
if self.bStorage:
self._dMorphologies[sWord] = lMorph
self._dLemmas[sWord] = set([ s[1:s.find("/")] for s in lMorph ])
return lMorph
def getLemma (self, sWord):
    """Return the set of lemmas of <sWord>.

    Each morphology string returned by getMorph() is expected to look like
    ">lemma/tags": the lemma is the text between the first character and
    the first slash.

    When storage is enabled (self.bStorage), the lemmas are read from the
    self._dLemmas cache; getMorph() is called first for a word which is not
    in the cache yet.
    """
    if self.bStorage:
        if sWord not in self._dLemmas:
            # getMorph() is expected to store the lemmas of <sWord> in self._dLemmas
            self.getMorph(sWord)
        return self._dLemmas[sWord]
    # partition() keeps the whole string when there is no slash, whereas
    # s[1:s.find("/")] would silently drop the last character (find() returns -1).
    return { s.partition("/")[0][1:]  for s in self.getMorph(sWord) }
def suggest (self, sWord, nSuggLimit=10):
"generator: returns 1, 2 or 3 lists of suggestions"
if self.dDefaultSugg:
if sWord in self.dDefaultSugg:
yield self.dDefaultSugg[sWord].split("|")
elif sWord.istitle() and sWord.lower() in self.dDefaultSugg:
|
| ︙ | ︙ |