Overview
Comment: | [graphspell] end of lemma is now a slash instead of a space |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | graphspell | rg |
Files: | files | file ages | folders |
SHA3-256: | c5c926760b88e94d5a1ea9b32aad40e7 |
User & Date: | olr on 2018-06-06 09:54:07 |
Other Links: | branch diff | manifest | tags |
Context
2018-06-06
| ||
10:35 | [fr] end of lemma is now a slash instead of a space check-in: 3bd7a19c94 user: olr tags: fr, rg | |
09:54 | [graphspell] end of lemma is now a slash instead of a space check-in: c5c926760b user: olr tags: graphspell, rg | |
09:30 | [core] darg: merge morph and morphex functions check-in: 4134a01a49 user: olr tags: core, rg | |
Changes
Modified graphspell-js/ibdawg.js from [241ce099fe] to [068f06a16d].
︙ | ︙ | |||
510 511 512 513 514 515 516 | let sStem = ">" + this.funcStemming(sWord, this.lArcVal[nArc]); // Now , we go to the next node and retrieve all following arcs values, all of them are tags let iAddr2 = this._convBytesToInteger(this.byDic.slice(iEndArcAddr, iEndArcAddr+this.nBytesNodeAddress)); let nRawArc2 = 0; while (!(nRawArc2 & this._lastArcMask)) { let iEndArcAddr2 = iAddr2 + this.nBytesArc; nRawArc2 = this._convBytesToInteger(this.byDic.slice(iAddr2, iEndArcAddr2)); | | | 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 | let sStem = ">" + this.funcStemming(sWord, this.lArcVal[nArc]); // Now , we go to the next node and retrieve all following arcs values, all of them are tags let iAddr2 = this._convBytesToInteger(this.byDic.slice(iEndArcAddr, iEndArcAddr+this.nBytesNodeAddress)); let nRawArc2 = 0; while (!(nRawArc2 & this._lastArcMask)) { let iEndArcAddr2 = iAddr2 + this.nBytesArc; nRawArc2 = this._convBytesToInteger(this.byDic.slice(iAddr2, iEndArcAddr2)); l.push(sStem + "/" + this.lArcVal[nRawArc2 & this._arcMask]); iAddr2 = iEndArcAddr2+this.nBytesNodeAddress; } } iAddr = iEndArcAddr + this.nBytesNodeAddress; } return l; } |
︙ | ︙ |
Modified graphspell-js/spellchecker.js from [a6bdb52bd3] to [5b9ccbbb56].
︙ | ︙ | |||
236 237 238 239 240 241 242 | lMorph.push(...this.oCommunityDic.getMorph(sWord)); } if (this.bPersonalDic) { lMorph.push(...this.oPersonalDic.getMorph(sWord)); } if (this.bStorage) { this._dMorphologies.set(sWord, lMorph); | | | | 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 | lMorph.push(...this.oCommunityDic.getMorph(sWord)); } if (this.bPersonalDic) { lMorph.push(...this.oPersonalDic.getMorph(sWord)); } if (this.bStorage) { this._dMorphologies.set(sWord, lMorph); this._dLemmas.set(sWord, Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf("/")); })))); //console.log(sWord, this._dLemmas.get(sWord)); } return lMorph; } getLemma (sWord) { // retrieves lemmas if (this.bStorage) { if (!this._dLemmas.has(sWord)) { this.getMorph(sWord); } return this._dLemmas.get(sWord); } return Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf("/")); }))); } * suggest (sWord, nSuggLimit=10) { // generator: returns 1, 2 or 3 lists of suggestions yield this.oMainDic.suggest(sWord, nSuggLimit); if (this.bExtendedDic) { yield this.oExtendedDic.suggest(sWord, nSuggLimit); |
︙ | ︙ |
Modified graphspell/ibdawg.py from [a255097656] to [71ae57c736].
︙ | ︙ | |||
485 486 487 488 489 490 491 | sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc]) # Now , we go to the next node and retrieve all following arcs values, all of them are tags iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') nRawArc2 = 0 while not (nRawArc2 & self._lastArcMask): iEndArcAddr2 = iAddr2 + self.nBytesArc nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big') | | | 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 | sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc]) # Now , we go to the next node and retrieve all following arcs values, all of them are tags iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') nRawArc2 = 0 while not (nRawArc2 & self._lastArcMask): iEndArcAddr2 = iAddr2 + self.nBytesArc nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big') l.append(sStem + "/" + self.lArcVal[nRawArc2 & self._arcMask]) iAddr2 = iEndArcAddr2+self.nBytesNodeAddress iAddr = iEndArcAddr+self.nBytesNodeAddress return l return [] def _stem1 (self, sWord): "returns stems list of <sWord>" |
︙ | ︙ | |||
590 591 592 593 594 595 596 | while not (nRawArc & self._lastArcMask): nRawArc = int.from_bytes(self.byDic[iAddr2:iAddr2+self.nBytesArc], byteorder='big') iAddr2 += self.nBytesArc + self.nBytesNodeAddress nRawArc2 = 0 while not (nRawArc2 & self._lastArcMask): iEndArcAddr2 = iAddr2 + self.nBytesArc nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big') | | | 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 | while not (nRawArc & self._lastArcMask): nRawArc = int.from_bytes(self.byDic[iAddr2:iAddr2+self.nBytesArc], byteorder='big') iAddr2 += self.nBytesArc + self.nBytesNodeAddress nRawArc2 = 0 while not (nRawArc2 & self._lastArcMask): iEndArcAddr2 = iAddr2 + self.nBytesArc nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big') l.append(sStem + "/" + self.lArcVal[nRawArc2 & self._arcMask]) iAddr2 = iEndArcAddr2+self.nBytesNodeAddress if not (nRawArc2 & self._addrBitMask) else iEndArcAddr2 iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr return l return [] def _stem2 (self, sWord): "returns stems list of <sWord>" |
︙ | ︙ | |||
702 703 704 705 706 707 708 | iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') else: iAddr2 = iAddrNode + int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesOffset], byteorder='big') nRawArc2 = 0 while not (nRawArc2 & self._lastArcMask): iEndArcAddr2 = iAddr2 + self.nBytesArc nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big') | | | 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 | iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') else: iAddr2 = iAddrNode + int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesOffset], byteorder='big') nRawArc2 = 0 while not (nRawArc2 & self._lastArcMask): iEndArcAddr2 = iAddr2 + self.nBytesArc nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big') l.append(sStem + "/" + self.lArcVal[nRawArc2 & self._arcMask]) iAddr2 = iEndArcAddr2+self.nBytesNodeAddress if not (nRawArc2 & self._addrBitMask) else iEndArcAddr2+self.nBytesOffset iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr+self.nBytesOffset return l return [] def _stem3 (self, sWord): "returns stems list of <sWord>" |
︙ | ︙ |
Modified graphspell/spellchecker.py from [70326fed78] to [e03172e122].
︙ | ︙ | |||
209 210 211 212 213 214 215 | lMorph.extend(self.oExtendedDic.getMorph(sWord)) if self.bCommunityDic: lMorph.extend(self.oCommunityDic.getMorph(sWord)) if self.bPersonalDic: lMorph.extend(self.oPersonalDic.getMorph(sWord)) if self.bStorage: self._dMorphologies[sWord] = lMorph | | | | 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 | lMorph.extend(self.oExtendedDic.getMorph(sWord)) if self.bCommunityDic: lMorph.extend(self.oCommunityDic.getMorph(sWord)) if self.bPersonalDic: lMorph.extend(self.oPersonalDic.getMorph(sWord)) if self.bStorage: self._dMorphologies[sWord] = lMorph self._dLemmas[sWord] = set([ s[1:s.find("/")] for s in lMorph ]) return lMorph def getLemma (self, sWord): "retrieves lemmas" if self.bStorage: if sWord not in self._dLemmas: self.getMorph(sWord) return self._dLemmas[sWord] return set([ s[1:s.find("/")] for s in self.getMorph(sWord) ]) def suggest (self, sWord, nSuggLimit=10): "generator: returns 1, 2 or 3 lists of suggestions" if self.dDefaultSugg: if sWord in self.dDefaultSugg: yield self.dDefaultSugg[sWord].split("|") elif sWord.istitle() and sWord.lower() in self.dDefaultSugg: |
︙ | ︙ |