Grammalecte  Check-in [c5c926760b]

Overview
Comment:[graphspell] end of lemma is now a slash instead of a space
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | graphspell | rg
Files: files | file ages | folders
SHA3-256: c5c926760b88e94d5a1ea9b32aad40e78b329aa9b44308191a90a557bdde39fb
User & Date: olr on 2018-06-06 09:54:07
Other Links: branch diff | manifest | tags
Context
2018-06-06
10:35
[fr] end of lemma is now a slash instead of a space check-in: 3bd7a19c94 user: olr tags: fr, rg
09:54
[graphspell] end of lemma is now a slash instead of a space check-in: c5c926760b user: olr tags: graphspell, rg
09:30
[core] darg: merge morph and morphex functions check-in: 4134a01a49 user: olr tags: core, rg
Changes

Modified graphspell-js/ibdawg.js from [241ce099fe] to [068f06a16d].

510
511
512
513
514
515
516
517

518
519
520
521
522
523
524
510
511
512
513
514
515
516

517
518
519
520
521
522
523
524







-
+







                    let sStem = ">" + this.funcStemming(sWord, this.lArcVal[nArc]);
                    // Now , we go to the next node and retrieve all following arcs values, all of them are tags
                    let iAddr2 = this._convBytesToInteger(this.byDic.slice(iEndArcAddr, iEndArcAddr+this.nBytesNodeAddress));
                    let nRawArc2 = 0;
                    while (!(nRawArc2 & this._lastArcMask)) {
                        let iEndArcAddr2 = iAddr2 + this.nBytesArc;
                        nRawArc2 = this._convBytesToInteger(this.byDic.slice(iAddr2, iEndArcAddr2));
                        l.push(sStem + " " + this.lArcVal[nRawArc2 & this._arcMask]);
                        l.push(sStem + "/" + this.lArcVal[nRawArc2 & this._arcMask]);
                        iAddr2 = iEndArcAddr2+this.nBytesNodeAddress;
                    }
                }
                iAddr = iEndArcAddr + this.nBytesNodeAddress;
            }
            return l;
        }

Modified graphspell-js/spellchecker.js from [a6bdb52bd3] to [5b9ccbbb56].

236
237
238
239
240
241
242
243

244
245
246
247
248
249
250
251
252
253
254
255
256
257

258
259
260
261
262
263
264
236
237
238
239
240
241
242

243
244
245
246
247
248
249
250
251
252
253
254
255
256

257
258
259
260
261
262
263
264







-
+













-
+







            lMorph.push(...this.oCommunityDic.getMorph(sWord));
        }
        if (this.bPersonalDic) {
            lMorph.push(...this.oPersonalDic.getMorph(sWord));
        }
        if (this.bStorage) {
            this._dMorphologies.set(sWord, lMorph);
            this._dLemmas.set(sWord, Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf(" ")); }))));
            this._dLemmas.set(sWord, Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf("/")); }))));
            //console.log(sWord, this._dLemmas.get(sWord));
        }
        return lMorph;
    }

    getLemma (sWord) {
        // retrieves lemmas
        if (this.bStorage) {
            if (!this._dLemmas.has(sWord)) {
                this.getMorph(sWord);
            }
            return this._dLemmas.get(sWord);
        }
        return Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf(" ")); })));
        return Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf("/")); })));
    }

    * suggest (sWord, nSuggLimit=10) {
        // generator: returns 1, 2 or 3 lists of suggestions
        yield this.oMainDic.suggest(sWord, nSuggLimit);
        if (this.bExtendedDic) {
            yield this.oExtendedDic.suggest(sWord, nSuggLimit);

Modified graphspell/ibdawg.py from [a255097656] to [71ae57c736].

485
486
487
488
489
490
491
492

493
494
495
496
497
498
499
485
486
487
488
489
490
491

492
493
494
495
496
497
498
499







-
+







                    sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc])
                    # Now , we go to the next node and retrieve all following arcs values, all of them are tags
                    iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
                    nRawArc2 = 0
                    while not (nRawArc2 & self._lastArcMask):
                        iEndArcAddr2 = iAddr2 + self.nBytesArc
                        nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big')
                        l.append(sStem + " " + self.lArcVal[nRawArc2 & self._arcMask])
                        l.append(sStem + "/" + self.lArcVal[nRawArc2 & self._arcMask])
                        iAddr2 = iEndArcAddr2+self.nBytesNodeAddress
                iAddr = iEndArcAddr+self.nBytesNodeAddress
            return l
        return []

    def _stem1 (self, sWord):
        "returns stems list of <sWord>"
590
591
592
593
594
595
596
597

598
599
600
601
602
603
604
590
591
592
593
594
595
596

597
598
599
600
601
602
603
604







-
+







                        while not (nRawArc & self._lastArcMask):
                            nRawArc = int.from_bytes(self.byDic[iAddr2:iAddr2+self.nBytesArc], byteorder='big')
                            iAddr2 += self.nBytesArc + self.nBytesNodeAddress
                    nRawArc2 = 0
                    while not (nRawArc2 & self._lastArcMask):
                        iEndArcAddr2 = iAddr2 + self.nBytesArc
                        nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big')
                        l.append(sStem + " " + self.lArcVal[nRawArc2 & self._arcMask])
                        l.append(sStem + "/" + self.lArcVal[nRawArc2 & self._arcMask])
                        iAddr2 = iEndArcAddr2+self.nBytesNodeAddress  if not (nRawArc2 & self._addrBitMask) else iEndArcAddr2
                iAddr = iEndArcAddr+self.nBytesNodeAddress  if not (nRawArc & self._addrBitMask)  else iEndArcAddr
            return l
        return []

    def _stem2 (self, sWord):
        "returns stems list of <sWord>"
702
703
704
705
706
707
708
709

710
711
712
713
714
715
716
702
703
704
705
706
707
708

709
710
711
712
713
714
715
716







-
+







                        iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
                    else:
                        iAddr2 = iAddrNode + int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesOffset], byteorder='big')
                    nRawArc2 = 0
                    while not (nRawArc2 & self._lastArcMask):
                        iEndArcAddr2 = iAddr2 + self.nBytesArc
                        nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big')
                        l.append(sStem + " " + self.lArcVal[nRawArc2 & self._arcMask])
                        l.append(sStem + "/" + self.lArcVal[nRawArc2 & self._arcMask])
                        iAddr2 = iEndArcAddr2+self.nBytesNodeAddress  if not (nRawArc2 & self._addrBitMask) else iEndArcAddr2+self.nBytesOffset
                iAddr = iEndArcAddr+self.nBytesNodeAddress  if not (nRawArc & self._addrBitMask)  else iEndArcAddr+self.nBytesOffset
            return l
        return []

    def _stem3 (self, sWord):
        "returns stems list of <sWord>"

Modified graphspell/spellchecker.py from [70326fed78] to [e03172e122].

209
210
211
212
213
214
215
216

217
218
219
220
221
222
223
224
225

226
227
228
229
230
231
232
209
210
211
212
213
214
215

216
217
218
219
220
221
222
223
224

225
226
227
228
229
230
231
232







-
+








-
+







            lMorph.extend(self.oExtendedDic.getMorph(sWord))
        if self.bCommunityDic:
            lMorph.extend(self.oCommunityDic.getMorph(sWord))
        if self.bPersonalDic:
            lMorph.extend(self.oPersonalDic.getMorph(sWord))
        if self.bStorage:
            self._dMorphologies[sWord] = lMorph
            self._dLemmas[sWord] = set([ s[1:s.find(" ")]  for s in lMorph ])
            self._dLemmas[sWord] = set([ s[1:s.find("/")]  for s in lMorph ])
        return lMorph

    def getLemma (self, sWord):
        "retrieves lemmas"
        if self.bStorage:
            if sWord not in self._dLemmas:
                self.getMorph(sWord)
            return self._dLemmas[sWord]
        return set([ s[1:s.find(" ")]  for s in self.getMorph(sWord) ])
        return set([ s[1:s.find("/")]  for s in self.getMorph(sWord) ])

    def suggest (self, sWord, nSuggLimit=10):
        "generator: returns 1, 2 or 3 lists of suggestions"
        if self.dDefaultSugg:
            if sWord in self.dDefaultSugg:
                yield self.dDefaultSugg[sWord].split("|")
            elif sWord.istitle() and sWord.lower() in self.dDefaultSugg: