Grammalecte  Check-in [b7d3d7f5ba]

Overview
Comment:[graphspell] suggestions speed up (memorize state)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | graphspell
Files: files | file ages | folders
SHA3-256: b7d3d7f5ba455fdf612e4440bc62b6ac8012e11ff165c5b08d7ce83188b28778
User & Date: olr on 2018-04-07 11:05:41
Other Links: manifest | tags
Context
2018-04-10
13:07
[fx] https://grammalecte.net -> http://grammalecte.net check-in: a7d907829f user: olr tags: trunk, fx
2018-04-07
11:05
[graphspell] suggestions speed up (memorize state) check-in: b7d3d7f5ba user: olr tags: trunk, graphspell
2018-04-06
21:28
[graphspell] suggestions speed up check-in: d420766661 user: olr tags: trunk, graphspell
Changes

Modified graphspell-js/ibdawg.js from [cc24c04950] to [09222b990d].

172
173
174
175
176
177
178


179
180
181
182
183
184
185
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187







+
+







                break;
            default:
                throw ValueError("# Error: unknown code: " + this.nCompressionMethod);
        }
        //console.log(this.getInfo());
        this.bOptNumSigle = true;
        this.bOptNumAtLast = false;

        this.aSuggState = new Set();
    }

    getInfo () {
        return  `  Language: ${this.sLangName}   Lang code: ${this.sLangCode}   Dictionary name: ${this.sDicName}\n` +
                `  Compression method: ${this.nCompressionMethod}   Date: ${this.sDate}   Stemming: ${this.cStemming}FX\n` +
                `  Arcs values:  ${this.nArcVal} = ${this.nChar} characters,  ${this.nAff} affixes,  ${this.nTag} tags\n` +
                `  Dictionary: ${this.nEntry} entries,    ${this.nNode} nodes,   ${this.nArc} arcs\n` +
314
315
316
317
318
319
320

321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339








340
341
342
343
344
345
346
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357







+



















+
+
+
+
+
+
+
+







        if (sWord.gl_isTitle()) {
            this._suggest(oSuggResult, sWord.toLowerCase(), nMaxSwitch, nMaxDel, nMaxHardRepl);
        }
        else if (sWord.gl_isLowerCase()) {
            this._suggest(oSuggResult, sWord.gl_toCapitalize(), nMaxSwitch, nMaxDel, nMaxHardRepl);
        }
        let aSugg = oSuggResult.getSuggestions(nSuggLimit);
        this.aSuggState.clear();
        if (sSfx || sPfx) {
            // we add what we removed
            return aSugg.map( (sSugg) => { return sPfx + sSugg + sSfx } );
        }
        return aSugg;
    }

    _suggest (oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=false) {
        // returns a set of suggestions
        // recursive function
        if (sRemain == "") {
            if (this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask) {
                oSuggResult.addSugg(sNewWord);
            }
            for (let sTail of this._getTails(iAddr)) {
                oSuggResult.addSugg(sNewWord+sTail);
            }
            return;
        }
        else if (sRemain.length < 4) {
            let sState = sNewWord + ":" + sRemain;
            if (this.aSuggState.has(sState)) {
                return;
            }
            this.aSuggState.add(sState);
        }

        let cCurrent = sRemain.slice(0, 1);
        for (let [cChar, jAddr] of this._getCharArcs(iAddr)) {
            if (char_player.d1to1.gl_get(cCurrent, cCurrent).indexOf(cChar) != -1) {
                this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, jAddr, sNewWord+cChar);
            }
            else if (!bAvoidLoop && nMaxHardRepl) {
                this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl-1, nDeep+1, jAddr, sNewWord+cChar, true);

Modified graphspell/ibdawg.py from [e811d4895f] to [7087758a69].

134
135
136
137
138
139
140


141
142
143
144
145
146
147
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149







+
+







            self._writeNodes = self._writeNodes3
        else:
            raise ValueError("  # Error: unknown code: {}".format(self.nCompressionMethod))

        self.bOptNumSigle = False
        self.bOptNumAtLast = False

        self.aSuggState = set()

    def _initBinary (self):
        "initialize with binary structure file"
        if self.by[0:17] != b"/grammalecte-fsa/":
            raise TypeError("# Error. Not a grammalecte-fsa binary dictionary. Header: {}".format(self.by[0:9]))
        if not(self.by[17:18] == b"1" or self.by[17:18] == b"2" or self.by[17:18] == b"3"):
            raise ValueError("# Error. Unknown dictionary version: {}".format(self.by[17:18]))
        try:
289
290
291
292
293
294
295

296
297
298
299
300
301
302
303
304
305
306
307
308
309





310
311
312
313
314
315
316
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324







+














+
+
+
+
+







        oSuggResult = SuggResult(sWord)
        self._suggest(oSuggResult, sWord, nMaxSwitch=nMaxSwitch, nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl)
        if sWord.istitle():
            self._suggest(oSuggResult, sWord.lower(), nMaxSwitch=nMaxSwitch, nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl)
        elif sWord.islower():
            self._suggest(oSuggResult, sWord.title(), nMaxSwitch=nMaxSwitch, nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl)
        aSugg = oSuggResult.getSuggestions(nSuggLimit)
        self.aSuggState.clear()
        if sSfx or sPfx:
            # we add what we removed
            return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
        return aSugg

    def _suggest (self, oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False):
        # recursive function
        #logging.info((nDeep * "  ") + sNewWord + ":" + sRemain)
        if not sRemain:
            if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
                oSuggResult.addSugg(sNewWord, nDeep)
            for sTail in self._getTails(iAddr):
                oSuggResult.addSugg(sNewWord+sTail, nDeep)
            return
        elif len(sRemain) < 4:
            sState = sNewWord + ":" + sRemain
            if sState in self.aSuggState:
                return
            self.aSuggState.add(sState)
        cCurrent = sRemain[0:1]
        for cChar, jAddr in self._getCharArcs(iAddr):
            if cChar in cp.d1to1.get(cCurrent, cCurrent):
                self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, jAddr, sNewWord+cChar)
            elif not bAvoidLoop and nMaxHardRepl:
                self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl-1, nDeep+1, jAddr, sNewWord+cChar, True)
        if not bAvoidLoop: # avoid infinite loop