Grammalecte  Check-in [622060334c]

Overview
Comment:[graphspell] suggestions: split word at apostrophes and check each part
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | graphspell
Files: files | file ages | folders
SHA3-256: 622060334cf37de6c5957200183b914da68c5b804a91d1eedb175f7fe8d01e9c
User & Date: olr on 2018-11-23 15:33:45
Other Links: manifest | tags
Context
2018-11-23
16:05
[graphspell] char_player: another simplification method for sound “é” to avoid oversimplification check-in: 93a0b84b63 user: olr tags: trunk, graphspell
15:33
[graphspell] suggestions: split word at apostrophes and check each part check-in: 622060334c user: olr tags: trunk, graphspell
15:32
[fr] ajustements check-in: def3f4276e user: olr tags: trunk, fr
Changes

Modified graphspell-js/ibdawg.js from [edb211270b] to [87959bb3f3].

328
329
330
331
332
333
334

335
336
337
338
339
340
341
342
343











344
345
346
347
348
349
350
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362







+









+
+
+
+
+
+
+
+
+
+
+







        let sSfx = "";
        [sPfx, sWord, sSfx] = char_player.cut(sWord);
        let nMaxSwitch = Math.max(Math.floor(sWord.length / 3), 1);
        let nMaxDel = Math.floor(sWord.length / 5);
        let nMaxHardRepl = Math.max(Math.floor((sWord.length - 5) / 4), 1);
        let nMaxJump = Math.max(Math.floor(sWord.length / 4), 1);
        let oSuggResult = new SuggResult(sWord);
        this._splitSuggest(oSuggResult, sWord);
        this._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump);
        let aSugg = oSuggResult.getSuggestions(nSuggLimit);
        if (sSfx || sPfx) {
            // we add what we removed
            return aSugg.map( (sSugg) => { return sPfx + sSugg + sSfx; } );
        }
        //console.timeEnd("Suggestions for " + sWord);
        return aSugg;
    }

    _splitSuggest (oSuggResult, sWord) {
        for (let cSplitter of "'’") {
            if (sWord.includes(cSplitter)) {
                let [sWord1, sWord2] = sWord.split(cSplitter, 2);
                if (this.isValid(sWord1) && this.isValid(sWord2)) {
                    oSuggResult.addSugg(sWord1+" "+sWord2);
                }
            }
        }
    }

    _suggest (oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nMaxJump=0, nDist=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=false) {
        // returns a set of suggestions
        // recursive function
        if (sRemain == "") {
            if (this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask) {
                oSuggResult.addSugg(sNewWord);

Modified graphspell/ibdawg.py from [59273fbe60] to [630526bab1].

299
300
301
302
303
304
305

306
307
308
309
310
311
312







313
314
315
316
317
318
319
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327







+







+
+
+
+
+
+
+







        sWord = cp.spellingNormalization(sWord)
        sPfx, sWord, sSfx = cp.cut(sWord)
        nMaxSwitch = max(len(sWord) // 3, 1)
        nMaxDel = len(sWord) // 5
        nMaxHardRepl = max((len(sWord) - 5) // 4, 1)
        nMaxJump = max(len(sWord) // 4, 1)
        oSuggResult = SuggResult(sWord)
        self._splitSuggest(oSuggResult, sWord)
        self._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump)
        aSugg = oSuggResult.getSuggestions(nSuggLimit)
        if sSfx or sPfx:
            # we add what we removed
            return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
        return aSugg

    def _splitSuggest (self, oSuggResult, sWord):
        for cSplitter in "'’":
            if cSplitter in sWord:
                sWord1, sWord2 = sWord.split(cSplitter, 1)
                if self.isValid(sWord1) and self.isValid(sWord2):
                    oSuggResult.addSugg(sWord1+" "+sWord2)

    def _suggest (self, oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nMaxJump=0, nDist=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False):
        # recursive function
        #logging.info((nDeep * "  ") + sNewWord + ":" + sRemain)
        if not sRemain:
            if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
                oSuggResult.addSugg(sNewWord, nDeep)
            for sTail in self._getTails(iAddr):