Grammalecte  Check-in [1fb5cf9076]

Overview
Comment:[graphspell] spellchecker: select returns an array instead of a string, + getSimilarEntries
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | graphspell | multid
Files: files | file ages | folders
SHA3-256: 1fb5cf907607aa3ce2c6ffa1811441edc07f84cda5f4facdf446f6e8d49cb93b
User & Date: olr on 2018-03-23 12:38:49
Other Links: branch diff | manifest | tags
Context
2018-03-23
17:39
[fx] lexicon editor: search page check-in: 412e20b14a user: olr tags: fx, multid
12:38
[graphspell] spellchecker: select returns an array instead of a string, + getSimilarEntries check-in: 1fb5cf9076 user: olr tags: graphspell, multid
2018-03-22
11:08
[fx] lexicon editor: update > informations check-in: b7574fbc3d user: olr tags: fx, multid
Changes

Modified gc_lang/fr/tb/content/lex_editor.js from [c528c957f0] to [09631747a1].

418
419
420
421
422
423
424
425
426


427
428
429
430
431
432
433
418
419
420
421
422
423
424


425
426
427
428
429
430
431
432
433







-
-
+
+








    load: async function () {
        let sJSON = await oFileHandler.loadFile("fr.personal.json");
        if (sJSON != "") {
            let oJSON = JSON.parse(sJSON);
            this.oIBDAWG = new IBDAWG(oJSON);
            let lEntry = [];
            for (let s of this.oIBDAWG.select()) {
                lEntry.push(s.split("\t"));
            for (let aRes of this.oIBDAWG.select()) {
                lEntry.push(aRes);
            }        
            oLexiconTable.fill(lEntry);
            this.setDictData(this.oIBDAWG.nEntry, this.oIBDAWG.sDate);
            enableElement("export_button");
        } else {
            this.setDictData(0, "[néant]");
            disableElement("export_button");
488
489
490
491
492
493
494
495
496
497
498
499

500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516


517
518
519
520
521
522
523
488
489
490
491
492
493
494





495






496
497
498
499
500
501
502
503
504


505
506
507
508
509
510
511
512
513







-
-
-
-
-
+
-
-
-
-
-
-









-
-
+
+







        document.getElementById("search_regex_button").addEventListener("click", () => { this.searchRegex() }, false);
    },

    searchSimilar: function () {
        oSearchTable.clear();
        let sWord = document.getElementById("search_similar").value;
        if (sWord !== "") {
            let lSimilarWords = [];
            for (let l of this.oSpellChecker.suggest(sWord, 20)) {
                lSimilarWords.push(...l);
            }
            let lResult = [];
            let lResult = this.oSpellChecker.getSimilarEntries(sWord, 20);
            for (let sSimilar of lSimilarWords) {
                for (let sMorph of this.oSpellChecker.getMorph(sSimilar)) {
                    let nCut = sMorph.indexOf(" ");
                    lResult.push( [sSimilar, sMorph.slice(1, nCut), sMorph.slice(nCut+1)] );
                }
            }
            oSearchTable.fill(lResult);
        }
    },

    searchRegex: function () {
        let sFlexPattern = document.getElementById("search_flexion_pattern").value.trim();
        let sTagsPattern = document.getElementById("search_tags_pattern").value.trim();
        let lEntry = [];
        let i = 0;
        for (let s of this.oSpellChecker.select(sFlexPattern, sTagsPattern)) {
            lEntry.push(s.split("\t"));
        for (let aRes of this.oSpellChecker.select(sFlexPattern, sTagsPattern)) {
            lEntry.push(aRes);
            i++;
            if (i >= 2000) {
                break;
            }
        }
        oSearchTable.fill(lEntry);
    }

Modified gc_lang/fr/webext/panel/lex_editor.js from [848f24fd10] to [46a64e44e7].

504
505
506
507
508
509
510
511
512


513
514
515
516
517
518
519
504
505
506
507
508
509
510


511
512
513
514
515
516
517
518
519







-
-
+
+







        if (!oResult.hasOwnProperty("oPersonalDictionary")) {
            hideElement("export_button");
            return;
        }
        let oJSON = oResult.oPersonalDictionary;
        this.oIBDAWG = new IBDAWG(oJSON);
        let lEntry = [];
        for (let s of this.oIBDAWG.select()) {
            lEntry.push(s.split("\t"));
        for (let aRes of this.oIBDAWG.select()) {
            lEntry.push(aRes);
        }        
        oLexiconTable.fill(lEntry);
        this.setDictData(this.oIBDAWG.nEntry, this.oIBDAWG.sDate);
        showElement("export_button");
    },

    setDictData: function (nEntries, sDate) {
570
571
572
573
574
575
576
577
578
579
580
581

582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598


599
600
601
602
603
604
605
570
571
572
573
574
575
576





577






578
579
580
581
582
583
584
585
586


587
588
589
590
591
592
593
594
595







-
-
-
-
-
+
-
-
-
-
-
-









-
-
+
+







        document.getElementById("search_regex_button").addEventListener("click", () => { this.searchRegex() }, false);
    },

    searchSimilar: function () {
        oSearchTable.clear();
        let sWord = document.getElementById("search_similar").value;
        if (sWord !== "") {
            let lSimilarWords = [];
            for (let l of this.oSpellChecker.suggest(sWord, 20)) {
                lSimilarWords.push(...l);
            }
            let lResult = [];
            let lResult = this.oSpellChecker.getSimilarEntries(sWord, 20);
            for (let sSimilar of lSimilarWords) {
                for (let sMorph of this.oSpellChecker.getMorph(sSimilar)) {
                    let nCut = sMorph.indexOf(" ");
                    lResult.push( [sSimilar, sMorph.slice(1, nCut), sMorph.slice(nCut+1)] );
                }
            }
            oSearchTable.fill(lResult);
        }
    },

    searchRegex: function () {
        let sFlexPattern = document.getElementById("search_flexion_pattern").value.trim();
        let sTagsPattern = document.getElementById("search_tags_pattern").value.trim();
        let lEntry = [];
        let i = 0;
        for (let s of this.oSpellChecker.select(sFlexPattern, sTagsPattern)) {
            lEntry.push(s.split("\t"));
        for (let aRes of this.oSpellChecker.select(sFlexPattern, sTagsPattern)) {
            lEntry.push(aRes);
            i++;
            if (i >= 2000) {
                break;
            }
        }
        oSearchTable.fill(lEntry);
    }

Modified grammalecte-cli.py from [3a000dfd52] to [3cad321446].

194
195
196
197
198
199
200
201
202


203
204
205
206
207
208
209
194
195
196
197
198
199
200


201
202
203
204
205
206
207
208
209







-
-
+
+







                if "=" in sSearch:
                    nCut = sSearch.find("=")
                    sFlexPattern = sSearch[0:nCut]
                    sTagsPattern = sSearch[nCut+1:]
                else:
                    sFlexPattern = sSearch
                    sTagsPattern = ""
                for sRes in oSpellChecker.select(sFlexPattern, sTagsPattern):
                    echo(sRes)
                for aRes in oSpellChecker.select(sFlexPattern, sTagsPattern):
                    echo("\t".join(aRes))
            elif sText.startswith("/+ "):
                oGrammarChecker.gce.setOptions({ opt:True  for opt in sText[3:].strip().split()  if opt in oGrammarChecker.gce.getOptions() })
                echo("done")
            elif sText.startswith("/- "):
                oGrammarChecker.gce.setOptions({ opt:False  for opt in sText[3:].strip().split()  if opt in oGrammarChecker.gce.getOptions() })
                echo("done")
            elif sText.startswith("/-- "):

Modified graphspell-js/ibdawg.js from [494630e09a] to [050f6e0036].

413
414
415
416
417
418
419




420











421
422
423
424
425
426
427
413
414
415
416
417
418
419
420
421
422
423

424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441







+
+
+
+
-
+
+
+
+
+
+
+
+
+
+
+







        }
        return aTails;
    }

    // morph (sWord) {
    //     is defined in constructor
    // }
    getSimilarEntries (sWord, nSuggLimit=10) {
        // return a list of tuples (similar word, stem, morphology)
        if (sWord == "") {
            return [];
    
        }
        let lResult = [];
        for (let sSimilar of this.suggest(sWord, nSuggLimit)) {
            for (let sMorph of this.getMorph(sSimilar)) {
                let nCut = sMorph.indexOf(" ");
                lResult.push( [sSimilar, sMorph.slice(1, nCut), sMorph.slice(nCut+1)] );
            }
        }
        return lResult;
    }

    * select (sFlexPattern="", sTagsPattern="") {
        // generator: returns all entries which flexion fits <sFlexPattern> and morphology fits <sTagsPattern>
        let zFlexPattern = null;
        let zTagsPattern = null;
        try {
            zFlexPattern = (sFlexPattern !== "") ? new RegExp(sFlexPattern) : null;
            zTagsPattern = (sTagsPattern !== "") ? new RegExp(sTagsPattern) : null;
439
440
441
442
443
444
445
446

447
448
449

450
451
452
453
454
455
456
453
454
455
456
457
458
459

460
461
462

463
464
465
466
467
468
469
470







-
+


-
+







        // recursive generator
        for (let [nVal, jAddr] of this._getArcs1(iAddr)) {
            if (nVal <= this.nChar) {
                // simple character
                yield* this._select1(zFlexPattern, zTagsPattern, jAddr, sWord + this.lArcVal[nVal]);
            } else {
                if (!zFlexPattern || zFlexPattern.test(sWord)) {
                    let sEntry = sWord + "\t" + this.funcStemming(sWord, this.lArcVal[nVal]);
                    let sStem = this.funcStemming(sWord, this.lArcVal[nVal]);
                    for (let [nMorphVal, _] of this._getArcs1(jAddr)) {
                        if (!zTagsPattern || zTagsPattern.test(this.lArcVal[nMorphVal])) {
                            yield sEntry + "\t" + this.lArcVal[nMorphVal];
                            yield [sWord, sStem, this.lArcVal[nMorphVal]];
                        }
                    }
                }
            }
        }
    }            

Modified graphspell-js/spellchecker.js from [a81aa20368] to [67633fda3c].

241
242
243
244
245
246
247















248
249
250
251
252
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+





        if (this.bCommunityDic) {
            yield* this.oCommunityDic.select(sFlexPattern, sTagsPattern);
        }
        if (this.bPersonalDic) {
            yield* this.oPersonalDic.select(sFlexPattern, sTagsPattern);
        }
    }

    getSimilarEntries (sWord, nSuggLimit=10) {
        // return a list of tuples (similar word, stem, morphology)
        let lResult = this.oMainDic.getSimilarEntries(sWord);
        if (this.bExtendedDic) {
            lResult.push(...this.oExtendedDic.getSimilarEntries(sWord));
        }
        if (this.bCommunityDic) {
            lResult.push(...this.oCommunityDic.getSimilarEntries(sWord));
        }
        if (this.bPersonalDic) {
            lResult.push(...this.oPersonalDic.getSimilarEntries(sWord));
        }
        return lResult;
    }
}

// Publish the class on `exports` when that object exists; otherwise do nothing.
if (typeof exports !== 'undefined') {
    exports.SpellChecker = SpellChecker;
}

Modified graphspell/ibdawg.py from [7fe99d0e79] to [10a4e0fc85].

404
405
406
407
408
409
410











411
412
413
414
415
416
417
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428







+
+
+
+
+
+
+
+
+
+
+







                iPos = n
            n += 1
        if not sWord:
            return
        if iPos >= 0:
            print("\n   "+ " " * iPos + "|")
            self.drawPath(sWord[1:], iNextNodeAddr)

    def getSimilarEntries (self, sWord, nSuggLimit=10):
        """Return a list of tuples (similar word, stem, morphology).

        Candidates come from self.suggest(sWord, nSuggLimit); every string returned by
        self.getMorph() for a candidate produces one tuple. An empty <sWord> gives [].
        """
        if not sWord:
            return []
        return [
            # stem: from index 1 up to the first space; morphology: what follows that space
            (sCandidate, sTags[1:iSpace], sTags[iSpace+1:])
            for sCandidate in self.suggest(sWord, nSuggLimit)
            for sTags in self.getMorph(sCandidate)
            for iSpace in (sTags.find(" "),)
        ]

    def select (self, sFlexPattern="", sTagsPattern=""):
        "generator: returns all entries which flexion fits <sFlexPattern> and morphology fits <sTagsPattern>"
        zFlexPattern = None
        zTagsPattern = None
        try:
            if sFlexPattern:
431
432
433
434
435
436
437
438

439
440
441

442
443
444
445
446
447
448
442
443
444
445
446
447
448

449
450
451

452
453
454
455
456
457
458
459







-
+


-
+







        # recursive generator
        for nVal, jAddr in self._getArcs1(iAddr):
            if nVal <= self.nChar:
                # simple character
                yield from self._select1(zFlexPattern, zTagsPattern, jAddr, sWord + self.lArcVal[nVal])
            else:
                if not zFlexPattern or zFlexPattern.search(sWord):
                    sEntry = sWord + "\t" + self.funcStemming(sWord, self.lArcVal[nVal])
                    sStem = self.funcStemming(sWord, self.lArcVal[nVal])
                    for nMorphVal, _ in self._getArcs1(jAddr):
                        if not zTagsPattern or zTagsPattern.search(self.lArcVal[nMorphVal]):
                            yield sEntry + "\t" + self.lArcVal[nMorphVal]
                            yield [sWord, sStem, self.lArcVal[nMorphVal]]

    def _morph1 (self, sWord):
        "returns morphologies of <sWord>"
        iAddr = 0
        for c in sWord:
            if c not in self.dChar:
                return []

Modified graphspell/spellchecker.py from [1230d0d8a8] to [3ef8ba9d14].

210
211
212
213
214
215
216











210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227







+
+
+
+
+
+
+
+
+
+
+
            self.oExtendedDic.drawPath(sWord)
        if self.bCommunityDic:
            print("-----")
            self.oCommunityDic.drawPath(sWord)
        if self.bPersonalDic:
            print("-----")
            self.oPersonalDic.drawPath(sWord)

    def getSimilarEntries (self, sWord, nSuggLimit=10):
        """Return a list of tuples (similar word, stem, morphology).

        The main dictionary is always queried; the extended, community and personal
        dictionaries are queried only when their respective flag is set.
        <nSuggLimit> is forwarded to each dictionary's getSimilarEntries().
        """
        lResult = self.oMainDic.getSimilarEntries(sWord, nSuggLimit)
        if self.bExtendedDic:
            lResult.extend(self.oExtendedDic.getSimilarEntries(sWord, nSuggLimit))
        if self.bCommunityDic:
            lResult.extend(self.oCommunityDic.getSimilarEntries(sWord, nSuggLimit))
        if self.bPersonalDic:
            lResult.extend(self.oPersonalDic.getSimilarEntries(sWord, nSuggLimit))
        return lResult