Grammalecte  Check-in [c2a07ab8a2]

Overview
Comment:[graphspell] ignore underscore for spellchecking and morph
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | graphspell
Files: files | file ages | folders
SHA3-256: c2a07ab8a257bff5f17d24ef9e3292df7906da9e7d9696d30aef263e82fc0b64
User & Date: olr on 2020-09-30 16:06:29
Other Links: manifest | tags
Context
2020-09-30
16:10
[graphspell] replace straight apostrophe in spellingNormalization() check-in: 3f34a1e2e7 user: olr tags: trunk, graphspell
16:06
[graphspell] ignore underscore for spellchecking and morph check-in: c2a07ab8a2 user: olr tags: trunk, graphspell
12:52
[build][fr] fix perf_memo name check-in: fbb6da3fff user: olr tags: trunk, fr, build
Changes

Modified gc_lang/fr/perf_memo.txt from [4773fc39c6] to [e113d352f1].

30
31
32
33
34
35
36

1.6.0       2020.01.03 20:22    1.38847     0.346214    0.240242    0.0709539   0.0737499   0.0748733   0.176477    0.0969171   0.0187857   0.0025143   (nouveau dictionnaire avec lemmes masculins)
1.9.0       2020.04.20 19:57    1.51183     0.369546    0.25681     0.0734314   0.0764396   0.0785668   0.183922    0.103674    0.0185812   0.002099    (NFC normalization)
1.9.2       2020.05.12 08:43    1.62465     0.398831    0.273012    0.0810811   0.080937    0.0845885   0.204133    0.114146    0.0212864   0.0029547
1.12.2      2020.09.09 13:34    1.50568     0.374504    0.233108    0.0798712   0.0804466   0.0769674   0.171519    0.0945132   0.0165344   0.0019474
1.12.2      2020.09.09 13:35    1.41094     0.359093    0.236443    0.06968     0.0734418   0.0738087   0.169371    0.0946279   0.0167106   0.0019773
1.12.2      2020.09.11 19:16    1.35297     0.330545    0.221731    0.0666998   0.0692539   0.0701707   0.160564    0.0891676   0.015807    0.0045998
1.12.2      2020.09.30 14:50    1.37531     0.330381    0.226012    0.0668063   0.0690574   0.0694727   0.160282    0.0929373   0.0176629   0.0019713








>
30
31
32
33
34
35
36
37
1.6.0       2020.01.03 20:22    1.38847     0.346214    0.240242    0.0709539   0.0737499   0.0748733   0.176477    0.0969171   0.0187857   0.0025143   (nouveau dictionnaire avec lemmes masculins)
1.9.0       2020.04.20 19:57    1.51183     0.369546    0.25681     0.0734314   0.0764396   0.0785668   0.183922    0.103674    0.0185812   0.002099    (NFC normalization)
1.9.2       2020.05.12 08:43    1.62465     0.398831    0.273012    0.0810811   0.080937    0.0845885   0.204133    0.114146    0.0212864   0.0029547
1.12.2      2020.09.09 13:34    1.50568     0.374504    0.233108    0.0798712   0.0804466   0.0769674   0.171519    0.0945132   0.0165344   0.0019474
1.12.2      2020.09.09 13:35    1.41094     0.359093    0.236443    0.06968     0.0734418   0.0738087   0.169371    0.0946279   0.0167106   0.0019773
1.12.2      2020.09.11 19:16    1.35297     0.330545    0.221731    0.0666998   0.0692539   0.0701707   0.160564    0.0891676   0.015807    0.0045998
1.12.2      2020.09.30 14:50    1.37531     0.330381    0.226012    0.0668063   0.0690574   0.0694727   0.160282    0.0929373   0.0176629   0.0019713
1.12.2      2020.09.30 17:01    1.37168     0.329009    0.248127    0.0670758   0.0701238   0.0910568   0.170556    0.093876    0.0168925   0.0020051

Modified graphspell-js/ibdawg.js from [fa91a09215] to [a4ceb45f56].

271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
        }
        return false;
    }

    isValid (sWord) {
        // checks if sWord is valid (different casing tested if the first letter is a capital)
        if (!sWord) {
            return null;
        }
        if (sWord.includes("'")) { // ugly hack
            sWord = sWord.replace("'", "’");
        }
        if (this.lookup(sWord)) {
            return true;
        }







|







271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
        }
        return false;
    }

    isValid (sWord) {
        // checks if sWord is valid (different casing tested if the first letter is a capital)
        if (!sWord) {
            return true;
        }
        if (sWord.includes("'")) { // ugly hack
            sWord = sWord.replace("'", "’");
        }
        if (this.lookup(sWord)) {
            return true;
        }

Modified graphspell-js/spellchecker.js from [a94325f4c6] to [1483d1a455].

10
11
12
13
14
15
16

17
18
19
20
21
22
23
/* jshint esversion:6, -W097 */
/* jslint esversion:6 */
/* global require, exports, console, IBDAWG, Tokenizer */

"use strict";

${map}



if (typeof(process) !== 'undefined') {
    var ibdawg = require("./ibdawg.js");
    var tokenizer = require("./tokenizer.js");
}








>







10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/* jshint esversion:6, -W097 */
/* jslint esversion:6 */
/* global require, exports, console, IBDAWG, Tokenizer */

"use strict";

${map}
${string}


if (typeof(process) !== 'undefined') {
    var ibdawg = require("./ibdawg.js");
    var tokenizer = require("./tokenizer.js");
}

225
226
227
228
229
230
231

232
233
234
235
236
237
238
        return aSpellErr;
    }

    // IBDAWG functions

    isValidToken (sToken) {
        // checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)

        if (this.oMainDic.isValidToken(sToken)) {
            return true;
        }
        if (this.bCommunityDic && this.oCommunityDic.isValidToken(sToken)) {
            return true;
        }
        if (this.bPersonalDic && this.oPersonalDic.isValidToken(sToken)) {







>







226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
        return aSpellErr;
    }

    // IBDAWG functions

    isValidToken (sToken) {
        // checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)
        sToken = sToken.gl_trim("_");
        if (this.oMainDic.isValidToken(sToken)) {
            return true;
        }
        if (this.bCommunityDic && this.oCommunityDic.isValidToken(sToken)) {
            return true;
        }
        if (this.bPersonalDic && this.oPersonalDic.isValidToken(sToken)) {
267
268
269
270
271
272
273

274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293

294
295
296
297
298
299
300
301
302
303
304

305
306
307
308
309
310
311
            return true;
        }
        return false;
    }

    getMorph (sWord) {
        // retrieves morphologies list, different casing allowed

        if (this.bStorage && this._dMorphologies.has(sWord)) {
            return this._dMorphologies.get(sWord);
        }
        let lMorph = this.oMainDic.getMorph(sWord);
        if (this.bCommunityDic) {
            lMorph.push(...this.oCommunityDic.getMorph(sWord));
        }
        if (this.bPersonalDic) {
            lMorph.push(...this.oPersonalDic.getMorph(sWord));
        }
        if (this.bStorage) {
            this._dMorphologies.set(sWord, lMorph);
            this._dLemmas.set(sWord, Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf("/")); }))));
            //console.log(sWord, this._dLemmas.get(sWord));
        }
        return lMorph;
    }

    getLemma (sWord) {
        // retrieves lemmas

        if (this.bStorage) {
            if (!this._dLemmas.has(sWord)) {
                this.getMorph(sWord);
            }
            return this._dLemmas.get(sWord);
        }
        return Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf("/")); })));
    }

    * suggest (sWord, nSuggLimit=10) {
        // generator: returns 1, 2 or 3 lists of suggestions

        if (this.lexicographer) {
            if (this.lexicographer.dSugg.has(sWord)) {
                yield this.lexicographer.dSugg.get(sWord).split("|");
            } else if (sWord.gl_isTitle() && this.lexicographer.dSugg.has(sWord.toLowerCase())) {
                let lRes = this.lexicographer.dSugg.get(sWord.toLowerCase()).split("|");
                yield lRes.map((sSugg) => { return sSugg.slice(0,1).toUpperCase() + sSugg.slice(1); });
            } else {







>




















>











>







269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
            return true;
        }
        return false;
    }

    getMorph (sWord) {
        // retrieves morphologies list, different casing allowed
        sWord = sWord.gl_trim("_");
        if (this.bStorage && this._dMorphologies.has(sWord)) {
            return this._dMorphologies.get(sWord);
        }
        let lMorph = this.oMainDic.getMorph(sWord);
        if (this.bCommunityDic) {
            lMorph.push(...this.oCommunityDic.getMorph(sWord));
        }
        if (this.bPersonalDic) {
            lMorph.push(...this.oPersonalDic.getMorph(sWord));
        }
        if (this.bStorage) {
            this._dMorphologies.set(sWord, lMorph);
            this._dLemmas.set(sWord, Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf("/")); }))));
            //console.log(sWord, this._dLemmas.get(sWord));
        }
        return lMorph;
    }

    getLemma (sWord) {
        // retrieves lemmas
        sWord = sWord.gl_trim("_");
        if (this.bStorage) {
            if (!this._dLemmas.has(sWord)) {
                this.getMorph(sWord);
            }
            return this._dLemmas.get(sWord);
        }
        return Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf("/")); })));
    }

    * suggest (sWord, nSuggLimit=10) {
        // generator: returns 1, 2 or 3 lists of suggestions
        sWord = sWord.gl_trim("_");
        if (this.lexicographer) {
            if (this.lexicographer.dSugg.has(sWord)) {
                yield this.lexicographer.dSugg.get(sWord).split("|");
            } else if (sWord.gl_isTitle() && this.lexicographer.dSugg.has(sWord.toLowerCase())) {
                let lRes = this.lexicographer.dSugg.get(sWord.toLowerCase()).split("|");
                yield lRes.map((sSugg) => { return sSugg.slice(0,1).toUpperCase() + sSugg.slice(1); });
            } else {

Modified graphspell/ibdawg.py from [d672255b46] to [43975eca5b].

274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
        if "." in sToken or "·" in sToken:
            return True
        return False

    def isValid (self, sWord):
        "checks if <sWord> is valid (different casing tested if the first letter is a capital)"
        if not sWord:
            return None
        if "'" in sWord: # ugly hack
            sWord = sWord.replace("'", "’")
        if self.lookup(sWord):
            return True
        if sWord[0:1].isupper():
            if len(sWord) > 1:
                if sWord.istitle():







|







274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
        if "." in sToken or "·" in sToken:
            return True
        return False

    def isValid (self, sWord):
        "checks if <sWord> is valid (different casing tested if the first letter is a capital)"
        if not sWord:
            return True
        if "'" in sWord: # ugly hack
            sWord = sWord.replace("'", "’")
        if self.lookup(sWord):
            return True
        if sWord[0:1].isupper():
            if len(sWord) > 1:
                if sWord.istitle():

Modified graphspell/spellchecker.py from [3d385eb49e] to [4dd679e9ae].

209
210
211
212
213
214
215

216
217
218
219
220
221
222
        return dWord


    # IBDAWG functions

    def isValidToken (self, sToken):
        "checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)"

        if self.oMainDic.isValidToken(sToken):
            return True
        if self.bCommunityDic and self.oCommunityDic.isValidToken(sToken):
            return True
        if self.bPersonalDic and self.oPersonalDic.isValidToken(sToken):
            return True
        return False







>







209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
        return dWord


    # IBDAWG functions

    def isValidToken (self, sToken):
        "checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)"
        sToken = sToken.strip("_")
        if self.oMainDic.isValidToken(sToken):
            return True
        if self.bCommunityDic and self.oCommunityDic.isValidToken(sToken):
            return True
        if self.bPersonalDic and self.oPersonalDic.isValidToken(sToken):
            return True
        return False
239
240
241
242
243
244
245

246
247
248
249
250
251
252
253
254
255
256
257
258
259

260
261
262
263
264
265
266
267

268
269
270
271
272
273
274
            return True
        if self.bPersonalDic and self.oPersonalDic.lookup(sWord):
            return True
        return False

    def getMorph (self, sWord):
        "retrieves morphologies list, different casing allowed"

        if self.bStorage and sWord in self._dMorphologies:
            return self._dMorphologies[sWord]
        lMorph = self.oMainDic.getMorph(sWord)
        if self.bCommunityDic:
            lMorph.extend(self.oCommunityDic.getMorph(sWord))
        if self.bPersonalDic:
            lMorph.extend(self.oPersonalDic.getMorph(sWord))
        if self.bStorage:
            self._dMorphologies[sWord] = lMorph
            self._dLemmas[sWord] = { s[1:s.find("/")]  for s in lMorph }
        return lMorph

    def getLemma (self, sWord):
        "retrieves lemmas"

        if self.bStorage:
            if sWord not in self._dLemmas:
                self.getMorph(sWord)
            return self._dLemmas[sWord]
        return { s[1:s.find("/")]  for s in self.getMorph(sWord) }

    def suggest (self, sWord, nSuggLimit=10):
        "generator: returns 1, 2 or 3 lists of suggestions"

        if self.lexicographer:
            if sWord in self.lexicographer.dSugg:
                yield self.lexicographer.dSugg[sWord].split("|")
            elif sWord.istitle() and sWord.lower() in self.lexicographer.dSugg:
                lRes = self.lexicographer.dSugg[sWord.lower()].split("|")
                yield list(map(lambda sSugg: sSugg[0:1].upper()+sSugg[1:], lRes))
            else:







>














>








>







240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
            return True
        if self.bPersonalDic and self.oPersonalDic.lookup(sWord):
            return True
        return False

    def getMorph (self, sWord):
        "retrieves morphologies list, different casing allowed"
        sWord = sWord.strip("_")
        if self.bStorage and sWord in self._dMorphologies:
            return self._dMorphologies[sWord]
        lMorph = self.oMainDic.getMorph(sWord)
        if self.bCommunityDic:
            lMorph.extend(self.oCommunityDic.getMorph(sWord))
        if self.bPersonalDic:
            lMorph.extend(self.oPersonalDic.getMorph(sWord))
        if self.bStorage:
            self._dMorphologies[sWord] = lMorph
            self._dLemmas[sWord] = { s[1:s.find("/")]  for s in lMorph }
        return lMorph

    def getLemma (self, sWord):
        "retrieves lemmas"
        sWord = sWord.strip("_")
        if self.bStorage:
            if sWord not in self._dLemmas:
                self.getMorph(sWord)
            return self._dLemmas[sWord]
        return { s[1:s.find("/")]  for s in self.getMorph(sWord) }

    def suggest (self, sWord, nSuggLimit=10):
        "generator: returns 1, 2 or 3 lists of suggestions"
        sWord = sWord.strip("_")
        if self.lexicographer:
            if sWord in self.lexicographer.dSugg:
                yield self.lexicographer.dSugg[sWord].split("|")
            elif sWord.istitle() and sWord.lower() in self.lexicographer.dSugg:
                lRes = self.lexicographer.dSugg[sWord.lower()].split("|")
                yield list(map(lambda sSugg: sSugg[0:1].upper()+sSugg[1:], lRes))
            else: