Overview
Comment: | [graphspell] ignore underscore for spellchecking and morph |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | graphspell |
Files: | files | file ages | folders |
SHA3-256: |
c2a07ab8a257bff5f17d24ef9e3292df |
User & Date: | olr on 2020-09-30 16:06:29 |
Other Links: | manifest | tags |
Context
2020-09-30
| ||
16:10 | [graphspell] replace straight apostrophe in spellingNormalization() check-in: 3f34a1e2e7 user: olr tags: trunk, graphspell | |
16:06 | [graphspell] ignore underscore for spellchecking and morph check-in: c2a07ab8a2 user: olr tags: trunk, graphspell | |
12:52 | [build][fr] fix perf_memo name check-in: fbb6da3fff user: olr tags: trunk, fr, build | |
Changes
Modified gc_lang/fr/perf_memo.txt from [4773fc39c6] to [e113d352f1].
︙ | ︙ | |||
30 31 32 33 34 35 36 | 1.6.0 2020.01.03 20:22 1.38847 0.346214 0.240242 0.0709539 0.0737499 0.0748733 0.176477 0.0969171 0.0187857 0.0025143 (nouveau dictionnaire avec lemmes masculins) 1.9.0 2020.04.20 19:57 1.51183 0.369546 0.25681 0.0734314 0.0764396 0.0785668 0.183922 0.103674 0.0185812 0.002099 (NFC normalization) 1.9.2 2020.05.12 08:43 1.62465 0.398831 0.273012 0.0810811 0.080937 0.0845885 0.204133 0.114146 0.0212864 0.0029547 1.12.2 2020.09.09 13:34 1.50568 0.374504 0.233108 0.0798712 0.0804466 0.0769674 0.171519 0.0945132 0.0165344 0.0019474 1.12.2 2020.09.09 13:35 1.41094 0.359093 0.236443 0.06968 0.0734418 0.0738087 0.169371 0.0946279 0.0167106 0.0019773 1.12.2 2020.09.11 19:16 1.35297 0.330545 0.221731 0.0666998 0.0692539 0.0701707 0.160564 0.0891676 0.015807 0.0045998 1.12.2 2020.09.30 14:50 1.37531 0.330381 0.226012 0.0668063 0.0690574 0.0694727 0.160282 0.0929373 0.0176629 0.0019713 | > | 30 31 32 33 34 35 36 37 | 1.6.0 2020.01.03 20:22 1.38847 0.346214 0.240242 0.0709539 0.0737499 0.0748733 0.176477 0.0969171 0.0187857 0.0025143 (nouveau dictionnaire avec lemmes masculins) 1.9.0 2020.04.20 19:57 1.51183 0.369546 0.25681 0.0734314 0.0764396 0.0785668 0.183922 0.103674 0.0185812 0.002099 (NFC normalization) 1.9.2 2020.05.12 08:43 1.62465 0.398831 0.273012 0.0810811 0.080937 0.0845885 0.204133 0.114146 0.0212864 0.0029547 1.12.2 2020.09.09 13:34 1.50568 0.374504 0.233108 0.0798712 0.0804466 0.0769674 0.171519 0.0945132 0.0165344 0.0019474 1.12.2 2020.09.09 13:35 1.41094 0.359093 0.236443 0.06968 0.0734418 0.0738087 0.169371 0.0946279 0.0167106 0.0019773 1.12.2 2020.09.11 19:16 1.35297 0.330545 0.221731 0.0666998 0.0692539 0.0701707 0.160564 0.0891676 0.015807 0.0045998 1.12.2 2020.09.30 14:50 1.37531 0.330381 0.226012 0.0668063 0.0690574 0.0694727 0.160282 0.0929373 0.0176629 0.0019713 1.12.2 2020.09.30 17:01 1.37168 0.329009 0.248127 0.0670758 0.0701238 0.0910568 0.170556 0.093876 0.0168925 0.0020051 |
Modified graphspell-js/ibdawg.js from [fa91a09215] to [a4ceb45f56].
︙ | ︙ | |||
271 272 273 274 275 276 277 | } return false; } isValid (sWord) { // checks if sWord is valid (different casing tested if the first letter is a capital) if (!sWord) { | | | 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 | } return false; } isValid (sWord) { // checks if sWord is valid (different casing tested if the first letter is a capital) if (!sWord) { return true; } if (sWord.includes("'")) { // ugly hack sWord = sWord.replace("'", "’"); } if (this.lookup(sWord)) { return true; } |
︙ | ︙ |
Modified graphspell-js/spellchecker.js from [a94325f4c6] to [1483d1a455].
︙ | ︙ | |||
10 11 12 13 14 15 16 17 18 19 20 21 22 23 | /* jshint esversion:6, -W097 */ /* jslint esversion:6 */ /* global require, exports, console, IBDAWG, Tokenizer */ "use strict"; ${map} if (typeof(process) !== 'undefined') { var ibdawg = require("./ibdawg.js"); var tokenizer = require("./tokenizer.js"); } | > | 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 | /* jshint esversion:6, -W097 */ /* jslint esversion:6 */ /* global require, exports, console, IBDAWG, Tokenizer */ "use strict"; ${map} ${string} if (typeof(process) !== 'undefined') { var ibdawg = require("./ibdawg.js"); var tokenizer = require("./tokenizer.js"); } |
︙ | ︙ | |||
225 226 227 228 229 230 231 232 233 234 235 236 237 238 | return aSpellErr; } // IBDAWG functions isValidToken (sToken) { // checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked) if (this.oMainDic.isValidToken(sToken)) { return true; } if (this.bCommunityDic && this.oCommunityDic.isValidToken(sToken)) { return true; } if (this.bPersonalDic && this.oPersonalDic.isValidToken(sToken)) { | > | 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 | return aSpellErr; } // IBDAWG functions isValidToken (sToken) { // checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked) sToken = sToken.gl_trim("_"); if (this.oMainDic.isValidToken(sToken)) { return true; } if (this.bCommunityDic && this.oCommunityDic.isValidToken(sToken)) { return true; } if (this.bPersonalDic && this.oPersonalDic.isValidToken(sToken)) { |
︙ | ︙ | |||
267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 | return true; } return false; } getMorph (sWord) { // retrieves morphologies list, different casing allowed if (this.bStorage && this._dMorphologies.has(sWord)) { return this._dMorphologies.get(sWord); } let lMorph = this.oMainDic.getMorph(sWord); if (this.bCommunityDic) { lMorph.push(...this.oCommunityDic.getMorph(sWord)); } if (this.bPersonalDic) { lMorph.push(...this.oPersonalDic.getMorph(sWord)); } if (this.bStorage) { this._dMorphologies.set(sWord, lMorph); this._dLemmas.set(sWord, Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf("/")); })))); //console.log(sWord, this._dLemmas.get(sWord)); } return lMorph; } getLemma (sWord) { // retrieves lemmas if (this.bStorage) { if (!this._dLemmas.has(sWord)) { this.getMorph(sWord); } return this._dLemmas.get(sWord); } return Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf("/")); }))); } * suggest (sWord, nSuggLimit=10) { // generator: returns 1, 2 or 3 lists of suggestions if (this.lexicographer) { if (this.lexicographer.dSugg.has(sWord)) { yield this.lexicographer.dSugg.get(sWord).split("|"); } else if (sWord.gl_isTitle() && this.lexicographer.dSugg.has(sWord.toLowerCase())) { let lRes = this.lexicographer.dSugg.get(sWord.toLowerCase()).split("|"); yield lRes.map((sSugg) => { return sSugg.slice(0,1).toUpperCase() + sSugg.slice(1); }); } else { | > > > | 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 | return true; } return false; } getMorph (sWord) { // retrieves morphologies list, different casing allowed sWord = sWord.gl_trim("_"); if (this.bStorage && this._dMorphologies.has(sWord)) { return this._dMorphologies.get(sWord); } let lMorph = this.oMainDic.getMorph(sWord); if (this.bCommunityDic) { lMorph.push(...this.oCommunityDic.getMorph(sWord)); } if (this.bPersonalDic) { lMorph.push(...this.oPersonalDic.getMorph(sWord)); } if (this.bStorage) { this._dMorphologies.set(sWord, lMorph); this._dLemmas.set(sWord, Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf("/")); })))); //console.log(sWord, this._dLemmas.get(sWord)); } return lMorph; } getLemma (sWord) { // retrieves lemmas sWord = sWord.gl_trim("_"); if (this.bStorage) { if (!this._dLemmas.has(sWord)) { this.getMorph(sWord); } return this._dLemmas.get(sWord); } return Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf("/")); }))); } * suggest (sWord, nSuggLimit=10) { // generator: returns 1, 2 or 3 lists of suggestions sWord = sWord.gl_trim("_"); if (this.lexicographer) { if (this.lexicographer.dSugg.has(sWord)) { yield this.lexicographer.dSugg.get(sWord).split("|"); } else if (sWord.gl_isTitle() && this.lexicographer.dSugg.has(sWord.toLowerCase())) { let lRes = this.lexicographer.dSugg.get(sWord.toLowerCase()).split("|"); yield lRes.map((sSugg) => { return sSugg.slice(0,1).toUpperCase() + sSugg.slice(1); }); } else { |
︙ | ︙ |
Modified graphspell/ibdawg.py from [d672255b46] to [43975eca5b].
︙ | ︙ | |||
274 275 276 277 278 279 280 | if "." in sToken or "·" in sToken: return True return False def isValid (self, sWord): "checks if <sWord> is valid (different casing tested if the first letter is a capital)" if not sWord: | | | 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 | if "." in sToken or "·" in sToken: return True return False def isValid (self, sWord): "checks if <sWord> is valid (different casing tested if the first letter is a capital)" if not sWord: return True if "'" in sWord: # ugly hack sWord = sWord.replace("'", "’") if self.lookup(sWord): return True if sWord[0:1].isupper(): if len(sWord) > 1: if sWord.istitle(): |
︙ | ︙ |
Modified graphspell/spellchecker.py from [3d385eb49e] to [4dd679e9ae].
︙ | ︙ | |||
209 210 211 212 213 214 215 216 217 218 219 220 221 222 | return dWord # IBDAWG functions def isValidToken (self, sToken): "checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)" if self.oMainDic.isValidToken(sToken): return True if self.bCommunityDic and self.oCommunityDic.isValidToken(sToken): return True if self.bPersonalDic and self.oPersonalDic.isValidToken(sToken): return True return False | > | 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 | return dWord # IBDAWG functions def isValidToken (self, sToken): "checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)" sToken = sToken.strip("_") if self.oMainDic.isValidToken(sToken): return True if self.bCommunityDic and self.oCommunityDic.isValidToken(sToken): return True if self.bPersonalDic and self.oPersonalDic.isValidToken(sToken): return True return False |
︙ | ︙ | |||
239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 | return True if self.bPersonalDic and self.oPersonalDic.lookup(sWord): return True return False def getMorph (self, sWord): "retrieves morphologies list, different casing allowed" if self.bStorage and sWord in self._dMorphologies: return self._dMorphologies[sWord] lMorph = self.oMainDic.getMorph(sWord) if self.bCommunityDic: lMorph.extend(self.oCommunityDic.getMorph(sWord)) if self.bPersonalDic: lMorph.extend(self.oPersonalDic.getMorph(sWord)) if self.bStorage: self._dMorphologies[sWord] = lMorph self._dLemmas[sWord] = { s[1:s.find("/")] for s in lMorph } return lMorph def getLemma (self, sWord): "retrieves lemmas" if self.bStorage: if sWord not in self._dLemmas: self.getMorph(sWord) return self._dLemmas[sWord] return { s[1:s.find("/")] for s in self.getMorph(sWord) } def suggest (self, sWord, nSuggLimit=10): "generator: returns 1, 2 or 3 lists of suggestions" if self.lexicographer: if sWord in self.lexicographer.dSugg: yield self.lexicographer.dSugg[sWord].split("|") elif sWord.istitle() and sWord.lower() in self.lexicographer.dSugg: lRes = self.lexicographer.dSugg[sWord.lower()].split("|") yield list(map(lambda sSugg: sSugg[0:1].upper()+sSugg[1:], lRes)) else: | > > > | 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 | return True if self.bPersonalDic and self.oPersonalDic.lookup(sWord): return True return False def getMorph (self, sWord): "retrieves morphologies list, different casing allowed" sWord = sWord.strip("_") if self.bStorage and sWord in self._dMorphologies: return self._dMorphologies[sWord] lMorph = self.oMainDic.getMorph(sWord) if self.bCommunityDic: lMorph.extend(self.oCommunityDic.getMorph(sWord)) if self.bPersonalDic: lMorph.extend(self.oPersonalDic.getMorph(sWord)) if self.bStorage: self._dMorphologies[sWord] = lMorph self._dLemmas[sWord] = { s[1:s.find("/")] for s in lMorph } return lMorph def getLemma (self, sWord): "retrieves lemmas" sWord = sWord.strip("_") if self.bStorage: if sWord not in self._dLemmas: self.getMorph(sWord) return self._dLemmas[sWord] return { s[1:s.find("/")] for s in self.getMorph(sWord) } def suggest (self, sWord, nSuggLimit=10): "generator: returns 1, 2 or 3 lists of suggestions" sWord = sWord.strip("_") if self.lexicographer: if sWord in self.lexicographer.dSugg: yield self.lexicographer.dSugg[sWord].split("|") elif sWord.istitle() and sWord.lower() in self.lexicographer.dSugg: lRes = self.lexicographer.dSugg[sWord.lower()].split("|") yield list(map(lambda sSugg: sSugg[0:1].upper()+sSugg[1:], lRes)) else: |
︙ | ︙ |