Overview
| Comment: | [graphspell] replace straight apostrophe in spellingNormalization() |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk | graphspell |
| Files: | files | file ages | folders |
| SHA3-256: | 3f34a1e2e7a7fe0ecdde83b18be97ee6 |
| User & Date: | olr on 2020-09-30 16:10:44 |
| Other Links: | manifest | tags |
Context
|
2020-09-30
| ||
| 16:19 | [fr] update tests check-in: ec053526e5 user: olr tags: trunk, fr | |
| 16:10 | [graphspell] replace straight apostrophe in spellingNormalization() check-in: 3f34a1e2e7 user: olr tags: trunk, graphspell | |
| 16:06 | [graphspell] ignore underscore for spellchecking and morph check-in: c2a07ab8a2 user: olr tags: trunk, graphspell | |
Changes
Modified graphspell-js/ibdawg.js from [a4ceb45f56] to [5c5b2cad28].
| ︙ | ︙ | |||
273 274 275 276 277 278 279 |
}
isValid (sWord) {
// checks if sWord is valid (different casing tested if the first letter is a capital)
if (!sWord) {
return true;
}
| < < < | 273 274 275 276 277 278 279 280 281 282 283 284 285 286 |
}
isValid (sWord) {
// checks if sWord is valid (different casing tested if the first letter is a capital)
if (!sWord) {
return true;
}
if (this.lookup(sWord)) {
return true;
}
if (sWord.charAt(0).gl_isUpperCase()) {
if (sWord.length > 1) {
if (sWord.gl_isTitle()) {
return !!this.lookup(sWord.toLowerCase());
|
| ︙ | ︙ |
Modified graphspell-js/str_transform.js from [3bf8e3b480] to [a1596e1ca3].
| ︙ | ︙ | |||
22 23 24 25 26 27 28 |
for (let i=0; i <= sWord.length - n; i++) {
lNgrams.push(sWord.slice(i, i+n));
}
return lNgrams;
},
_xTransCharsForSpelling: new Map([
| | > | 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
for (let i=0; i <= sWord.length - n; i++) {
lNgrams.push(sWord.slice(i, i+n));
}
return lNgrams;
},
_xTransCharsForSpelling: new Map([
['ſ', 's'], ['ffi', 'ffi'], ['ffl', 'ffl'], ['ff', 'ff'], ['ſt', 'ft'], ['fi', 'fi'], ['fl', 'fl'], ['st', 'st'],
["'", '’']
]),
spellingNormalization: function (sWord) {
let sNewWord = "";
for (let c of sWord) {
sNewWord += this._xTransCharsForSpelling.gl_get(c, c);
}
|
| ︙ | ︙ |
Modified graphspell/ibdawg.py from [43975eca5b] to [cae8eca48c].
| ︙ | ︙ | |||
275 276 277 278 279 280 281 |
return True
return False
def isValid (self, sWord):
"checks if <sWord> is valid (different casing tested if the first letter is a capital)"
if not sWord:
return True
| < < | 275 276 277 278 279 280 281 282 283 284 285 286 287 288 |
return True
return False
def isValid (self, sWord):
"checks if <sWord> is valid (different casing tested if the first letter is a capital)"
if not sWord:
return True
if self.lookup(sWord):
return True
if sWord[0:1].isupper():
if len(sWord) > 1:
if sWord.istitle():
return self.lookup(sWord.lower())
if sWord.isupper():
|
| ︙ | ︙ |
Modified graphspell/str_transform.py from [98c57fa9ba] to [ee895b8347].
| ︙ | ︙ | |||
17 18 19 20 21 22 23 |
return [ sWord[i:i+n] for i in range(len(sWord)-n+1) ]
#### WORD NORMALIZATION
_xTransCharsForSpelling = str.maketrans({
| | > | 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
return [ sWord[i:i+n] for i in range(len(sWord)-n+1) ]
#### WORD NORMALIZATION
# Translation table applied before spell checking: typographic ligatures are
# expanded to plain letters, the long s becomes "s", and the straight
# apostrophe becomes the typographic one.
# str.maketrans() only accepts string keys of length 1, so every key must be a
# single code point. The ligatures are written as \u escapes so that tools
# which normalize text cannot silently decompose them into several letters.
_xTransCharsForSpelling = str.maketrans({
    'ſ': 's',
    '\ufb03': 'ffi',    # LATIN SMALL LIGATURE FFI
    '\ufb04': 'ffl',    # LATIN SMALL LIGATURE FFL
    '\ufb00': 'ff',     # LATIN SMALL LIGATURE FF
    '\ufb05': 'ft',     # LATIN SMALL LIGATURE LONG S T -- NOTE(review): 'ft' kept as found; confirm vs 'st'
    '\ufb01': 'fi',     # LATIN SMALL LIGATURE FI
    '\ufb02': 'fl',     # LATIN SMALL LIGATURE FL
    '\ufb06': 'st',     # LATIN SMALL LIGATURE ST
    "'": '’'
})

def spellingNormalization (sWord):
    """Return <sWord> prepared for spell checking.

    The characters listed in _xTransCharsForSpelling are replaced first
    (ligatures expanded, long s and straight apostrophe rewritten), then the
    result is normalized to Unicode NFC.
    """
    return unicodedata.normalize("NFC", sWord.translate(_xTransCharsForSpelling))
|
| ︙ | ︙ |