17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
|
return [ sWord[i:i+n] for i in range(len(sWord)-n+1) ]
#### WORD NORMALIZATION
_xTransCharsForSpelling = str.maketrans({
'ſ': 's', 'ffi': 'ffi', 'ffl': 'ffl', 'ff': 'ff', 'ſt': 'ft', 'fi': 'fi', 'fl': 'fl', 'st': 'st'
})
def spellingNormalization (sWord):
"nomalization NFC and removing ligatures"
return unicodedata.normalize("NFC", sWord.translate(_xTransCharsForSpelling))
|
|
>
|
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
|
return [ sWord[i:i+n] for i in range(len(sWord)-n+1) ]
#### WORD NORMALIZATION
_xTransCharsForSpelling = str.maketrans({
'ſ': 's', 'ffi': 'ffi', 'ffl': 'ffl', 'ff': 'ff', 'ſt': 'ft', 'fi': 'fi', 'fl': 'fl', 'st': 'st',
"'": '’'
})
def spellingNormalization (sWord):
"nomalization NFC and removing ligatures"
return unicodedata.normalize("NFC", sWord.translate(_xTransCharsForSpelling))
|