1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
|
"""
Operations on strings:
- calculate distance between two strings
- transform strings with transformation codes
"""
from .char_player import distanceBetweenChars
#### Ngrams
def getNgrams (sWord, n=2):
    """Return the list of n-grams of <sWord>, in order of appearance.

    An n-gram is a substring of <n> consecutive characters. The result has
    len(sWord) - n + 1 items; it is empty when <sWord> is shorter than <n>.
    A size below 1 has no meaningful n-gram, so an empty list is returned
    instead of a list of empty or arbitrary slices.
    """
    if n < 1:
        return []
    nLastStart = len(sWord) - n
    return [ sWord[i:i+n]  for i in range(nLastStart + 1) ]
#### DISTANCE CALCULATIONS
def longestCommonSubstring (s1, s2):
"longest common substring"
# http://en.wikipedia.org/wiki/Longest_common_substring_problem
|
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
|
"""
Operations on strings:
- calculate distance between two strings
- transform strings with transformation codes
"""
import unicodedata
from .char_player import distanceBetweenChars
#### N-GRAMS
def getNgrams (sWord, n=2):
    """Return the list of n-grams of <sWord>, in order of appearance.

    An n-gram is a substring of <n> consecutive characters. The result has
    len(sWord) - n + 1 items; it is empty when <sWord> is shorter than <n>.
    A size below 1 has no meaningful n-gram, so an empty list is returned
    instead of a list of empty or arbitrary slices.
    """
    if n < 1:
        return []
    nLastStart = len(sWord) - n
    return [ sWord[i:i+n]  for i in range(nLastStart + 1) ]
#### WORD NORMALIZATION
# Translation table replacing the long s and the Latin typographic ligatures
# by their plain-letter spelling. str.maketrans() only accepts keys that are
# exactly one character long, so every ligature is written as an explicit
# escape of its single code point: this keeps the table valid even if the
# file goes through a tool that expands ligatures into separate letters.
# NOTE(review): keys are assumed to be the U+FB00..U+FB06 ligatures, in the
# order of the original table -- confirm against the upstream file.
_xTransCharsForSpelling = str.maketrans({
    "\u017F": "s",      # LATIN SMALL LETTER LONG S
    "\uFB03": "ffi",    # LATIN SMALL LIGATURE FFI
    "\uFB04": "ffl",    # LATIN SMALL LIGATURE FFL
    "\uFB00": "ff",     # LATIN SMALL LIGATURE FF
    "\uFB05": "ft",     # LATIN SMALL LIGATURE LONG S T (replacement "ft" kept from the original table)
    "\uFB01": "fi",     # LATIN SMALL LIGATURE FI
    "\uFB02": "fl",     # LATIN SMALL LIGATURE FL
    "\uFB06": "st",     # LATIN SMALL LIGATURE ST
})


def spellingNormalization (sWord):
    """Return <sWord> with ligatures expanded, normalized to Unicode NFC.

    The long s and the ligatures listed in _xTransCharsForSpelling are first
    replaced by plain letters, then the result is composed with NFC so that
    a base letter followed by a combining mark becomes the precomposed
    character when one exists.
    """
    sWithoutLigatures = sWord.translate(_xTransCharsForSpelling)
    return unicodedata.normalize("NFC", sWithoutLigatures)
#### DISTANCE CALCULATIONS
def longestCommonSubstring (s1, s2):
"longest common substring"
# http://en.wikipedia.org/wiki/Longest_common_substring_problem
|