| 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 | 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
 | 
+
+
-
+
+
+
+
+
+
+
+
+
+
+
+
+
 | 
"""
Operations on strings:
- calculate distance between two strings
- transform strings with transformation codes
"""
import unicodedata
from .char_player import distanceBetweenChars
#### N-GRAMS
def getNgrams (sWord, n=2):
    "return the list of contiguous slices of <n> characters of <sWord>, from left to right (empty list if <sWord> is shorter than <n>)"
    # number of possible start positions; zero or negative when the word is too short
    nStartPositions = len(sWord) - n + 1
    lNgrams = []
    for iStart in range(nStartPositions):
        lNgrams.append(sWord[iStart:iStart+n])
    return lNgrams
#### WORD NORMALIZATION
_xTransCharsForSpelling = str.maketrans({
    'ſ': 's',  'ffi': 'ffi',  'ffl': 'ffl',  'ff': 'ff',  'ſt': 'ft',  'fi': 'fi',  'fl': 'fl',  'st': 'st'
})
def spellingNormalization (sWord):
    "return <sWord> with characters replaced according to the spelling translation table (ligatures removed), then normalized to NFC"
    # first replace the characters listed in the translation table...
    sTranslated = sWord.translate(_xTransCharsForSpelling)
    # ...then apply Unicode NFC normalization to the result
    return unicodedata.normalize("NFC", sTranslated)
#### DISTANCE CALCULATIONS
def longestCommonSubstring (s1, s2):
    "longest common substring"
    # http://en.wikipedia.org/wiki/Longest_common_substring_problem