1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
"""
Operations on strings:
- calculate distance between two strings
- transform strings with transformation codes
"""
import unicodedata
import re
from .char_player import distanceBetweenChars, dDistanceBetweenChars
#### N-GRAMS
def getNgrams (sWord, n=2):
    """Return the list of n-grams (substrings of length <n>) of <sWord>.

    The n-grams are listed in order of appearance, one per start position:
    getNgrams("abcd") -> ["ab", "bc", "cd"].
    An empty list is returned when <sWord> is shorter than <n>, or when <n>
    is smaller than 1 (no meaningful n-gram exists in that case).
    """
    if n < 1:
        # without this guard, n == 0 yields a list of empty strings and
        # n < 0 yields arbitrary truncated slices
        return []
    return [ sWord[i:i+n]  for i in range(len(sWord)-n+1) ]
|
>
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
|
"""
Operations on strings:
- calculate distance between two strings
- transform strings with transformation codes
"""
import unicodedata
import re
from .char_player import distanceBetweenChars, dDistanceBetweenChars
from .echo import echo
#### N-GRAMS
def getNgrams (sWord, n=2):
    """Return the list of n-grams (substrings of length <n>) of <sWord>.

    The n-grams are listed in order of appearance, one per start position:
    getNgrams("abcd") -> ["ab", "bc", "cd"].
    An empty list is returned when <sWord> is shorter than <n>, or when <n>
    is smaller than 1 (no meaningful n-gram exists in that case).
    """
    if n < 1:
        # without this guard, n == 0 yields a list of empty strings and
        # n < 0 yields arbitrary truncated slices
        return []
    return [ sWord[i:i+n]  for i in range(len(sWord)-n+1) ]
|
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
|
def showDistance (s1, s2):
    """Display the distances computed between <s1> and <s2>.

    Prints one line holding both strings followed by the values returned by
    distanceDamerauLevenshtein (DL), distanceSift4 (S4) and
    distanceJaroWinkler (JW). Returns nothing.
    """
    nDL = distanceDamerauLevenshtein(s1, s2)
    nS4 = distanceSift4(s1, s2)
    fJW = distanceJaroWinkler(s1, s2)
    print(s1, "≠", s2, "\tDL:", nDL, "\tS4:", nS4, "\tJW:", fJW)
#### STEMMING OPERATIONS
## No stemming
|
|
|
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
|
def showDistance (s1, s2):
    """Display the distances computed between <s1> and <s2>.

    Sends to echo() one line holding both strings (each padded to a width of
    22 by the format spec) followed by the values returned by
    distanceDamerauLevenshtein (DL), distanceSift4 (S4) and
    distanceJaroWinkler (JW). Returns nothing.
    """
    nDL = distanceDamerauLevenshtein(s1, s2)
    nS4 = distanceSift4(s1, s2)
    fJW = distanceJaroWinkler(s1, s2)
    echo(f"{s1:22} ≠ {s2:22} \tDL: {nDL}\tS4: {nS4}\tJW: {fJW}")
#### STEMMING OPERATIONS
## No stemming
|