Grammalecte  Diff

Differences From Artifact [4b10b3e705]:

To Artifact [928aa819c5]:


190
191
192
193
194
195
196

197
198
199
200
201
202
203
204
        if nMinLen > 4  and  nCommon > i + 1  and  2 * nCommon >= nMinLen + i:
            fWeight += (1 - fWeight) * ((nCommon - i - 1) / (nLen1 * nLen2 - i*2 + 2))
    return fWeight


def distanceSift4 (s1, s2, nMaxOffset=5):
    "implementation of general Sift4."

    # https://siderite.blogspot.com/2014/11/super-fast-and-accurate-string-distance.html
    if not s1:
        return len(s2)
    if not s2:
        return len(s1)
    nLen1, nLen2 = len(s1), len(s2)
    i1, i2 = 0, 0   # Cursors for each string
    nLargestCS = 0  # Largest common substring







>
|







190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
        if nMinLen > 4  and  nCommon > i + 1  and  2 * nCommon >= nMinLen + i:
            fWeight += (1 - fWeight) * ((nCommon - i - 1) / (nLen1 * nLen2 - i*2 + 2))
    return fWeight


def distanceSift4 (s1, s2, nMaxOffset=5):
    "implementation of general Sift4."
    # faster than Damerau-Levenshtein and Jaro-Winkler
    # https://siderite.dev/blog/super-fast-and-accurate-string-distance.html
    if not s1:
        return len(s2)
    if not s2:
        return len(s1)
    nLen1, nLen2 = len(s1), len(s2)
    i1, i2 = 0, 0   # Cursors for each string
    nLargestCS = 0  # Largest common substring
251
252
253
254
255
256
257
258
259
260
261

262
263
264
265
266
267
268
            i1 = i2 = min(i1, i2)
    nLargestCS += nLocalCS
    return round(max(nLen1, nLen2) - nLargestCS + nTrans)


def showDistance (s1, s2):
    "display Jaro-Winkler, Damerau-Levenshtein and Sift4 distances between <s1> and <s2>"
    # distanceSift4 returns an int (and the other distances are presumably numeric too):
    # a number can't be appended to a string with <+>, so each result is converted with str().
    print("Jaro-Winkler: " + s1 + "/" + s2 + " = " + str(distanceJaroWinkler(s1, s2)))
    print("Damerau-Levenshtein: " + s1 + "/" + s2 + " = " + str(distanceDamerauLevenshtein(s1, s2)))
    print("Sift4:" + s1 + "/" + s2 + " = " + str(distanceSift4(s1, s2)))





#### STEMMING OPERATIONS

## No stemming








<
|
|
|
>







252
253
254
255
256
257
258

259
260
261
262
263
264
265
266
267
268
269
            i1 = i2 = min(i1, i2)
    nLargestCS += nLocalCS
    return round(max(nLen1, nLen2) - nLargestCS + nTrans)


def showDistance (s1, s2):
    "print, on a single line, the Damerau-Levenshtein (DL), Sift4 (S4) and Jaro-Winkler (JW) distances between <s1> and <s2>"
    # Distances are computed first, then displayed together, separated by tabulations.
    nDamLev = distanceDamerauLevenshtein(s1, s2)
    nSift4 = distanceSift4(s1, s2)
    fJaroWink = distanceJaroWinkler(s1, s2)
    print(
        s1, "≠", s2,
        "\tDL:", nDamLev,
        "\tS4:", nSift4,
        "\tJW:", fJaroWink
    )



#### STEMMING OPERATIONS

## No stemming