Grammalecte  Diff

Differences From Artifact [e5dd8880c3]:

To Artifact [2ac4c0eb20]:


1
2
3


4
5
6
7
8
9
10
1
2
3
4
5
6
7
8
9
10
11
12



+
+







# list of similar chars
# useful for suggestion mechanism

import re


def distanceDamerauLevenshtein (s1, s2):
    "distance of Damerau-Levenshtein between <s1> and <s2>"
    # https://fr.wikipedia.org/wiki/Distance_de_Damerau-Levenshtein
    d = {}
    nLen1 = len(s1)
    nLen2 = len(s2)
296
297
298
299
300
301
302
303

304
305
306
307
308
309
310
311
312
313
314
315
















298
299
300
301
302
303
304

305
306
307
308
309
310
311
312
313
314



315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330







-
+









-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
    "on": ("ons", "ont"),
    "ON": ("ONS", "ONT"),
    "oi": ("ois", "oit", "oix"),
    "OI": ("OIS", "OIT", "OIX"),
}


# Préfixes
# Préfixes et suffixes

# First group of word prefixes.
# NOTE(review): how aPfx1 and aPfx2 differ in use is not visible here — confirm with callers.
aPfx1 = frozenset((
    "anti", "archi", "contre", "hyper",
    "mé", "méta",
    "im", "in", "ir",
    "par", "proto", "pseudo", "pré",
    "re", "ré",
    "sans", "sous", "supra", "sur",
    "ultra",
))

# Second group of word prefixes.
aPfx2 = frozenset((
    "belgo",
    "franco",
    "génito",
    "gynéco",
    "médico",
    "russo",
))

# Suffixes to exclude: pronouns, including their "t-" linked forms.
aExcludedSfx = frozenset((
    "je", "tu",
    "il", "elle", "on",
    "t-il", "t-elle", "t-on",
    "nous", "vous",
    "ils", "elles",
))

# Matches a word followed by a hyphenated pronoun ("-il", "-t-elle", "-nous", …).
# Group 1 is the word, group 2 is the hyphenated pronoun suffix.
# The case-insensitive flag is passed as an argument: an inline "(?i)" placed
# after "^" is rejected by Python 3.11+ (global flags must start the pattern).
_zMotAvecPronom = re.compile(
    r"^(\w+)(-(?:t-|)(?:ils?|elles?|on|je|tu|nous|vous))$",
    re.IGNORECASE
)

def cut (sWord):
    """Return a tuple of strings (prefix, trimmed_word, suffix).

    If <sWord> ends with a hyphenated pronoun (e.g. "mange-t-il"), the pronoun
    part, leading hyphen included, is returned as the suffix and the remaining
    word as trimmed_word. Otherwise <sWord> is returned unchanged with an empty
    suffix. The prefix element is always the empty string.
    """
    m = _zMotAvecPronom.match(sWord)
    if m is None:
        return ("", sWord, "")
    return ("", m.group(1), m.group(2))


# Other functions

def filterSugg (aSugg):
    "lazily yield the suggestions of <aSugg> that do not end with “è” or “È”"
    tRejectedEndings = ("è", "È")

    def _isKept (sCandidate):
        # a suggestion is dropped when it ends with one of the rejected endings
        if sCandidate.endswith(tRejectedEndings):
            return False
        return True

    return filter(_isKept, aSugg)