1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
|
"""
Lexicographer for the French language
"""
# Note:
# This module must contain at least:
# <dSugg> : a dictionary for default suggestions.
# <bLexicographer> : a boolean False
# if the boolean is True, 3 functions are required:
# split(sWord) -> returns a list of string (that will be analyzed)
# analyze(sWord) -> returns a string with the meaning of word
# formatTags(sTags) -> returns a string with the meaning of tags
import re
#### Suggestions
dSugg = {
|
|
>
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
|
"""
Lexicographer for the French language
"""
# Note:
# This module must contain at least:
# <dSugg> : a dictionary for default suggestions.
# <bLexicographer> : a boolean False
# if the boolean is True, 4 functions are required:
# split(sWord) -> returns a list of string (that will be analyzed)
# analyze(sWord) -> returns a string with the meaning of word
# formatTags(sTags) -> returns a string with the meaning of tags
# filterSugg(aSugg) -> returns a filtered list of suggestions
import re
#### Suggestions
dSugg = {
|
132
133
134
135
136
137
138
139
140
141
142
143
144
145
|
"XXVIème": "XXVIᵉ",
"XXVIIème": "XXVIIᵉ",
"XXVIIIème": "XXVIIIᵉ",
"XXIXème": "XXIXᵉ",
"XXXème": "XXXᵉ"
}
#### Lexicographer
bLexicographer = True
_dTAGS = {
':N': (" nom,", "Nom"),
|
>
>
>
>
>
>
>
>
>
>
>
|
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
|
"XXVIème": "XXVIᵉ",
"XXVIIème": "XXVIIᵉ",
"XXVIIIème": "XXVIIIᵉ",
"XXIXème": "XXIXᵉ",
"XXXème": "XXXᵉ"
}
# Prefixes and suffixes
# aPfx1: immutable set of general-purpose prefixes.
aPfx1 = frozenset({
    "anti", "archi", "contre", "hyper", "mé", "méta", "im", "in", "ir", "par",
    "proto", "pseudo", "pré", "re", "ré", "sans", "sous", "supra", "sur",
    "ultra",
})
# aPfx2: immutable set of combining prefixes ending in "o".
aPfx2 = frozenset({
    "belgo", "franco", "génito", "gynéco", "médico", "russo",
})
#### Lexicographer
bLexicographer = True
_dTAGS = {
':N': (" nom,", "Nom"),
|
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
|
'-en': " pronom adverbial",
"-m’en": " (me) pronom personnel objet + (en) pronom adverbial",
"-t’en": " (te) pronom personnel objet + (en) pronom adverbial",
"-s’en": " (se) pronom personnel objet + (en) pronom adverbial",
}
# Elided prefix at the start of a word (case-insensitive).
#   group 1: the elided form followed by the typographic apostrophe (’)
#   group 2: everything that follows the apostrophe
_zElidedPrefix = re.compile("(?i)^((?:[dljmtsncç]|quoiqu|lorsqu|jusqu|puisqu|qu)’)(.+)")
# Word ending with a hyphenated pronoun suffix (case-insensitive).
#   group 1: leading run of word characters
#   group 2: the hyphenated suffix, which must reach the end of the string
_zCompoundWord = re.compile("(?i)(\\w+)(-(?:(?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts]’(?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous))$")
# One tag: a ':', ';' or '/' delimiter, then a word character or '*',
# then any characters up to (not including) the next delimiter.
_zTag = re.compile("[:;/][\\w*][^:;/]*")
def split (sWord):
    """Break a word into three parts: elided prefix, root and suffix.

    Straight apostrophes are first replaced by the typographic one (’).
    Returns the tuple (sPrefix, sRoot, sSuffix); a part that is not found
    is returned as an empty string.
    """
    sNormalized = sWord.replace("'", "’")
    # elided prefix: keep it (apostrophe included) and go on with the remainder
    mElided = _zElidedPrefix.match(sNormalized)
    sPrefix, sRest = mElided.group(1, 2) if mElided else ("", sNormalized)
    # compound word: detach the trailing hyphenated pronoun, if any
    mCompound = _zCompoundWord.match(sRest)
    sRoot, sSuffix = mCompound.group(1, 2) if mCompound else (sRest, "")
    return sPrefix, sRoot, sSuffix
|
|
|
<
|
|
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
|
'-en': " pronom adverbial",
"-m’en": " (me) pronom personnel objet + (en) pronom adverbial",
"-t’en": " (te) pronom personnel objet + (en) pronom adverbial",
"-s’en": " (se) pronom personnel objet + (en) pronom adverbial",
}
# Elided prefix at the start of a word (case-insensitive).
#   group 1: the elided form, apostrophe NOT included
#   The apostrophe itself may be any of: ’ ' ‘ ` ʼ
#   group 2: the run of word characters and hyphens after the apostrophe
_zElidedPrefix = re.compile("(?i)^([ldmtsnjcç]|lorsqu|presqu|jusqu|puisqu|quoiqu|quelqu|qu)[’'‘`ʼ]([\\w-]+)")
# Word ending with a hyphenated pronoun suffix (case-insensitive).
#   group 1: leading run of word characters
#   group 2: the hyphenated suffix, which must reach the end of the string
_zCompoundWord = re.compile("(?i)(\\w+)(-(?:(?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts]’(?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous|ce))$")
# One tag: a ':', ';' or '/' delimiter, then a word character or '*',
# then any characters up to (not including) the next delimiter.
_zTag = re.compile("[:;/][\\w*][^:;/]*")
def split (sWord):
    """Break a word into three parts: elided prefix, root and suffix.

    Whatever apostrophe variant the input used, the returned prefix always
    ends with the typographic apostrophe (’).
    Returns the tuple (sPrefix, sRoot, sSuffix); a part that is not found
    is returned as an empty string.
    """
    # elided prefix: re-attach a normalized apostrophe to the matched form
    mElided = _zElidedPrefix.match(sWord)
    if mElided:
        sPrefix, sRest = mElided.group(1) + "’", mElided.group(2)
    else:
        sPrefix, sRest = "", sWord
    # compound word: detach the trailing hyphenated pronoun, if any
    mCompound = _zCompoundWord.match(sRest)
    sRoot, sSuffix = mCompound.group(1, 2) if mCompound else (sRest, "")
    return sPrefix, sRoot, sSuffix
|
352
353
354
355
356
357
358
|
sTags = re.sub("(?<=V[1-3])[itpqnmr_eaxz]+", "", sTags)
sTags = re.sub("(?<=V0[ea])[itpqnmr_eaxz]+", "", sTags)
for m in _zTag.finditer(sTags):
sRes += _dTAGS.get(m.group(0), " [{}]".format(m.group(0)))[0]
if sRes.startswith(" verbe") and not sRes.endswith("infinitif"):
sRes += " [{}]".format(sTags[1:sTags.find("/")])
return sRes.rstrip(",")
|
>
>
>
>
>
>
>
|
363
364
365
366
367
368
369
370
371
372
373
374
375
376
|
sTags = re.sub("(?<=V[1-3])[itpqnmr_eaxz]+", "", sTags)
sTags = re.sub("(?<=V0[ea])[itpqnmr_eaxz]+", "", sTags)
for m in _zTag.finditer(sTags):
sRes += _dTAGS.get(m.group(0), " [{}]".format(m.group(0)))[0]
if sRes.startswith(" verbe") and not sRes.endswith("infinitif"):
sRes += " [{}]".format(sTags[1:sTags.find("/")])
return sRes.rstrip(",")
# Other functions
def filterSugg (aSugg):
    """Exclude unwanted suggestions.

    Drops every suggestion ending with "è" or "È" and returns the
    remaining ones, in their original order, as a list.

    A real list is returned rather than a lazy ``filter`` object, so the
    result can be iterated more than once, measured with ``len()`` and
    tested for emptiness.
    """
    return [sSugg for sSugg in aSugg if not sSugg.endswith(("è", "È"))]
|