154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
|
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
|
+
+
|
"m'en": " (me) pronom personnel objet + (en) pronom adverbial",
"t'en": " (te) pronom personnel objet + (en) pronom adverbial",
"s'en": " (se) pronom personnel objet + (en) pronom adverbial",
}
class Lexicographe:
"Lexicographer - word analyzer"
def __init__ (self, oSpellChecker):
self.oSpellChecker = oSpellChecker
self._zElidedPrefix = re.compile("(?i)^([dljmtsncç]|quoiqu|lorsqu|jusqu|puisqu|qu)['’](.+)")
self._zCompoundWord = re.compile("(?i)(\\w+)-((?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts][’'](?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous)$")
self._zTag = re.compile("[:;/][\\w*][^:;/]*")
def analyzeWord (self, sWord):
"returns a tuple (a list of morphologies, a set of verb at infinitive form)"
try:
if not sWord:
return (None, None)
if sWord.count("-") > 4:
return (["élément complexe indéterminé"], None)
if sWord.isdigit():
return (["nombre"], None)
|
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
|
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
|
-
+
+
|
aMorph.append( "{} : {}".format(sWord, self.formatTags(lMorph[0])) )
else:
aMorph.append( "{} : inconnu du dictionnaire".format(sWord) )
# suffixe d’un mot composé
if m2:
aMorph.append( "-{} : {}".format(m2.group(2), self._formatSuffix(m2.group(2).lower())) )
# Verbes
aVerb = set([ s[1:s.find(" ")] for s in lMorph if ":V" in s ])
aVerb = set([ s[1:s.find("/")] for s in lMorph if ":V" in s ])
return (aMorph, aVerb)
except:
traceback.print_exc()
return (["#erreur"], None)
def formatTags (self, sTags):
"returns string: readable tags"
sRes = ""
sTags = re.sub("(?<=V[1-3])[itpqnmr_eaxz]+", "", sTags)
sTags = re.sub("(?<=V0[ea])[itpqnmr_eaxz]+", "", sTags)
for m in self._zTag.finditer(sTags):
sRes += _dTAGS.get(m.group(0), " [{}]".format(m.group(0)))[0]
if sRes.startswith(" verbe") and not sRes.endswith("infinitif"):
sRes += " [{}]".format(sTags[1:sTags.find(" ")])
|