Grammalecte  Diff

Differences From Artifact [a9b878f391]:

To Artifact [175c38852d]:


154
155
156
157
158
159
160

161
162
163
164
165
166
167
168

169
170
171
172
173
174
175
    "m'en": " (me) pronom personnel objet + (en) pronom adverbial",
    "t'en": " (te) pronom personnel objet + (en) pronom adverbial",
    "s'en": " (se) pronom personnel objet + (en) pronom adverbial",
}


class Lexicographe:


    def __init__ (self, oSpellChecker):
        self.oSpellChecker = oSpellChecker
        self._zElidedPrefix = re.compile("(?i)^([dljmtsncç]|quoiqu|lorsqu|jusqu|puisqu|qu)['’](.+)")
        self._zCompoundWord = re.compile("(?i)(\\w+)-((?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts][’'](?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous)$")
        self._zTag = re.compile("[:;/][\\w*][^:;/]*")

    def analyzeWord (self, sWord):

        try:
            if not sWord:
                return (None, None)
            if sWord.count("-") > 4:
                return (["élément complexe indéterminé"], None)
            if sWord.isdigit():
                return (["nombre"], None)







>








>







154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
    "m'en": " (me) pronom personnel objet + (en) pronom adverbial",
    "t'en": " (te) pronom personnel objet + (en) pronom adverbial",
    "s'en": " (se) pronom personnel objet + (en) pronom adverbial",
}


class Lexicographe:
    "Lexicographer - word analyzer"

    def __init__ (self, oSpellChecker):
        self.oSpellChecker = oSpellChecker
        self._zElidedPrefix = re.compile("(?i)^([dljmtsncç]|quoiqu|lorsqu|jusqu|puisqu|qu)['’](.+)")
        self._zCompoundWord = re.compile("(?i)(\\w+)-((?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts][’'](?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous)$")
        self._zTag = re.compile("[:;/][\\w*][^:;/]*")

    def analyzeWord (self, sWord):
        "returns a tuple (a list of morphologies, a set of verb at infinitive form)"
        try:
            if not sWord:
                return (None, None)
            if sWord.count("-") > 4:
                return (["élément complexe indéterminé"], None)
            if sWord.isdigit():
                return (["nombre"], None)
193
194
195
196
197
198
199
200
201
202
203
204
205
206

207
208
209
210
211
212
213
                aMorph.append( "{} : {}".format(sWord, self.formatTags(lMorph[0])) )
            else:
                aMorph.append( "{} :  inconnu du dictionnaire".format(sWord) )
            # suffixe d’un mot composé
            if m2:
                aMorph.append( "-{} : {}".format(m2.group(2), self._formatSuffix(m2.group(2).lower())) )
            # Verbes
            aVerb = set([ s[1:s.find(" ")]  for s in lMorph  if ":V" in s ])
            return (aMorph, aVerb)
        except:
            traceback.print_exc()
            return (["#erreur"], None)

    def formatTags (self, sTags):

        sRes = ""
        sTags = re.sub("(?<=V[1-3])[itpqnmr_eaxz]+", "", sTags)
        sTags = re.sub("(?<=V0[ea])[itpqnmr_eaxz]+", "", sTags)
        for m in self._zTag.finditer(sTags):
            sRes += _dTAGS.get(m.group(0), " [{}]".format(m.group(0)))[0]
        if sRes.startswith(" verbe") and not sRes.endswith("infinitif"):
            sRes += " [{}]".format(sTags[1:sTags.find(" ")])







|






>







195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
                aMorph.append( "{} : {}".format(sWord, self.formatTags(lMorph[0])) )
            else:
                aMorph.append( "{} :  inconnu du dictionnaire".format(sWord) )
            # suffixe d’un mot composé
            if m2:
                aMorph.append( "-{} : {}".format(m2.group(2), self._formatSuffix(m2.group(2).lower())) )
            # Verbes
            aVerb = set([ s[1:s.find("/")]  for s in lMorph  if ":V" in s ])
            return (aMorph, aVerb)
        except:
            traceback.print_exc()
            return (["#erreur"], None)

    def formatTags (self, sTags):
        "returns string: readable tags"
        sRes = ""
        sTags = re.sub("(?<=V[1-3])[itpqnmr_eaxz]+", "", sTags)
        sTags = re.sub("(?<=V0[ea])[itpqnmr_eaxz]+", "", sTags)
        for m in self._zTag.finditer(sTags):
            sRes += _dTAGS.get(m.group(0), " [{}]".format(m.group(0)))[0]
        if sRes.startswith(" verbe") and not sRes.endswith("infinitif"):
            sRes += " [{}]".format(sTags[1:sTags.find(" ")])