Grammalecte  Diff

Differences From Artifact [d4b9b2fec8]:

To Artifact [8c24055087]:


1
2
3
4
5
6
7
8
9
10

11
12
13
14
15
16
17
"""
Spellchecker.
Useful to check several dictionaries at once.

To avoid iterating over a pile of dictionaries, it is assumed that 3 are enough:
- the main dictionary, bundled with the package
- the community dictionary, added by an organization
- the personal dictionary, created by the user for their own convenience
"""


import importlib
import traceback

from . import ibdawg
from . import tokenizer












>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
"""
Spellchecker.
Useful to check several dictionaries at once.

To avoid iterating over a pile of dictionaries, it is assumed that 3 are enough:
- the main dictionary, bundled with the package
- the community dictionary, added by an organization
- the personal dictionary, created by the user for their own convenience
"""

import re
import importlib
import traceback

from . import ibdawg
from . import tokenizer


252
253
254
255
256
257
258


















259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276


277
278
279
280
281
282
283
        if self.bPersonalDic:
            lMorph.extend(self.oPersonalDic.getMorph(sWord))
        if self.bStorage:
            self._dMorphologies[sWord] = lMorph
            self._dLemmas[sWord] = { s[1:s.find("/")]  for s in lMorph }
        return lMorph



















    def getLemma (self, sWord):
        """Return the set of lemmas of <sWord>.

        A lemma is the part of a morphology string located between its first
        character and its first "/".
        When storage is enabled, the result is read from the lemma cache;
        on a cache miss, getMorph() is called first (it is expected to fill the cache).
        """
        if not self.bStorage:
            # no cache available: compute the lemmas from the morphologies
            return { sMorph[1:sMorph.find("/")]  for sMorph in self.getMorph(sWord) }
        if sWord not in self._dLemmas:
            # called for its side effect on the cache, the returned list is not needed here
            self.getMorph(sWord)
        return self._dLemmas[sWord]

    def suggest (self, sWord, nSuggLimit=10):
        """Generator: yields 1, 2 or 3 lists of suggestions for <sWord>.

        First list: the predefined suggestions of the lexicographer if it has an entry
        for <sWord> (or for its lowercase form when <sWord> is title-cased, in which case
        the first letter of each suggestion is uppercased), else the suggestions of the
        main dictionary.
        Then, if enabled, one list from the community dictionary and one list from the
        personal dictionary, each requested with a limit of (nSuggLimit//2)+1.
        """
        oLexicographer = self.lexicographer
        if not oLexicographer:
            yield self.oMainDic.suggest(sWord, nSuggLimit, True)
        elif sWord in oLexicographer.dSugg:
            yield oLexicographer.dSugg[sWord].split("|")
        elif sWord.istitle() and sWord.lower() in oLexicographer.dSugg:
            # entry stored in lowercase: restore the initial capital on each suggestion
            lLowerSuggs = oLexicographer.dSugg[sWord.lower()].split("|")
            yield [ sSugg[0:1].upper() + sSugg[1:]  for sSugg in lLowerSuggs ]
        else:
            yield self.oMainDic.suggest(sWord, nSuggLimit, True)
        # secondary dictionaries
        if self.bCommunityDic:
            yield self.oCommunityDic.suggest(sWord, (nSuggLimit//2)+1)
        if self.bPersonalDic:
            yield self.oPersonalDic.suggest(sWord, (nSuggLimit//2)+1)








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>














|
|

|
>
>







253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
        if self.bPersonalDic:
            lMorph.extend(self.oPersonalDic.getMorph(sWord))
        if self.bStorage:
            self._dMorphologies[sWord] = lMorph
            self._dLemmas[sWord] = { s[1:s.find("/")]  for s in lMorph }
        return lMorph

    def morph (self, sWord, sPattern, sNegPattern=""):
        """Analyse <sWord> and test its morphologies against regular expressions.

        Returns False if <sWord> has no morphology.
        If <sNegPattern> is "*": returns True only if every morphology matches <sPattern>.
        If <sNegPattern> is another non-empty pattern: returns False as soon as one
        morphology matches it.
        Otherwise: returns True if at least one morphology matches <sPattern>.
        """
        lMorph = self.getMorph(sWord)
        if not lMorph:
            return False
        if sNegPattern == "*":
            # special value: all morphologies must match <sPattern>
            zAllPattern = re.compile(sPattern)
            for sMorph in lMorph:
                if not zAllPattern.search(sMorph):
                    return False
            return True
        if sNegPattern:
            # negative condition: a single match is enough to reject the word
            zNegPattern = re.compile(sNegPattern)
            for sMorph in lMorph:
                if zNegPattern.search(sMorph):
                    return False
        # positive condition (compiled only now, after the negative check)
        zPattern = re.compile(sPattern)
        for sMorph in lMorph:
            if zPattern.search(sMorph):
                return True
        return False

    def getLemma (self, sWord):
        """Return the set of lemmas of <sWord>.

        Each lemma is cut out of a morphology string: from its second character
        up to (not including) its first "/".
        With storage enabled, the lemma cache is used; if <sWord> is not cached yet,
        getMorph() is called beforehand (it is expected to store the lemmas).
        """
        if self.bStorage:
            dCache = self._dLemmas
            if sWord not in dCache:
                self.getMorph(sWord)    # side effect wanted: cache update
            return dCache[sWord]
        # storage disabled: build the set on the fly
        lMorph = self.getMorph(sWord)
        return { sMorph[1:sMorph.find("/")]  for sMorph in lMorph }

    def suggest (self, sWord, nSuggLimit=10):
        """Generator: yields 1, 2 or 3 lists of suggestions for <sWord>.

        First list:
        - without lexicographer: the suggestions of the main dictionary;
        - with a lexicographer: its predefined suggestions for <sWord> (or for the lowercase
          form of a title-cased <sWord>, first letter of each suggestion uppercased) if any,
          else the suggestions of the main dictionary filtered by lexicographer.isValidSugg().
        Then, if enabled, one list from the community dictionary and one list from the
        personal dictionary, each requested with a limit of (nSuggLimit//2)+1.
        """
        oLexicographer = self.lexicographer
        if not oLexicographer:
            yield self.oMainDic.suggest(sWord, nSuggLimit, True)
        elif sWord in oLexicographer.dSugg:
            yield oLexicographer.dSugg[sWord].split("|")
        elif sWord.istitle() and sWord.lower() in oLexicographer.dSugg:
            # entry stored in lowercase: restore the initial capital on each suggestion
            lLowerSuggs = oLexicographer.dSugg[sWord.lower()].split("|")
            yield [ sSugg[0:1].upper() + sSugg[1:]  for sSugg in lLowerSuggs ]
        else:
            # keep only the suggestions accepted by the lexicographer
            lValidSuggs = []
            for sSugg in self.oMainDic.suggest(sWord, nSuggLimit, True):
                if oLexicographer.isValidSugg(sSugg, self):
                    lValidSuggs.append(sSugg)
            yield lValidSuggs
        # secondary dictionaries
        if self.bCommunityDic:
            yield self.oCommunityDic.suggest(sWord, (nSuggLimit//2)+1)
        if self.bPersonalDic:
            yield self.oPersonalDic.suggest(sWord, (nSuggLimit//2)+1)