Grammalecte  Diff

Differences From Artifact [638f8d8cdf]:

To Artifact [dbd02131cc]:


1
2
3
4
5
6
7
8
9
10
11
12
13

14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31

32
33
34
35
36
37
38
39
40
41
42
43
44
45



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60






























61
62
63
64
65
66
67
# Spellchecker
# Wrapper for the IBDAWG class.
# Useful to check several dictionaries at once.

# To avoid iterating over a pile of dictionaries, it is assumed that 3 are enough:
# - the main dictionary, bundled with the package
# - the extended dictionary, added by an organization
# - the personal dictionary, created by the user for their own convenience


import traceback

from . import ibdawg



dDefaultDictionaries = {
    "fr": "fr.bdic",
    "en": "en.bdic"
}


class SpellChecker ():

    def __init__ (self, sLangCode, sfMainDic="", sfExtendedDic="", sfPersonalDic=""):
        "stores the language code and loads the main, extended and personal dictionaries (a loading failure of the main one raises an exception)"
        self.sLangCode = sLangCode
        # no main dictionary given: fall back on the default one registered for this language code
        sfMainSource = sfMainDic or dDefaultDictionaries.get(sLangCode, "")
        self.oMainDic = self._loadDictionary(sfMainSource, True)
        # optional dictionaries: the attribute is None when no name is given or when loading fails
        self.oExtendedDic = self._loadDictionary(sfExtendedDic)
        self.oPersonalDic = self._loadDictionary(sfPersonalDic)


    def _loadDictionary (self, sfDictionary, bNecessary=False):
        "returns an IBDAWG object"
        if not sfDictionary:
            return None
        try:
            return ibdawg.IBDAWG(sfDictionary)
        except Exception as e:
            if bNecessary:
                raise Exception(str(e), "Error: <" + sfDictionary + "> not loaded.")
            print("Error: <" + sfDictionary + "> not loaded.")
            traceback.print_exc()
            return None




    def setMainDictionary (self, sfDictionary):
        "returns True if the dictionary is loaded"
        self.oMainDic = self._loadDictionary(sfDictionary)
        return bool(self.oMainDic)
            
    def setExtendedDictionary (self, sfDictionary):
        "returns True if the dictionary is loaded"
        self.oExtendedDic = self._loadDictionary(sfDictionary)
        return bool(self.oExtendedDic)

    def setPersonalDictionary (self, sfDictionary):
        "returns True if the dictionary is loaded"
        self.oPersonalDic = self._loadDictionary(sfDictionary)
        return bool(self.oPersonalDic)
































    # IBDAWG functions

    def isValidToken (self, sToken):
        "checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)"
        if self.oMainDic.isValidToken(sToken):
            return True













>


















>














>
>
>















>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# Spellchecker
# Wrapper for the IBDAWG class.
# Useful to check several dictionaries at once.

# To avoid iterating over a pile of dictionaries, it is assumed that 3 are enough:
# - the main dictionary, bundled with the package
# - the extended dictionary, added by an organization
# - the personal dictionary, created by the user for their own convenience


import traceback

from . import ibdawg
from . import tokenizer


dDefaultDictionaries = {
    "fr": "fr.bdic",
    "en": "en.bdic"
}


class SpellChecker ():

    def __init__ (self, sLangCode, sfMainDic="", sfExtendedDic="", sfPersonalDic=""):
        "stores the language code and loads the main, extended and personal dictionaries (a loading failure of the main one raises an exception)"
        self.sLangCode = sLangCode
        # the tokenizer is not created here: it stays None until loadTokenizer is called
        self.oTokenizer = None
        # no main dictionary given: fall back on the default one registered for this language code
        sfMainSource = sfMainDic or dDefaultDictionaries.get(sLangCode, "")
        self.oMainDic = self._loadDictionary(sfMainSource, True)
        # optional dictionaries: the attribute is None when no name is given or when loading fails
        self.oExtendedDic = self._loadDictionary(sfExtendedDic)
        self.oPersonalDic = self._loadDictionary(sfPersonalDic)

    def _loadDictionary (self, sfDictionary, bNecessary=False):
        "returns an IBDAWG object"
        if not sfDictionary:
            return None
        try:
            return ibdawg.IBDAWG(sfDictionary)
        except Exception as e:
            if bNecessary:
                raise Exception(str(e), "Error: <" + sfDictionary + "> not loaded.")
            print("Error: <" + sfDictionary + "> not loaded.")
            traceback.print_exc()
            return None

    def loadTokenizer (self):
        "creates a tokenizer for the language code of this spellchecker and stores it in <self.oTokenizer>"
        oNewTokenizer = tokenizer.Tokenizer(self.sLangCode)
        self.oTokenizer = oNewTokenizer

    def setMainDictionary (self, sfDictionary):
        "returns True if the dictionary is loaded"
        self.oMainDic = self._loadDictionary(sfDictionary)
        return bool(self.oMainDic)
            
    def setExtendedDictionary (self, sfDictionary):
        "returns True if the dictionary is loaded"
        self.oExtendedDic = self._loadDictionary(sfDictionary)
        return bool(self.oExtendedDic)

    def setPersonalDictionary (self, sfDictionary):
        "returns True if the dictionary is loaded"
        self.oPersonalDic = self._loadDictionary(sfDictionary)
        return bool(self.oPersonalDic)

    # parse text functions

    def parseParagraph (self, sText, bSpellSugg=False):
        if not self.oTokenizer:
            self.loadTokenizer()
        aSpellErrs = []
        for dToken in self.oTokenizer.genTokens(sText):
            if dToken['sType'] == "WORD" and not self.isValidToken(dToken['sValue']):
                if bSpellSugg:
                    dToken['aSuggestions'] = []
                    for lSugg in self.suggest(dToken['sValue']):
                        dToken['aSuggestions'].extend(lSugg)
                aSpellErrs.append(dToken)
        return aSpellErrs

    def countWordsOccurrences (self, sText, bByLemma=False, bOnlyUnknownWords=False, dWord={}):
        if not self.oTokenizer:
            self.loadTokenizer()
        for dToken in self.oTokenizer.genTokens(sText):
            if dToken['sType'] == "WORD":
                if bOnlyUnknownWords:
                    if not self.isValidToken(dToken['sValue']):
                        dWord[dToken['sValue']] = dWord.get(dToken['sValue'], 0) + 1
                else:
                    if not bByLemma:
                        dWord[dToken['sValue']] = dWord.get(dToken['sValue'], 0) + 1
                    else:
                        for sLemma in self.getLemma(dToken['sValue']):
                            dWord[sLemma] = dWord.get(sLemma, 0) + 1
        return dWord

    # IBDAWG functions

    def isValidToken (self, sToken):
        "checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)"
        if self.oMainDic.isValidToken(sToken):
            return True
96
97
98
99
100
101
102



103
104
105
106
107
108
109
        lResult = self.oMainDic.getMorph(sWord)
        if self.oExtendedDic:
            lResult.extend(self.oExtendedDic.getMorph(sWord))
        if self.oPersonalDic:
            lResult.extend(self.oPersonalDic.getMorph(sWord))
        return lResult




    def suggest (self, sWord, nSuggLimit=10):
        "generator: returns 1, 2 or 3 lists of suggestions"
        yield self.oMainDic.suggest(sWord, nSuggLimit)
        if self.oExtendedDic:
            yield self.oExtendedDic.suggest(sWord, nSuggLimit)
        if self.oPersonalDic:
            yield self.oPersonalDic.suggest(sWord, nSuggLimit)







>
>
>







131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
        lResult = self.oMainDic.getMorph(sWord)
        if self.oExtendedDic:
            lResult.extend(self.oExtendedDic.getMorph(sWord))
        if self.oPersonalDic:
            lResult.extend(self.oPersonalDic.getMorph(sWord))
        return lResult

    def getLemma (self, sWord):
        return set([ s[1:s.find(" ")]  for s in self.getMorph(sWord) ])

    def suggest (self, sWord, nSuggLimit=10):
        "generator: returns 1, 2 or 3 lists of suggestions"
        yield self.oMainDic.suggest(sWord, nSuggLimit)
        if self.oExtendedDic:
            yield self.oExtendedDic.suggest(sWord, nSuggLimit)
        if self.oPersonalDic:
            yield self.oPersonalDic.suggest(sWord, nSuggLimit)