Grammalecte  Check-in [d205a5a601]

Overview
Comment:[graphspell][py] new functions: getLemma() and countWordsOccurrences()
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | graphspell
Files: files | file ages | folders
SHA3-256: d205a5a60172d3c9150901a064507045d27f698085cffce78089999ef11119f8
User & Date: olr on 2018-02-21 19:13:21
Original Comment: [graphspell] new functions: getLemma() and countWordsOccurrences()
Other Links: manifest | tags
Context
2018-02-21
19:14
[graphspell][py] defaut module import check-in: 31837970bd user: olr tags: trunk, graphspell
19:13
[graphspell][py] new functions: getLemma() and countWordsOccurrences() check-in: d205a5a601 user: olr tags: trunk, graphspell
11:53
[build] new command for future graph rules check-in: c4eb507f6d user: olr tags: trunk, build
Changes

Modified graphspell/spellchecker.py from [b9fb2c7b70] to [dbd02131cc].

73
74
75
76
77
78
79
















80
81
82
83
84
85
86
            if dToken['sType'] == "WORD" and not self.isValidToken(dToken['sValue']):
                if bSpellSugg:
                    dToken['aSuggestions'] = []
                    for lSugg in self.suggest(dToken['sValue']):
                        dToken['aSuggestions'].extend(lSugg)
                aSpellErrs.append(dToken)
        return aSpellErrs

















    # IBDAWG functions

    def isValidToken (self, sToken):
        "checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)"
        if self.oMainDic.isValidToken(sToken):
            return True







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
            if dToken['sType'] == "WORD" and not self.isValidToken(dToken['sValue']):
                if bSpellSugg:
                    dToken['aSuggestions'] = []
                    for lSugg in self.suggest(dToken['sValue']):
                        dToken['aSuggestions'].extend(lSugg)
                aSpellErrs.append(dToken)
        return aSpellErrs

    def countWordsOccurrences (self, sText, bByLemma=False, bOnlyUnknownWords=False, dWord={}):
        if not self.oTokenizer:
            self.loadTokenizer()
        for dToken in self.oTokenizer.genTokens(sText):
            if dToken['sType'] == "WORD":
                if bOnlyUnknownWords:
                    if not self.isValidToken(dToken['sValue']):
                        dWord[dToken['sValue']] = dWord.get(dToken['sValue'], 0) + 1
                else:
                    if not bByLemma:
                        dWord[dToken['sValue']] = dWord.get(dToken['sValue'], 0) + 1
                    else:
                        for sLemma in self.getLemma(dToken['sValue']):
                            dWord[sLemma] = dWord.get(sLemma, 0) + 1
        return dWord

    # IBDAWG functions

    def isValidToken (self, sToken):
        "checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)"
        if self.oMainDic.isValidToken(sToken):
            return True
115
116
117
118
119
120
121



122
123
124
125
126
127
128
        lResult = self.oMainDic.getMorph(sWord)
        if self.oExtendedDic:
            lResult.extend(self.oExtendedDic.getMorph(sWord))
        if self.oPersonalDic:
            lResult.extend(self.oPersonalDic.getMorph(sWord))
        return lResult




    def suggest (self, sWord, nSuggLimit=10):
        "generator: returns 1, 2 or 3 lists of suggestions"
        yield self.oMainDic.suggest(sWord, nSuggLimit)
        if self.oExtendedDic:
            yield self.oExtendedDic.suggest(sWord, nSuggLimit)
        if self.oPersonalDic:
            yield self.oPersonalDic.suggest(sWord, nSuggLimit)







>
>
>







131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
        lResult = self.oMainDic.getMorph(sWord)
        if self.oExtendedDic:
            lResult.extend(self.oExtendedDic.getMorph(sWord))
        if self.oPersonalDic:
            lResult.extend(self.oPersonalDic.getMorph(sWord))
        return lResult

    def getLemma (self, sWord):
        return set([ s[1:s.find(" ")]  for s in self.getMorph(sWord) ])

    def suggest (self, sWord, nSuggLimit=10):
        "generator: returns 1, 2 or 3 lists of suggestions"
        yield self.oMainDic.suggest(sWord, nSuggLimit)
        if self.oExtendedDic:
            yield self.oExtendedDic.suggest(sWord, nSuggLimit)
        if self.oPersonalDic:
            yield self.oPersonalDic.suggest(sWord, nSuggLimit)