Overview
Comment: | [graphspell][py] new functions: getLemma() and countWordsOccurrences() |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | graphspell |
Files: | files | file ages | folders |
SHA3-256: | d205a5a60172d3c9150901a064507045 |
User & Date: | olr on 2018-02-21 19:13:21 |
Original Comment: | [graphspell] new functions: getLemma() and countWordsOccurrences() |
Other Links: | manifest | tags |
Context
2018-02-21
| ||
19:14 | [graphspell][py] default module import check-in: 31837970bd user: olr tags: trunk, graphspell | |
19:13 | [graphspell][py] new functions: getLemma() and countWordsOccurrences() check-in: d205a5a601 user: olr tags: trunk, graphspell | |
11:53 | [build] new command for future graph rules check-in: c4eb507f6d user: olr tags: trunk, build | |
Changes
Modified graphspell/spellchecker.py from [b9fb2c7b70] to [dbd02131cc].
︙ | ︙ | |||
73 74 75 76 77 78 79 80 81 82 83 84 85 86 | if dToken['sType'] == "WORD" and not self.isValidToken(dToken['sValue']): if bSpellSugg: dToken['aSuggestions'] = [] for lSugg in self.suggest(dToken['sValue']): dToken['aSuggestions'].extend(lSugg) aSpellErrs.append(dToken) return aSpellErrs # IBDAWG functions def isValidToken (self, sToken): "checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)" if self.oMainDic.isValidToken(sToken): return True | > > > > > > > > > > > > > > > > | 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 | if dToken['sType'] == "WORD" and not self.isValidToken(dToken['sValue']): if bSpellSugg: dToken['aSuggestions'] = [] for lSugg in self.suggest(dToken['sValue']): dToken['aSuggestions'].extend(lSugg) aSpellErrs.append(dToken) return aSpellErrs def countWordsOccurrences (self, sText, bByLemma=False, bOnlyUnknownWords=False, dWord={}): if not self.oTokenizer: self.loadTokenizer() for dToken in self.oTokenizer.genTokens(sText): if dToken['sType'] == "WORD": if bOnlyUnknownWords: if not self.isValidToken(dToken['sValue']): dWord[dToken['sValue']] = dWord.get(dToken['sValue'], 0) + 1 else: if not bByLemma: dWord[dToken['sValue']] = dWord.get(dToken['sValue'], 0) + 1 else: for sLemma in self.getLemma(dToken['sValue']): dWord[sLemma] = dWord.get(sLemma, 0) + 1 return dWord # IBDAWG functions def isValidToken (self, sToken): "checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)" if self.oMainDic.isValidToken(sToken): return True |
︙ | ︙ | |||
115 116 117 118 119 120 121 122 123 124 125 126 127 128 | lResult = self.oMainDic.getMorph(sWord) if self.oExtendedDic: lResult.extend(self.oExtendedDic.getMorph(sWord)) if self.oPersonalDic: lResult.extend(self.oPersonalDic.getMorph(sWord)) return lResult def suggest (self, sWord, nSuggLimit=10): "generator: returns 1, 2 or 3 lists of suggestions" yield self.oMainDic.suggest(sWord, nSuggLimit) if self.oExtendedDic: yield self.oExtendedDic.suggest(sWord, nSuggLimit) if self.oPersonalDic: yield self.oPersonalDic.suggest(sWord, nSuggLimit) | > > > | 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 | lResult = self.oMainDic.getMorph(sWord) if self.oExtendedDic: lResult.extend(self.oExtendedDic.getMorph(sWord)) if self.oPersonalDic: lResult.extend(self.oPersonalDic.getMorph(sWord)) return lResult def getLemma (self, sWord): return set([ s[1:s.find(" ")] for s in self.getMorph(sWord) ]) def suggest (self, sWord, nSuggLimit=10): "generator: returns 1, 2 or 3 lists of suggestions" yield self.oMainDic.suggest(sWord, nSuggLimit) if self.oExtendedDic: yield self.oExtendedDic.suggest(sWord, nSuggLimit) if self.oPersonalDic: yield self.oPersonalDic.suggest(sWord, nSuggLimit) |
︙ | ︙ |