Overview
| Comment: | [graphspell][py] new functions: getLemma() and countWordsOccurrences() |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk | graphspell |
| Files: | files | file ages | folders |
| SHA3-256: |
d205a5a60172d3c9150901a064507045 |
| User & Date: | olr on 2018-02-21 19:13:21 |
| Original Comment: | [graphspell] new functions: getLemma() and countWordsOccurrences() |
| Other Links: | manifest | tags |
Context
|
2018-02-21
| ||
| 19:14 | [graphspell][py] default module import check-in: 31837970bd user: olr tags: trunk, graphspell | |
| 19:13 | [graphspell][py] new functions: getLemma() and countWordsOccurrences() check-in: d205a5a601 user: olr tags: trunk, graphspell | |
| 11:53 | [build] new command for future graph rules check-in: c4eb507f6d user: olr tags: trunk, build | |
Changes
Modified graphspell/spellchecker.py from [b9fb2c7b70] to [dbd02131cc].
| ︙ | ︙ | |||
73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
if dToken['sType'] == "WORD" and not self.isValidToken(dToken['sValue']):
if bSpellSugg:
dToken['aSuggestions'] = []
for lSugg in self.suggest(dToken['sValue']):
dToken['aSuggestions'].extend(lSugg)
aSpellErrs.append(dToken)
return aSpellErrs
# IBDAWG functions
def isValidToken (self, sToken):
"checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)"
if self.oMainDic.isValidToken(sToken):
return True
| > > > > > > > > > > > > > > > > | 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
if dToken['sType'] == "WORD" and not self.isValidToken(dToken['sValue']):
if bSpellSugg:
dToken['aSuggestions'] = []
for lSugg in self.suggest(dToken['sValue']):
dToken['aSuggestions'].extend(lSugg)
aSpellErrs.append(dToken)
return aSpellErrs
def countWordsOccurrences (self, sText, bByLemma=False, bOnlyUnknownWords=False, dWord=None):
    """Count the occurrences of the WORD tokens found in <sText>.

    sText              text to tokenize
    bByLemma           if True, increment every lemma returned by getLemma() instead of the word itself
    bOnlyUnknownWords  if True, count only the words rejected by isValidToken();
                       in that case <bByLemma> is ignored
    dWord              optional dictionary {word: count}, updated in place, which allows
                       cumulating counts over several texts; a new one is created when omitted

    Returns the dictionary of counts (<dWord> itself when it was given).
    """
    # A new dictionary for each call: a mutable default value would be shared by every call
    # and counts would leak from one text to the next.
    if dWord is None:
        dWord = {}
    if not self.oTokenizer:
        self.loadTokenizer()
    for dToken in self.oTokenizer.genTokens(sText):
        if dToken['sType'] != "WORD":
            continue
        sWord = dToken['sValue']
        if bOnlyUnknownWords:
            if not self.isValidToken(sWord):
                dWord[sWord] = dWord.get(sWord, 0) + 1
        elif not bByLemma:
            dWord[sWord] = dWord.get(sWord, 0) + 1
        else:
            for sLemma in self.getLemma(sWord):
                dWord[sLemma] = dWord.get(sLemma, 0) + 1
    return dWord
# IBDAWG functions
def isValidToken (self, sToken):
"checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)"
if self.oMainDic.isValidToken(sToken):
return True
|
| ︙ | ︙ | |||
115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
lResult = self.oMainDic.getMorph(sWord)
if self.oExtendedDic:
lResult.extend(self.oExtendedDic.getMorph(sWord))
if self.oPersonalDic:
lResult.extend(self.oPersonalDic.getMorph(sWord))
return lResult
def suggest (self, sWord, nSuggLimit=10):
    "generator: yields one list of suggestions per available dictionary (main, then extended, then personal)"
    # the main dictionary is always queried
    yield self.oMainDic.suggest(sWord, nSuggLimit)
    # the optional dictionaries are looked up one at a time, only when the generator reaches them
    for sDicName in ("oExtendedDic", "oPersonalDic"):
        oDic = getattr(self, sDicName)
        if oDic:
            yield oDic.suggest(sWord, nSuggLimit)
| > > > | 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
lResult = self.oMainDic.getMorph(sWord)
if self.oExtendedDic:
lResult.extend(self.oExtendedDic.getMorph(sWord))
if self.oPersonalDic:
lResult.extend(self.oPersonalDic.getMorph(sWord))
return lResult
def getLemma (self, sWord):
    "returns the set of lemmas of <sWord>, extracted from the morphologies given by getMorph()"
    # For each morphology, the lemma is what lies between the first character (skipped)
    # and the first space. partition() keeps the whole remainder when there is no space,
    # whereas slicing up to find(" ") == -1 would silently drop the last character.
    # NOTE(review): the one-character prefix is presumably a lemma marker -- confirm against the dictionary format.
    return { sMorph.partition(" ")[0][1:]  for sMorph in self.getMorph(sWord) }
def suggest (self, sWord, nSuggLimit=10):
    "generator: yields one list of suggestions per available dictionary (main, then extended, then personal)"
    # the main dictionary is always queried
    yield self.oMainDic.suggest(sWord, nSuggLimit)
    # the optional dictionaries are looked up one at a time, only when the generator reaches them
    for sDicName in ("oExtendedDic", "oPersonalDic"):
        oDic = getattr(self, sDicName)
        if oDic:
            yield oDic.suggest(sWord, nSuggLimit)
|
| ︙ | ︙ |