73
74
75
76
77
78
79
80
81
82
83
84
85
86
|
if dToken['sType'] == "WORD" and not self.isValidToken(dToken['sValue']):
if bSpellSugg:
dToken['aSuggestions'] = []
for lSugg in self.suggest(dToken['sValue']):
dToken['aSuggestions'].extend(lSugg)
aSpellErrs.append(dToken)
return aSpellErrs
# IBDAWG functions
def isValidToken (self, sToken):
"checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)"
if self.oMainDic.isValidToken(sToken):
return True
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
|
if dToken['sType'] == "WORD" and not self.isValidToken(dToken['sValue']):
if bSpellSugg:
dToken['aSuggestions'] = []
for lSugg in self.suggest(dToken['sValue']):
dToken['aSuggestions'].extend(lSugg)
aSpellErrs.append(dToken)
return aSpellErrs
def countWordsOccurrences (self, sText, bByLemma=False, bOnlyUnknownWords=False, dWord=None):
    """count the occurrences of the words found in <sText>

    Only tokens whose 'sType' is "WORD" are counted.
    - bOnlyUnknownWords: count only the words rejected by self.isValidToken()
      (in this mode, <bByLemma> is ignored);
    - bByLemma: count each lemma returned by self.getLemma() instead of the word itself;
    - dWord: optional dictionary {word: count} to update; when omitted, a new
      dictionary is created for each call.
    Returns the dictionary of counts (the same object as <dWord> when it is given).
    """
    # A literal {} as default value would be created once and shared by every
    # call, so counts would leak from one call to the next.
    if dWord is None:
        dWord = {}
    if not self.oTokenizer:
        self.loadTokenizer()
    for dToken in self.oTokenizer.genTokens(sText):
        if dToken['sType'] != "WORD":
            continue
        sValue = dToken['sValue']
        if bOnlyUnknownWords:
            if not self.isValidToken(sValue):
                dWord[sValue] = dWord.get(sValue, 0) + 1
        elif not bByLemma:
            dWord[sValue] = dWord.get(sValue, 0) + 1
        else:
            # a word may have several lemmas: each one is counted
            for sLemma in self.getLemma(sValue):
                dWord[sLemma] = dWord.get(sLemma, 0) + 1
    return dWord
# IBDAWG functions
def isValidToken (self, sToken):
"checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)"
if self.oMainDic.isValidToken(sToken):
return True
|
115
116
117
118
119
120
121
122
123
124
125
126
127
128
|
lResult = self.oMainDic.getMorph(sWord)
if self.oExtendedDic:
lResult.extend(self.oExtendedDic.getMorph(sWord))
if self.oPersonalDic:
lResult.extend(self.oPersonalDic.getMorph(sWord))
return lResult
def suggest (self, sWord, nSuggLimit=10):
    "generator: yields the suggestions of the main dictionary, then those of the extended and personal dictionaries when they are set (1, 2 or 3 results)"
    # the main dictionary is always queried
    oMain = self.oMainDic
    yield oMain.suggest(sWord, nSuggLimit)
    # optional dictionaries are queried only when they are set
    oExtended = self.oExtendedDic
    if oExtended:
        yield oExtended.suggest(sWord, nSuggLimit)
    oPersonal = self.oPersonalDic
    if oPersonal:
        yield oPersonal.suggest(sWord, nSuggLimit)
|
>
>
>
|
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
|
lResult = self.oMainDic.getMorph(sWord)
if self.oExtendedDic:
lResult.extend(self.oExtendedDic.getMorph(sWord))
if self.oPersonalDic:
lResult.extend(self.oPersonalDic.getMorph(sWord))
return lResult
def getLemma (self, sWord):
    """returns the set of lemmas found for <sWord>

    For each string returned by self.getMorph(), the lemma is the text between
    the first character (skipped; presumably a marker prefix, to be confirmed
    against the dictionary format) and the first space.
    """
    setLemmas = set()
    for sMorph in self.getMorph(sWord):
        nEnd = sMorph.find(" ")
        # str.find() returns -1 when there is no space: slicing with it would
        # silently drop the last character, so the remainder is taken instead.
        setLemmas.add(sMorph[1:nEnd] if nEnd != -1 else sMorph[1:])
    return setLemmas
def suggest (self, sWord, nSuggLimit=10):
    "generator: yields the suggestions of the main dictionary, then those of the extended and personal dictionaries when they are set (1, 2 or 3 results)"
    # the main dictionary is always queried
    oMain = self.oMainDic
    yield oMain.suggest(sWord, nSuggLimit)
    # optional dictionaries are queried only when they are set
    oExtended = self.oExtendedDic
    if oExtended:
        yield oExtended.suggest(sWord, nSuggLimit)
    oPersonal = self.oPersonalDic
    if oPersonal:
        yield oPersonal.suggest(sWord, nSuggLimit)
|