1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
"""
Spellchecker.
Useful to check several dictionaries at once.
To avoid iterating over a pile of dictionaries, it is assumed that 3 are enough:
- the main dictionary, bundled with the package
- the community dictionary, added by an organization
- the personal dictionary, created by the user for their own convenience
"""
import importlib
import traceback
from . import ibdawg
from . import tokenizer
|
>
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
|
"""
Spellchecker.
Useful to check several dictionaries at once.
To avoid iterating over a pile of dictionaries, it is assumed that 3 are enough:
- the main dictionary, bundled with the package
- the community dictionary, added by an organization
- the personal dictionary, created by the user for their own convenience
"""
import re
import importlib
import traceback
from . import ibdawg
from . import tokenizer
|
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
|
if self.bPersonalDic:
lMorph.extend(self.oPersonalDic.getMorph(sWord))
if self.bStorage:
self._dMorphologies[sWord] = lMorph
self._dLemmas[sWord] = { s[1:s.find("/")] for s in lMorph }
return lMorph
def getLemma (self, sWord):
    "return the set of lemmas of <sWord>, extracted from its morphologies"
    if not self.bStorage:
        # no cache: the lemma is the text between the first character and the first "/"
        return { sMorph[1:sMorph.find("/")]  for sMorph in self.getMorph(sWord) }
    if sWord not in self._dLemmas:
        # with storage enabled, getMorph() records the lemmas of <sWord> in self._dLemmas
        self.getMorph(sWord)
    return self._dLemmas[sWord]
def suggest (self, sWord, nSuggLimit=10):
    "generator: yields 1, 2 or 3 lists of suggestions (main source first, then community and personal dictionaries if enabled)"
    oLexicographer = self.lexicographer
    if not oLexicographer:
        yield self.oMainDic.suggest(sWord, nSuggLimit, True)
    elif sWord in oLexicographer.dSugg:
        # predefined suggestions are stored as a single "|"-separated string
        yield oLexicographer.dSugg[sWord].split("|")
    elif sWord.istitle() and sWord.lower() in oLexicographer.dSugg:
        # titlecased word: reuse the lowercase entry and capitalize each suggestion
        lLowerSuggs = oLexicographer.dSugg[sWord.lower()].split("|")
        yield [ sSugg[0:1].upper() + sSugg[1:]  for sSugg in lLowerSuggs ]
    else:
        yield self.oMainDic.suggest(sWord, nSuggLimit, True)
    # secondary dictionaries are asked for about half as many suggestions
    if self.bCommunityDic:
        yield self.oCommunityDic.suggest(sWord, (nSuggLimit//2)+1)
    if self.bPersonalDic:
        yield self.oPersonalDic.suggest(sWord, (nSuggLimit//2)+1)
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|
>
>
|
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
|
if self.bPersonalDic:
lMorph.extend(self.oPersonalDic.getMorph(sWord))
if self.bStorage:
self._dMorphologies[sWord] = lMorph
self._dLemmas[sWord] = { s[1:s.find("/")] for s in lMorph }
return lMorph
def morph (self, sWord, sPattern, sNegPattern=""):
    "analyse a word: return True if <sPattern> matches at least one morphology and <sNegPattern> matches none (if <sNegPattern> is '*', every morphology must match <sPattern>)"
    lMorphologies = self.getMorph(sWord)
    if not lMorphologies:
        # no morphology available: nothing can match
        return False
    if sNegPattern == "*":
        # special value: all morphologies must match <sPattern>
        fnSearch = re.compile(sPattern).search
        return all(fnSearch(sMorph) for sMorph in lMorphologies)
    if sNegPattern:
        # negative condition: a single hit is enough to reject the word
        fnNegSearch = re.compile(sNegPattern).search
        for sMorph in lMorphologies:
            if fnNegSearch(sMorph):
                return False
    # positive condition
    fnSearch = re.compile(sPattern).search
    return any(map(fnSearch, lMorphologies))
def getLemma (self, sWord):
    "return the set of lemmas of <sWord>, extracted from its morphologies"
    if not self.bStorage:
        # no cache: the lemma is the text between the first character and the first "/"
        return { sMorph[1:sMorph.find("/")]  for sMorph in self.getMorph(sWord) }
    if sWord not in self._dLemmas:
        # with storage enabled, getMorph() records the lemmas of <sWord> in self._dLemmas
        self.getMorph(sWord)
    return self._dLemmas[sWord]
def suggest (self, sWord, nSuggLimit=10):
    "generator: yields 1, 2 or 3 lists of suggestions (main source first, then community and personal dictionaries if enabled)"
    oLexicographer = self.lexicographer
    if not oLexicographer:
        yield self.oMainDic.suggest(sWord, nSuggLimit, True)
    elif sWord in oLexicographer.dSugg:
        # predefined suggestions are stored as a single "|"-separated string
        yield oLexicographer.dSugg[sWord].split("|")
    elif sWord.istitle() and sWord.lower() in oLexicographer.dSugg:
        # titlecased word: reuse the lowercase entry and capitalize each suggestion
        lLowerSuggs = oLexicographer.dSugg[sWord.lower()].split("|")
        yield [ sSugg[0:1].upper() + sSugg[1:]  for sSugg in lLowerSuggs ]
    else:
        # main dictionary suggestions, keeping only those the lexicographer accepts
        yield [
            sSugg
            for sSugg in self.oMainDic.suggest(sWord, nSuggLimit, True)
            if self.lexicographer.isValidSugg(sSugg, self)
        ]
    # secondary dictionaries are asked for about half as many suggestions
    if self.bCommunityDic:
        yield self.oCommunityDic.suggest(sWord, (nSuggLimit//2)+1)
    if self.bPersonalDic:
        yield self.oPersonalDic.suggest(sWord, (nSuggLimit//2)+1)
|