Overview
| Comment: | [graphspell] ibdawg: spelling suggestion mechanism test |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk | graphspell |
| Files: | files | file ages | folders |
| SHA3-256: |
b56f2d61d0f35e920e9e3c56bee89df4 |
| User & Date: | olr on 2024-06-11 17:27:55 |
| Other Links: | manifest | tags |
Context
|
2024-06-11
| ||
| 17:44 | [fr] dictionnaires: màj check-in: 800b3de9ea user: olr tags: trunk, fr, v2.2 | |
| 17:27 | [graphspell] ibdawg: spelling suggestion mechanism test check-in: b56f2d61d0 user: olr tags: trunk, graphspell | |
| 17:17 | [fr] faux positifs check-in: 4d40f61f04 user: olr tags: trunk, fr | |
Changes
Modified graphspell/ibdawg.py from [71b7ddd53e] to [6f7d77073f].
| ︙ | ︙ | |||
40 41 42 43 44 45 46 |
class SuggResult:
"""Structure for storing, classifying and filtering suggestions"""
def __init__ (self, sWord, nSuggLimit=10, nDistLimit=-1):
self.sWord = sWord
self.sSimplifiedWord = st.simplifyWord(sWord)
| | < | > < < | | | | | < < | | | < | | | < | | | | < < < < | | | 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
class SuggResult:
    """Structure for storing, classifying and filtering suggestions.

    Candidates are submitted one at a time with addSugg(); getSuggestions()
    returns the best ranked ones, cased like the original word.
    """

    def __init__ (self, sWord, nSuggLimit=10, nDistLimit=-1):
        """Prepare an empty result set for <sWord>.

        sWord       word for which suggestions are collected
        nSuggLimit  maximum number of suggestions returned by getSuggestions()
        nDistLimit  initial distance limit; any negative value selects the
                    default (len(sWord) // 3) + 1
        """
        self.sWord = sWord
        self.sSimplifiedWord = st.simplifyWord(sWord)
        self.nDistLimit = nDistLimit  if nDistLimit >= 0  else  (len(sWord) // 3) + 1   # used in suggest()
        self.nMinDist = 1000    # smallest simplified-word distance seen so far (1000 = nothing seen yet)
        # Temporary sets
        self.aAllSugg = set()   # All submitted suggestions, even the rejected ones
        self.dAccSugg = {}      # Accepted suggestions: {suggestion: score}, lower score ranks first
        # Parameters
        self.nSuggLimit = nSuggLimit
        self.nTempSuggLimit = nSuggLimit * 6    # once more suggestions than this are accepted, the search is told to stop

    def addSugg (self, sSugg, nDeep=0):
        """Submit the candidate <sSugg>; it is scored and kept if close enough.

        A candidate already submitted is ignored. <nDeep> is not used by this
        method; it is kept so that existing callers keep working.
        """
        if sSugg in self.aAllSugg:
            return
        self.aAllSugg.add(sSugg)
        sSimplifiedSugg = st.simplifyWord(sSugg)
        nSimDist = st.distanceSift4(self.sSimplifiedWord, sSimplifiedSugg)
        # NOTE(review): showDistance() looks like a debugging aid of this test version -- confirm before release
        st.showDistance(self.sSimplifiedWord, st.simplifyWord(sSugg))
        if nSimDist < self.nMinDist:
            self.nMinDist = nSimDist
        # accept only candidates at most one step further than the closest one seen so far
        if nSimDist <= (self.nMinDist + 1):
            nDist = st.distanceJaroWinkler(self.sWord, sSugg)
            st.showDistance(self.sWord, sSugg)
            self.dAccSugg[sSugg] = min(nDist, nSimDist+1)
            if len(self.dAccSugg) > self.nTempSuggLimit:
                self.nDistLimit = -1    # suggest() ends searching when this variable = -1
        # never raises the limit, so a -1 set above is preserved
        self.nDistLimit = min(self.nDistLimit, self.nMinDist+1)

    def getSuggestions (self):
        """Return a list of at most <nSuggLimit> suggestions, best first.

        Suggestions are ordered by score, then alphabetically, and are cased
        like the original word (all upper case, or first letter upper case).
        """
        # rank every accepted suggestion; truncation is done last (see below)
        lRanked = sorted(self.dAccSugg.items(), key=lambda x: (x[1], x[0]))
        lRes = [ sSugg  for sSugg, _ in lRanked ]
        # casing
        # Re-casing can merge suggestions which differed only by case; this is
        # why the whole ranked list is re-cased and de-duplicated BEFORE it is
        # cut to <nSuggLimit>: cutting first would return fewer suggestions
        # than requested although more accepted candidates are available.
        if self.sWord.isupper():
            lRes = list(OrderedDict.fromkeys(sSugg.upper()  for sSugg in lRes))     # use dict, when Python 3.6+
        elif self.sWord[0:1].isupper():
            # don't use <.istitle>
            lRes = list(OrderedDict.fromkeys(sSugg[0:1].upper()+sSugg[1:]  for sSugg in lRes))  # use dict, when Python 3.6+
        # a limit <= 0 gives an empty list (a bare negative slice bound would not)
        return lRes[:max(self.nSuggLimit, 0)]
class IBDAWG:
"""INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""
|
| ︙ | ︙ |