Overview
Comment: | [graphspell] ibdawg: spelling suggestion mechanism test |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | graphspell |
Files: | files | file ages | folders |
SHA3-256: |
b56f2d61d0f35e920e9e3c56bee89df4 |
User & Date: | olr on 2024-06-11 17:27:55 |
Other Links: | manifest | tags |
Context
2024-06-11
| ||
17:44 | [fr] dictionnaires: màj check-in: 800b3de9ea user: olr tags: trunk, fr, v2.2 | |
17:27 | [graphspell] ibdawg: spelling suggestion mechanism test check-in: b56f2d61d0 user: olr tags: trunk, graphspell | |
17:17 | [fr] faux positifs check-in: 4d40f61f04 user: olr tags: trunk, fr | |
Changes
Modified graphspell/ibdawg.py from [71b7ddd53e] to [6f7d77073f].
︙ | ︙ | |||
class SuggResult:
    """Structure for storing, classifying and filtering suggestions

    Candidates are fed one by one through <addSugg>, which scores them against
    the misspelled word and keeps the closest ones. <getSuggestions> returns
    the best accepted candidates, cased like the original word.
    """

    def __init__ (self, sWord, nSuggLimit=10, nDistLimit=-1):
        """
        sWord:      the word for which suggestions are collected
        nSuggLimit: maximum number of suggestions returned by <getSuggestions>
        nDistLimit: maximum distance allowed; any negative value selects
                    the default, (len(sWord) // 3) + 1
        """
        self.sWord = sWord
        self.sSimplifiedWord = st.simplifyWord(sWord)
        self.nDistLimit = nDistLimit  if nDistLimit >= 0  else  (len(sWord) // 3) + 1    # used in suggest()
        self.nMinDist = 1000    # smallest simplified distance seen so far (1000 = nothing seen yet)
        # Temporary sets
        self.aAllSugg = set()   # All suggestions, even the rejected ones
        self.dAccSugg = {}      # Accepted suggestions: {suggestion: score}, lower score is better
        # Parameters
        self.nSuggLimit = nSuggLimit
        self.nTempSuggLimit = nSuggLimit * 6    # once more suggestions than this are accepted, the search is stopped

    def addSugg (self, sSugg, nDeep=0):
        """add a suggestion

        sSugg: candidate word; a candidate already seen is ignored
        nDeep: not used by this method (kept so that existing callers still work)
        """
        if sSugg in self.aAllSugg:
            return
        self.aAllSugg.add(sSugg)
        # simplify the candidate once and reuse the result
        # NOTE(review): assumes st.simplifyWord() is deterministic -- confirm
        sSimplifiedSugg = st.simplifyWord(sSugg)
        nSimDist = st.distanceSift4(self.sSimplifiedWord, sSimplifiedSugg)
        # NOTE(review): showDistance() looks like a debugging aid left in for this test -- confirm before release
        st.showDistance(self.sSimplifiedWord, sSimplifiedSugg)
        if nSimDist < self.nMinDist:
            self.nMinDist = nSimDist
        # accept the candidate only if it is at most 1 farther than the best simplified distance so far
        if nSimDist <= (self.nMinDist + 1):
            nDist = st.distanceJaroWinkler(self.sWord, sSugg)
            st.showDistance(self.sWord, sSugg)
            self.dAccSugg[sSugg] = min(nDist, nSimDist+1)
            if len(self.dAccSugg) > self.nTempSuggLimit:
                self.nDistLimit = -1  # suggest() ends searching when this variable = -1
        # never let the limit exceed the best distance found + 1 (a limit already set to -1 stays -1)
        self.nDistLimit = min(self.nDistLimit, self.nMinDist+1)

    def getSuggestions (self):
        """return a list of suggestions

        At most <nSuggLimit> suggestions, best score first (ties are sorted
        alphabetically). If the original word is uppercase, suggestions are
        uppercased; if only its first letter is uppercase, the first letter
        of each suggestion is uppercased.
        """
        # a negative limit means "no suggestion", not "all but the last ones"
        nLimit = max(self.nSuggLimit, 0)
        # rank all accepted suggestions by (score, suggestion)
        lRes = [ sSugg  for sSugg, _ in sorted(self.dAccSugg.items(), key=lambda x: (x[1], x[0])) ]
        # casing
        # The casing is applied to the whole ranked list and the list is truncated afterwards:
        # two suggestions differing only by case become one entry, and truncating first
        # would return fewer suggestions than requested.
        if self.sWord.isupper():
            lRes = list(OrderedDict.fromkeys(sSugg.upper()  for sSugg in lRes)) # use dict, when Python 3.6+
        elif self.sWord[0:1].isupper():
            # don’t use <.istitle>
            lRes = list(OrderedDict.fromkeys(sSugg[0:1].upper()+sSugg[1:]  for sSugg in lRes)) # use dict, when Python 3.6+
        return lRes[:nLimit]


class IBDAWG:
    """INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""
︙ | ︙ |