Overview
| Comment: | [graphspell][py] dawg: use <set> to prevent duplicate entries |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | graphspell | multid |
| Files: | files | file ages | folders |
| SHA3-256: | 9e16a5b1a95b803a3c38eaae1988a6ca |
| User & Date: | olr on 2018-03-20 13:28:21 |
| Other Links: | branch diff | manifest | tags |
Context
| 2018-03-20 | | |
| 13:57 | [graphspell][js] dawg: use <Set> to prevent duplicate entries check-in: 52593d2f38 user: olr tags: graphspell, multid | |
| 13:28 | [graphspell][py] dawg: use <set> to prevent duplicate entries check-in: 9e16a5b1a9 user: olr tags: graphspell, multid | |
| 13:19 | [graphspell] dawg: rename var name <aff> to <sAff> check-in: b89ba9acea user: olr tags: graphspell, multid | |
Changes
Modified graphspell/dawg.py from [6286ff6ef8] to [7cd1b4dc56].
| ︙ | ︙ | |||
49 50 51 52 53 54 55 |
elif cStemming == "S":
funcStemmingGen = st.defineSuffixCode
elif cStemming == "N":
funcStemmingGen = st.noStemming
else:
raise ValueError("# Error. Unknown stemming code: {}".format(cStemming))
| | | 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
elif cStemming == "S":
funcStemmingGen = st.defineSuffixCode
elif cStemming == "N":
funcStemmingGen = st.noStemming
else:
raise ValueError("# Error. Unknown stemming code: {}".format(cStemming))
aEntry = set()
lChar = ['']; dChar = {}; nChar = 1; dCharOccur = {}
lAff = []; dAff = {}; nAff = 0; dAffOccur = {}
lTag = []; dTag = {}; nTag = 0; dTagOccur = {}
nErr = 0
# read lexicon
if type(src) is str:
|
| ︙ | ︙ | |||
82 83 84 85 86 87 88 |
dAffOccur[sAff] = dCharOccur.get(sAff, 0) + 1
# tags
if sTag not in dTag:
dTag[sTag] = nTag
lTag.append(sTag)
nTag += 1
dTagOccur[sTag] = dTagOccur.get(sTag, 0) + 1
| | | | | | 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
dAffOccur[sAff] = dCharOccur.get(sAff, 0) + 1
# tags
if sTag not in dTag:
dTag[sTag] = nTag
lTag.append(sTag)
nTag += 1
dTagOccur[sTag] = dTagOccur.get(sTag, 0) + 1
aEntry.add((sFlex, dAff[sAff], dTag[sTag]))
if not aEntry:
raise ValueError("# Error. Empty lexicon")
# Preparing DAWG
print(" > Preparing list of words")
lVal = lChar + lAff + lTag
lWord = [ [dChar[c] for c in sFlex] + [iAff+nChar] + [iTag+nChar+nAff] for sFlex, iAff, iTag in aEntry ]
aEntry = None
# Dictionary of arc values occurrency, to sort arcs of each node
dValOccur = dict( [ (dChar[c], dCharOccur[c]) for c in dChar ] \
+ [ (dAff[aff]+nChar, dAffOccur[aff]) for aff in dAff ] \
+ [ (dTag[tag]+nChar+nAff, dTagOccur[tag]) for tag in dTag ] )
self.sFileName = src if type(src) is str else "[None]"
|
| ︙ | ︙ |