Overview
Comment: | [core] str_transform: change functions names |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | core |
Files: | files | file ages | folders |
SHA3-256: |
766f20e23c4c92bb43d777545a474447 |
User & Date: | olr on 2017-06-23 17:25:20 |
Other Links: | manifest | tags |
Context
2017-06-23
| ||
19:23 | [build] use one dictionary name instead of two check-in: cfc69abb68 user: olr tags: trunk, build | |
17:25 | [core] str_transform: change functions names check-in: 766f20e23c user: olr tags: trunk, core | |
17:11 | [core] dawg: compressed lexicon check-in: e5f3698eb4 user: olr tags: trunk, build, new_feature | |
Changes
Modified gc_core/py/dawg.py from [7e6ed7295c] to [ddd6fe1cc6].
︙ | ︙ | |||
14 15 16 17 18 19 20 | import collections from . import str_transform as st from .progressbar import ProgressBar def readFile (spf): | | | 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 | import collections from . import str_transform as st from .progressbar import ProgressBar def readFile (spf): print(" < Read lexicon: " + spf) if os.path.isfile(spf): with open(spf, "r", encoding="utf-8") as hSrc: for sLine in hSrc: sLine = sLine.strip() if sLine and not sLine.startswith("#"): yield sLine else: |
︙ | ︙ | |||
65 66 67 68 69 70 71 | continue sFlex, sStem = sLine.split("\t") else: sFlex = sStem = sLine #print(sFlex, sStem, sTag) yield (sFlex, sStem, sTag) if sTag2: | | | 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | continue sFlex, sStem = sLine.split("\t") else: sFlex = sStem = sLine #print(sFlex, sStem, sTag) yield (sFlex, sStem, sTag) if sTag2: sFlex2 = st.changeWordWithSuffixCode(sFlex, sSfxCode) #print(sFlex2, sStem, sTag2) yield (sFlex2, sStem, sTag2) if nErr: print(" # Lines ignored: {:>10}".format(nErr)) |
︙ | ︙ | |||
159 160 161 162 163 164 165 | self.nChar = len(dChar) self.nAff = nAff self.lArcVal = lVal self.nArcVal = len(lVal) self.nTag = self.nArcVal - self.nChar - nAff self.cStemming = cStemming if cStemming == "A": | | | | 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 | self.nChar = len(dChar) self.nAff = nAff self.lArcVal = lVal self.nArcVal = len(lVal) self.nTag = self.nArcVal - self.nChar - nAff self.cStemming = cStemming if cStemming == "A": self.funcStemming = st.changeWordWithAffixCode elif cStemming == "S": self.funcStemming = st.changeWordWithSuffixCode else: self.funcStemming = st.noStemming # build lWord.sort() oProgBar = ProgressBar(0, len(lWord)) for word in lWord: |
︙ | ︙ |
Modified gc_core/py/ibdawg.py from [9ce1ce821d] to [095d971150].
︙ | ︙ | |||
40 41 42 43 44 45 46 | self.nBytesNodeAddress = int(l[3]) self.nEntries = int(l[4]) self.nNode = int(l[5]) self.nArc = int(l[6]) self.nAff = int(l[7]) self.cStemming = l[8] if self.cStemming == "S": | | | | 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 | self.nBytesNodeAddress = int(l[3]) self.nEntries = int(l[4]) self.nNode = int(l[5]) self.nArc = int(l[6]) self.nAff = int(l[7]) self.cStemming = l[8] if self.cStemming == "S": self.funcStemming = st.changeWordWithSuffixCode elif self.cStemming == "A": self.funcStemming = st.changeWordWithAffixCode else: self.funcStemming = st.noStemming self.nTag = self.nArcVal - self.nChar - self.nAff self.dChar = {} for i in range(1, self.nChar): self.dChar[self.lArcVal[i]] = i |
︙ | ︙ |
Modified gc_core/py/str_transform.py from [e86906e5ce] to [7df400eceb].
︙ | ︙ | |||
69 70 71 72 73 74 75 | jSfx = 0 for i in range(min(len(sFlex), len(sStem))): if sFlex[i] != sStem[i]: break jSfx += 1 return chr(len(sFlex)-jSfx+48) + sStem[jSfx:] | | | | | 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 | jSfx = 0 for i in range(min(len(sFlex), len(sStem))): if sFlex[i] != sStem[i]: break jSfx += 1 return chr(len(sFlex)-jSfx+48) + sStem[jSfx:] def changeWordWithSuffixCode (sWord, sSfxCode): if sSfxCode == "0": return sWord return sWord[:-(ord(sSfxCode[0])-48)] + sSfxCode[1:] if sSfxCode[0] != '0' else sWord + sSfxCode[1:] # Prefix and suffix def defineAffixCode (sFlex, sStem): """ Returns a string defining how to get stem from flexion. Examples: "0" if stem = flexion "stem" if no common substring |
︙ | ︙ | |||
120 121 122 123 124 125 126 | if M[x][y] > longest: longest = M[x][y] x_longest = x else: M[x][y] = 0 return s1[x_longest-longest : x_longest] | | | | | | 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 | if M[x][y] > longest: longest = M[x][y] x_longest = x else: M[x][y] = 0 return s1[x_longest-longest : x_longest] def changeWordWithAffixCode (sWord, sAffCode): if sAffCode == "0": return sWord if '/' not in sAffCode: return "# error #" sPfxCode, sSfxCode = sAffCode.split('/') sWord = sPfxCode[1:] + sWord[(ord(sPfxCode[0])-48):] return sWord[:-(ord(sSfxCode[0])-48)] + sSfxCode[1:] if sSfxCode[0] != '0' else sWord + sSfxCode[1:] |