Overview
| Comment: | [core] str_transform: change function names |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk | core |
| Files: | files | file ages | folders |
| SHA3-256: |
766f20e23c4c92bb43d777545a474447 |
| User & Date: | olr on 2017-06-23 17:25:20 |
| Other Links: | manifest | tags |
Context
|
2017-06-23
| ||
| 19:23 | [build] use one dictionary name instead of two check-in: cfc69abb68 user: olr tags: trunk, build | |
| 17:25 | [core] str_transform: change function names check-in: 766f20e23c user: olr tags: trunk, core | |
| 17:11 | [core] dawg: compressed lexicon check-in: e5f3698eb4 user: olr tags: trunk, build, new_feature | |
Changes
Modified gc_core/py/dawg.py from [7e6ed7295c] to [ddd6fe1cc6].
| ︙ | ︙ | |||
14 15 16 17 18 19 20 | import collections from . import str_transform as st from .progressbar import ProgressBar def readFile (spf): | | | 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
import collections
from . import str_transform as st
from .progressbar import ProgressBar
def readFile (spf):
print(" < Read lexicon: " + spf)
if os.path.isfile(spf):
with open(spf, "r", encoding="utf-8") as hSrc:
for sLine in hSrc:
sLine = sLine.strip()
if sLine and not sLine.startswith("#"):
yield sLine
else:
|
| ︙ | ︙ | |||
65 66 67 68 69 70 71 |
continue
sFlex, sStem = sLine.split("\t")
else:
sFlex = sStem = sLine
#print(sFlex, sStem, sTag)
yield (sFlex, sStem, sTag)
if sTag2:
| | | 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
continue
sFlex, sStem = sLine.split("\t")
else:
sFlex = sStem = sLine
#print(sFlex, sStem, sTag)
yield (sFlex, sStem, sTag)
if sTag2:
sFlex2 = st.changeWordWithSuffixCode(sFlex, sSfxCode)
#print(sFlex2, sStem, sTag2)
yield (sFlex2, sStem, sTag2)
if nErr:
print(" # Lines ignored: {:>10}".format(nErr))
|
| ︙ | ︙ | |||
159 160 161 162 163 164 165 |
self.nChar = len(dChar)
self.nAff = nAff
self.lArcVal = lVal
self.nArcVal = len(lVal)
self.nTag = self.nArcVal - self.nChar - nAff
self.cStemming = cStemming
if cStemming == "A":
| | | | 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 |
self.nChar = len(dChar)
self.nAff = nAff
self.lArcVal = lVal
self.nArcVal = len(lVal)
self.nTag = self.nArcVal - self.nChar - nAff
self.cStemming = cStemming
if cStemming == "A":
self.funcStemming = st.changeWordWithAffixCode
elif cStemming == "S":
self.funcStemming = st.changeWordWithSuffixCode
else:
self.funcStemming = st.noStemming
# build
lWord.sort()
oProgBar = ProgressBar(0, len(lWord))
for word in lWord:
|
| ︙ | ︙ |
Modified gc_core/py/ibdawg.py from [9ce1ce821d] to [095d971150].
| ︙ | ︙ | |||
40 41 42 43 44 45 46 |
self.nBytesNodeAddress = int(l[3])
self.nEntries = int(l[4])
self.nNode = int(l[5])
self.nArc = int(l[6])
self.nAff = int(l[7])
self.cStemming = l[8]
if self.cStemming == "S":
| | | | 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
self.nBytesNodeAddress = int(l[3])
self.nEntries = int(l[4])
self.nNode = int(l[5])
self.nArc = int(l[6])
self.nAff = int(l[7])
self.cStemming = l[8]
if self.cStemming == "S":
self.funcStemming = st.changeWordWithSuffixCode
elif self.cStemming == "A":
self.funcStemming = st.changeWordWithAffixCode
else:
self.funcStemming = st.noStemming
self.nTag = self.nArcVal - self.nChar - self.nAff
self.dChar = {}
for i in range(1, self.nChar):
self.dChar[self.lArcVal[i]] = i
|
| ︙ | ︙ |
Modified gc_core/py/str_transform.py from [e86906e5ce] to [7df400eceb].
| ︙ | ︙ | |||
69 70 71 72 73 74 75 |
jSfx = 0
for i in range(min(len(sFlex), len(sStem))):
if sFlex[i] != sStem[i]:
break
jSfx += 1
return chr(len(sFlex)-jSfx+48) + sStem[jSfx:]
| | | | | 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
jSfx = 0
for i in range(min(len(sFlex), len(sStem))):
if sFlex[i] != sStem[i]:
break
jSfx += 1
return chr(len(sFlex)-jSfx+48) + sStem[jSfx:]
def changeWordWithSuffixCode (sWord, sSfxCode):
    """ Returns <sWord> transformed according to the suffix code <sSfxCode>.
        The first character of the code is chr(48 + n), where n is the number
        of characters to cut from the end of <sWord>; the remainder of the
        code is the text appended afterwards.
        The code "0" alone returns <sWord> unchanged.
    """
    if sSfxCode == "0":
        return sWord
    nCut = ord(sSfxCode[0]) - 48
    sAppend = sSfxCode[1:]
    if nCut == 0:
        # nothing to cut: a [:-0] slice would wrongly empty the word
        return sWord + sAppend
    return sWord[:-nCut] + sAppend
# Prefix and suffix
def defineAffixCode (sFlex, sStem):
""" Returns a string defining how to get stem from flexion. Examples:
"0" if stem = flexion
"stem" if no common substring
|
| ︙ | ︙ | |||
120 121 122 123 124 125 126 |
if M[x][y] > longest:
longest = M[x][y]
x_longest = x
else:
M[x][y] = 0
return s1[x_longest-longest : x_longest]
| | | | | | 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
if M[x][y] > longest:
longest = M[x][y]
x_longest = x
else:
M[x][y] = 0
return s1[x_longest-longest : x_longest]
def changeWordWithAffixCode (sWord, sAffCode):
    """ Returns <sWord> transformed according to the affix code <sAffCode>.
        The code "0" alone returns <sWord> unchanged.
        Otherwise the code must have the form "<prefix code>/<suffix code>";
        a code without "/" returns the string "# error #".
        In each half, the first character is chr(48 + n), where n is the number
        of characters to cut (from the start for the prefix, from the end for
        the suffix), and the remainder is the text put in their place.
    """
    if sAffCode == "0":
        return sWord
    if '/' not in sAffCode:
        return "# error #"
    sPfxCode, sSfxCode = sAffCode.split('/')
    # prefix: drop the leading characters, then prepend the replacement text
    nPfxCut = ord(sPfxCode[0]) - 48
    sWord = sPfxCode[1:] + sWord[nPfxCut:]
    # suffix: drop the trailing characters, then append the replacement text
    nSfxCut = ord(sSfxCode[0]) - 48
    sAppend = sSfxCode[1:]
    if nSfxCut == 0:
        # nothing to cut: a [:-0] slice would wrongly empty the word
        return sWord + sAppend
    return sWord[:-nSfxCut] + sAppend
|