14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
|
import collections
from . import str_transform as st
from .progressbar import ProgressBar
def readFile (spf):
print("Read lexicon: " + spf)
if os.path.isfile(spf):
with open(spf, "r", encoding="utf-8") as hSrc:
for sLine in hSrc:
sLine = sLine.strip()
if sLine and not sLine.startswith("#"):
yield sLine
else:
|
|
|
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
|
import collections
from . import str_transform as st
from .progressbar import ProgressBar
def readFile (spf):
print(" < Read lexicon: " + spf)
if os.path.isfile(spf):
with open(spf, "r", encoding="utf-8") as hSrc:
for sLine in hSrc:
sLine = sLine.strip()
if sLine and not sLine.startswith("#"):
yield sLine
else:
|
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
|
continue
sFlex, sStem = sLine.split("\t")
else:
sFlex = sStem = sLine
#print(sFlex, sStem, sTag)
yield (sFlex, sStem, sTag)
if sTag2:
sFlex2 = st.getStemFromSuffixCode(sFlex, sSfxCode)
#print(sFlex2, sStem, sTag2)
yield (sFlex2, sStem, sTag2)
if nErr:
print(" # Lines ignored: {:>10}".format(nErr))
|
|
|
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
|
continue
sFlex, sStem = sLine.split("\t")
else:
sFlex = sStem = sLine
#print(sFlex, sStem, sTag)
yield (sFlex, sStem, sTag)
if sTag2:
sFlex2 = st.changeWordWithSuffixCode(sFlex, sSfxCode)
#print(sFlex2, sStem, sTag2)
yield (sFlex2, sStem, sTag2)
if nErr:
print(" # Lines ignored: {:>10}".format(nErr))
|
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
|
self.nChar = len(dChar)
self.nAff = nAff
self.lArcVal = lVal
self.nArcVal = len(lVal)
self.nTag = self.nArcVal - self.nChar - nAff
self.cStemming = cStemming
if cStemming == "A":
self.funcStemming = st.getStemFromAffixCode
elif cStemming == "S":
self.funcStemming = st.getStemFromSuffixCode
else:
self.funcStemming = st.noStemming
# build
lWord.sort()
oProgBar = ProgressBar(0, len(lWord))
for word in lWord:
|
|
|
|
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
|
self.nChar = len(dChar)
self.nAff = nAff
self.lArcVal = lVal
self.nArcVal = len(lVal)
self.nTag = self.nArcVal - self.nChar - nAff
self.cStemming = cStemming
if cStemming == "A":
self.funcStemming = st.changeWordWithAffixCode
elif cStemming == "S":
self.funcStemming = st.changeWordWithSuffixCode
else:
self.funcStemming = st.noStemming
# build
lWord.sort()
oProgBar = ProgressBar(0, len(lWord))
for word in lWord:
|