Grammalecte  Diff

Differences From Artifact [7e6ed7295c]:

To Artifact [ddd6fe1cc6]:


14
15
16
17
18
19
20
21

22
23
24
25
26
27
28
14
15
16
17
18
19
20

21
22
23
24
25
26
27
28







-
+







import collections

from . import str_transform as st
from .progressbar import ProgressBar


def readFile (spf):
    print("Read lexicon: " + spf)
    print(" < Read lexicon: " + spf)
    if os.path.isfile(spf):
        with open(spf, "r", encoding="utf-8") as hSrc:
            for sLine in hSrc:
                sLine = sLine.strip()
                if sLine and not sLine.startswith("#"):
                    yield sLine
    else:
65
66
67
68
69
70
71
72

73
74
75
76
77
78
79
65
66
67
68
69
70
71

72
73
74
75
76
77
78
79







-
+







                        continue
                    sFlex, sStem = sLine.split("\t")
                else:
                    sFlex = sStem = sLine
                #print(sFlex, sStem, sTag)
                yield (sFlex, sStem, sTag)
                if sTag2:
                    sFlex2 = st.getStemFromSuffixCode(sFlex, sSfxCode)
                    sFlex2 = st.changeWordWithSuffixCode(sFlex, sSfxCode)
                    #print(sFlex2, sStem, sTag2)
                    yield (sFlex2, sStem, sTag2)
    if nErr:
        print(" # Lines ignored: {:>10}".format(nErr))



159
160
161
162
163
164
165
166

167
168

169
170
171
172
173
174
175
159
160
161
162
163
164
165

166
167

168
169
170
171
172
173
174
175







-
+

-
+







        self.nChar = len(dChar)
        self.nAff = nAff
        self.lArcVal = lVal
        self.nArcVal = len(lVal)
        self.nTag = self.nArcVal - self.nChar - nAff
        self.cStemming = cStemming
        if cStemming == "A":
            self.funcStemming = st.getStemFromAffixCode
            self.funcStemming = st.changeWordWithAffixCode
        elif cStemming == "S":    
            self.funcStemming = st.getStemFromSuffixCode
            self.funcStemming = st.changeWordWithSuffixCode
        else:
            self.funcStemming = st.noStemming
        
        # build
        lWord.sort()
        oProgBar = ProgressBar(0, len(lWord))
        for word in lWord: