67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
|
else:
raise ValueError("# Error. Unknown stemming code: {}".format(cStemming))
aEntry = set()
lChar = ['']; dChar = {}; nChar = 1; dCharOccur = {}
lAff = []; dAff = {}; nAff = 0; dAffOccur = {}
lTag = []; dTag = {}; nTag = 0; dTagOccur = {}
nErr = 0
self.a2grams = set()
try:
zFilter = re.compile(sSelectFilterRegex) if sSelectFilterRegex else None
except:
print(" # Error. Wrong filter regex. Filter ignored.")
traceback.print_exc()
zFilter = None
# read lexicon
if type(src) is str:
iterable = readFile(src)
else:
iterable = src
for sFlex, sStem, sTag in iterable:
if not zFilter or zFilter.search(sTag):
self.a2grams.update(st.getNgrams(sFlex))
addWordToCharDict(sFlex)
|
<
|
|
|
|
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
|
else:
raise ValueError("# Error. Unknown stemming code: {}".format(cStemming))
aEntry = set()
lChar = ['']; dChar = {}; nChar = 1; dCharOccur = {}
lAff = []; dAff = {}; nAff = 0; dAffOccur = {}
lTag = []; dTag = {}; nTag = 0; dTagOccur = {}
self.a2grams = set()
try:
zFilter = re.compile(sSelectFilterRegex) if sSelectFilterRegex else None
except re.error:
print("# Error. Wrong filter regex. Filter ignored: ", zFilter)
traceback.print_exc()
zFilter = None
# read lexicon
if isinstance(src, str):
iterable = readFile(src)
else:
iterable = src
for sFlex, sStem, sTag in iterable:
if not zFilter or zFilter.search(sTag):
self.a2grams.update(st.getNgrams(sFlex))
addWordToCharDict(sFlex)
|
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
|
aEntry = None
# Dictionary of arc value occurrences, used to sort the arcs of each node
dValOccur = dict( [ (dChar[c], dCharOccur[c]) for c in dChar ] \
+ [ (dAff[aff]+nChar, dAffOccur[aff]) for aff in dAff ] \
+ [ (dTag[tag]+nChar+nAff, dTagOccur[tag]) for tag in dTag ] )
self.sFileName = src if type(src) is str else "[None]"
self.sLangCode = sLangCode
self.sLangName = sLangName
self.sDicName = sDicName
self.sDescription = sDescription
if dLexiconData:
self.sLangCode = dLexiconData.get("LangCode", self.sLangCode)
self.sLangName = dLexiconData.get("LangName", self.sLangName)
|
|
|
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
|
aEntry = None
# Dictionary of arc value occurrences, used to sort the arcs of each node
dValOccur = dict( [ (dChar[c], dCharOccur[c]) for c in dChar ] \
+ [ (dAff[aff]+nChar, dAffOccur[aff]) for aff in dAff ] \
+ [ (dTag[tag]+nChar+nAff, dTagOccur[tag]) for tag in dTag ] )
self.sFileName = src if isinstance(src, str) else "[None]"
self.sLangCode = sLangCode
self.sLangName = sLangName
self.sDicName = sDicName
self.sDescription = sDescription
if dLexiconData:
self.sLangCode = dLexiconData.get("LangCode", self.sLangCode)
self.sLangName = dLexiconData.get("LangName", self.sLangName)
|
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
|
def select (self, sPattern=""):
    """generator: returns all entries which morphology fits <sPattern>

    Args:
        sPattern: regular expression source. When empty, no pattern is
            compiled and None is handed to self._select.

    Yields:
        Every item produced by self._select(zPattern, self.oRoot, "").

    An invalid pattern is reported on stdout together with a traceback,
    and the traversal then runs with zPattern set to None (presumably
    meaning "no filtering" -- confirm against _select).
    """
    zPattern = None
    if sPattern:
        try:
            zPattern = re.compile(sPattern)
        except re.error:
            # Only a malformed regex is expected here; a bare except would
            # also swallow KeyboardInterrupt/SystemExit and unrelated bugs.
            print("# Error in regex pattern")
            traceback.print_exc()
    yield from self._select(zPattern, self.oRoot, "")
def _select (self, zPattern, oNode, sWord):
# recursive generator
for nVal, oNextNode in oNode.arcs.items():
|
|
|
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
|
def select (self, sPattern=""):
    """generator: yields every entry whose morphology fits <sPattern>

    The pattern is compiled once up front. An empty pattern, or one that
    fails to compile (reported on stdout with a traceback), results in
    None being passed to self._select in place of a compiled regex.
    """
    try:
        zCompiled = re.compile(sPattern) if sPattern else None
    except re.error:
        print("# Error in regex pattern")
        traceback.print_exc()
        zCompiled = None
    # Start the recursive traversal at the root with an empty word prefix.
    yield from self._select(zCompiled, self.oRoot, "")
def _select (self, zPattern, oNode, sWord):
# recursive generator
for nVal, oNextNode in oNode.arcs.items():
|