266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
|
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
|
-
+
-
+
-
-
+
+
+
|
return
with open(sp+"/data/phonet_simil.txt", 'r', encoding='utf-8') as hSrc:
# set of homophonic words
lSet = []
for sLine in hSrc.readlines():
if not sLine.startswith("#") and sLine.strip():
aWord = set(sLine.strip().split())
lWord = sLine.strip().split()
aMore = set()
for sWord in aWord:
for sWord in lWord:
if sWord.endswith("er") and conj.isVerb(sWord):
aMore = aMore.union(conj.getConjSimilInfiV1(sWord))
aWord = aWord.union(aMore)
lSet.append(aWord)
lWord.extend(list(aMore))
lSet.append(lWord)
#print(lWord)
# dictionary of words
dWord = {}
for i, aSet in enumerate(lSet):
for sWord in aSet:
if oDict.lookup(sWord):
dWord[sWord] = i # warning, what if word in several sets?
else:
|