1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
|
+
|
#!python3
# FRENCH DATA BUILDER
#
# by Olivier R.
# License: MPL 2
import json
import os
import grammalecte.ibdawg as ibdawg
from grammalecte.echo import echo
from grammalecte.str_transform import defineSuffixCode
import grammalecte.fr.conj as conj
class cd:
"""Context manager for changing the current working directory"""
def __init__ (self, newPath):
self.newPath = os.path.expanduser(newPath)
|
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
|
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
|
-
+
+
+
+
+
+
+
|
return
with open(sp+"/data/phonet_simil.txt", 'r', encoding='utf-8') as hSrc:
# set of homophonic words
lSet = []
for sLine in hSrc.readlines():
if not sLine.startswith("#") and sLine.strip():
lSet.append(sorted(sLine.strip().split()))
aWord = set(sLine.strip().split())
aMore = set()
for sWord in aWord:
if sWord.endswith("er") and conj.isVerb(sWord):
aMore = aMore.union(conj.getConjSimilInfiV1(sWord))
aWord = aWord.union(aMore)
lSet.append(aWord)
# dictionary of words
dWord = {}
for i, aSet in enumerate(lSet):
for sWord in aSet:
if oDict.lookup(sWord):
dWord[sWord] = i # warning: if a word appears in several sets, only the index of the last set is kept
else:
|