Grammalecte  Diff

Differences From Artifact [9294fbef92]:

To Artifact [3f80b32195]:


1
2
3
4
5
6
7
8
9
10
11
12
13

14
15
16
17
18
19
20
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21













+







#!python3

# FRENCH DATA BUILDER
#
# by Olivier R.
# License: MPL 2

import json
import os

import grammalecte.ibdawg as ibdawg
from grammalecte.echo import echo
from grammalecte.str_transform import defineSuffixCode
import grammalecte.fr.conj as conj


class cd:
    """Context manager for changing the current working directory"""
    def __init__ (self, newPath):
        self.newPath = os.path.expanduser(newPath)

265
266
267
268
269
270
271






272


273
274
275
276
277
278
279
266
267
268
269
270
271
272
273
274
275
276
277
278

279
280
281
282
283
284
285
286
287







+
+
+
+
+
+
-
+
+







        return

    with open(sp+"/data/phonet_simil.txt", 'r', encoding='utf-8') as hSrc:
        # set of homophonic words
        lSet = []
        for sLine in hSrc.readlines():
            if not sLine.startswith("#") and sLine.strip():
                lWord = sLine.strip().split()
                aMore = set()
                for sWord in lWord:
                    if sWord.endswith("er") and conj.isVerb(sWord):
                        aMore = aMore.union(conj.getConjSimilInfiV1(sWord))
                lWord.extend(list(aMore))
                lSet.append(sorted(sLine.strip().split()))
                lSet.append(sorted(set(lWord)))
                #print(lWord)
        # dictionary of words
        dWord = {}
        for i, aSet in enumerate(lSet):
            for sWord in aSet:
                if oDict.lookup(sWord):
                    dWord[sWord] = i  # warning, what if word in several sets?
                else: