Grammalecte  Diff

Differences From Artifact [9294fbef92]:

To Artifact [79b36913b6]:


1
2
3
4
5
6
7
8
9
10
11
12
13

14
15
16
17
18
19
20
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21













+







#!python3

# FRENCH DATA BUILDER
#
# by Olivier R.
# License: MPL 2

import json
import os

import grammalecte.ibdawg as ibdawg
from grammalecte.echo import echo
from grammalecte.str_transform import defineSuffixCode
import grammalecte.fr.conj as conj


class cd:
    """Context manager for changing the current working directory"""
    def __init__ (self, newPath):
        self.newPath = os.path.expanduser(newPath)

265
266
267
268
269
270
271
272







273
274
275
276
277
278
279
266
267
268
269
270
271
272

273
274
275
276
277
278
279
280
281
282
283
284
285
286







-
+
+
+
+
+
+
+







        return

    with open(sp+"/data/phonet_simil.txt", 'r', encoding='utf-8') as hSrc:
        # set of homophonic words
        lSet = []
        for sLine in hSrc.readlines():
            if not sLine.startswith("#") and sLine.strip():
                lSet.append(sorted(sLine.strip().split()))
                aWord = set(sLine.strip().split())
                aMore = set()
                for sWord in aWord:
                    if sWord.endswith("er") and conj.isVerb(sWord):
                        aMore = aMore.union(conj.getConjSimilInfiV1(sWord))
                aWord = aWord.union(aMore)
                lSet.append(aWord)
        # dictionary of words
        dWord = {}
        for i, aSet in enumerate(lSet):
            for sWord in aSet:
                if oDict.lookup(sWord):
                    dWord[sWord] = i  # warning, what if word in several sets?
                else: