Grammalecte  Diff

Differences From Artifact [9294fbef92]:

To Artifact [3f80b32195]:


1
2
3
4
5
6
7
8
9
10
11
12
13

14
15
16
17
18
19
20
#!python3

# FRENCH DATA BUILDER
#
# by Olivier R.
# License: MPL 2

import json
import os

import grammalecte.ibdawg as ibdawg
from grammalecte.echo import echo
from grammalecte.str_transform import defineSuffixCode



class cd:
    """Context manager for changing the current working directory"""
    def __init__ (self, newPath):
        self.newPath = os.path.expanduser(newPath)














>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#!python3

# FRENCH DATA BUILDER
#
# by Olivier R.
# License: MPL 2

import json
import os

import grammalecte.ibdawg as ibdawg
from grammalecte.echo import echo
from grammalecte.str_transform import defineSuffixCode
import grammalecte.fr.conj as conj


class cd:
    """Context manager for changing the current working directory"""
    def __init__ (self, newPath):
        self.newPath = os.path.expanduser(newPath)

265
266
267
268
269
270
271






272

273
274
275
276
277
278
279
        return

    with open(sp+"/data/phonet_simil.txt", 'r', encoding='utf-8') as hSrc:
        # set of homophonic words
        lSet = []
        for sLine in hSrc.readlines():
            if not sLine.startswith("#") and sLine.strip():






                lSet.append(sorted(sLine.strip().split()))

        # dictionary of words
        dWord = {}
        for i, aSet in enumerate(lSet):
            for sWord in aSet:
                if oDict.lookup(sWord):
                    dWord[sWord] = i  # warning, what if word in several sets?
                else:







>
>
>
>
>
>
|
>







266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
        return

    with open(sp+"/data/phonet_simil.txt", 'r', encoding='utf-8') as hSrc:
        # set of homophonic words
        lSet = []
        for sLine in hSrc.readlines():
            if not sLine.startswith("#") and sLine.strip():
                lWord = sLine.strip().split()
                aMore = set()
                for sWord in lWord:
                    if sWord.endswith("er") and conj.isVerb(sWord):
                        aMore = aMore.union(conj.getConjSimilInfiV1(sWord))
                lWord.extend(list(aMore))
                lSet.append(sorted(set(lWord)))
                #print(lWord)
        # dictionary of words
        dWord = {}
        for i, aSet in enumerate(lSet):
            for sWord in aSet:
                if oDict.lookup(sWord):
                    dWord[sWord] = i  # warning, what if word in several sets?
                else: