Grammalecte  Check-in [7c91af2a36]

Overview
Comment:[fr] phonet_simil: récupérer automatiquement les homophones des infinitifs du premier groupe
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | fr | new_feature
Files: files | file ages | folders
SHA3-256: 7c91af2a369676718567ab7812760e06db8da023cee1584d60e3517727b46dc3
User & Date: olr on 2017-06-23 22:30:16
Other Links: manifest | tags
Context
2017-06-23
22:46
[build] more options for build_data check-in: 891cbb4e9d user: olr tags: trunk, build
22:30
[fr] phonet_simil: récupérer automatiquement les homophones des infinitifs du premier groupe check-in: 7c91af2a36 user: olr tags: trunk, fr, new_feature
22:04
[fr] phonet_simil: màj + nouvelles entrées check-in: 88bae35a66 user: olr tags: trunk, fr
Changes

Modified gc_lang/fr/build_data.py from [9294fbef92] to [79b36913b6].

1
2
3
4
5
6
7
8
9
10
11
12
13

14
15
16
17
18
19
20
#!python3

# FRENCH DATA BUILDER
#
# by Olivier R.
# License: MPL 2

import json
import os

import grammalecte.ibdawg as ibdawg
from grammalecte.echo import echo
from grammalecte.str_transform import defineSuffixCode



class cd:
    """Context manager for changing the current working directory"""
    def __init__ (self, newPath):
        self.newPath = os.path.expanduser(newPath)














>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#!python3

# FRENCH DATA BUILDER
#
# by Olivier R.
# License: MPL 2

import json
import os

import grammalecte.ibdawg as ibdawg
from grammalecte.echo import echo
from grammalecte.str_transform import defineSuffixCode
import grammalecte.fr.conj as conj


class cd:
    """Context manager for changing the current working directory"""
    def __init__ (self, newPath):
        self.newPath = os.path.expanduser(newPath)

265
266
267
268
269
270
271
272






273
274
275
276
277
278
279
        return

    with open(sp+"/data/phonet_simil.txt", 'r', encoding='utf-8') as hSrc:
        # set of homophonic words
        lSet = []
        for sLine in hSrc.readlines():
            if not sLine.startswith("#") and sLine.strip():
                lSet.append(sorted(sLine.strip().split()))






        # dictionary of words
        dWord = {}
        for i, aSet in enumerate(lSet):
            for sWord in aSet:
                if oDict.lookup(sWord):
                    dWord[sWord] = i  # warning, what if word in several sets?
                else:







|
>
>
>
>
>
>







266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
        return

    with open(sp+"/data/phonet_simil.txt", 'r', encoding='utf-8') as hSrc:
        # set of homophonic words
        lSet = []
        for sLine in hSrc.readlines():
            if not sLine.startswith("#") and sLine.strip():
                aWord = set(sLine.strip().split())
                aMore = set()
                for sWord in aWord:
                    if sWord.endswith("er") and conj.isVerb(sWord):
                        aMore = aMore.union(conj.getConjSimilInfiV1(sWord))
                aWord = aWord.union(aMore)
                lSet.append(aWord)
        # dictionary of words
        dWord = {}
        for i, aSet in enumerate(lSet):
            for sWord in aSet:
                if oDict.lookup(sWord):
                    dWord[sWord] = i  # warning, what if word in several sets?
                else:

Modified gc_lang/fr/modules/conj.py from [06f8c4bce7] to [d5dfd58ad9].

93
94
95
96
97
98
99















100
101
102
103
104
105
106
        aSugg.add(_getConjWithTags(sInfi, tTags, ":PQ", ":Q4"))
        aSugg.discard("")
        # if there is only one past participle (epi inv), unreliable.
        if len(aSugg) == 1:
            aSugg.clear()
    return aSugg

















def _getTags (sVerb):
    """Return the tags tuple registered for <sVerb>, or None if <sVerb> is not a key of _dVerb.

    The result is usable with the functions _getConjWithTags and _hasConjWithTags.
    """
    # the entry at index 1 of the verb record is used as the index into _lTags
    return _lTags[_dVerb[sVerb][1]]  if sVerb in _dVerb  else None








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
        aSugg.add(_getConjWithTags(sInfi, tTags, ":PQ", ":Q4"))
        aSugg.discard("")
        # if there is only one past participle (epi inv), unreliable.
        if len(aSugg) == 1:
            aSugg.clear()
    return aSugg


def getConjSimilInfiV1 (sInfi):
    """Return the set of conjugated forms of <sInfi> found for a fixed list of tag pairs.

    An empty set is returned when <sInfi> is not a key of _dVerb.
    Empty strings returned by _getConjWithTags are removed from the result.
    NOTE(review): the check-in comment says these forms are collected as
    homophones of first-group infinitives; the tags presumably denote
    tense/mood (":Iq", ":Is", ":Ip") and person (":1s", ":2s", ":3s", ":2p",
    ":3p") -- confirm against the tag definitions used by _getConjWithTags.
    """
    if sInfi not in _dVerb:
        return set()
    tTags = _getTags(sInfi)
    # (first tag, second tag) pairs, looked up in this order
    tTagPairs = (
        (":Iq", ":2s"),
        (":Iq", ":3s"),
        (":Iq", ":3p"),
        (":Is", ":1s"),
        (":Ip", ":2p"),
        (":Iq", ":2p"),
    )
    aForms = { _getConjWithTags(sInfi, tTags, sTag1, sTag2)  for sTag1, sTag2 in tTagPairs }
    # drop the empty string, if any lookup produced one
    aForms.discard("")
    return aForms


def _getTags (sVerb):
    """Return the tags tuple registered for <sVerb>, or None if <sVerb> is not a key of _dVerb.

    The result is usable with the functions _getConjWithTags and _hasConjWithTags.
    """
    # the entry at index 1 of the verb record is used as the index into _lTags
    return _lTags[_dVerb[sVerb][1]]  if sVerb in _dVerb  else None