Grammalecte  Check-in [7c91af2a36]

Overview
Comment:[fr] phonet_simil: récupérer automatiquement les homophones des infinitifs du premier groupe
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | fr | new_feature
Files: files | file ages | folders
SHA3-256: 7c91af2a369676718567ab7812760e06db8da023cee1584d60e3517727b46dc3
User & Date: olr on 2017-06-23 22:30:16
Other Links: manifest | tags
Context
2017-06-23
22:46
[build] more options for build_data check-in: 891cbb4e9d user: olr tags: trunk, build
22:30
[fr] phonet_simil: récupérer automatiquement les homophones des infinitifs du premier groupe check-in: 7c91af2a36 user: olr tags: trunk, fr, new_feature
22:04
[fr] phonet_simil: màj + nouvelles entrées check-in: 88bae35a66 user: olr tags: trunk, fr
Changes

Modified gc_lang/fr/build_data.py from [9294fbef92] to [79b36913b6].

1
2
3
4
5
6
7
8
9
10
11
12
13

14
15
16
17
18
19
20
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21













+







#!python3

# FRENCH DATA BUILDER
#
# by Olivier R.
# License: MPL 2

import json
import os

import grammalecte.ibdawg as ibdawg
from grammalecte.echo import echo
from grammalecte.str_transform import defineSuffixCode
import grammalecte.fr.conj as conj


class cd:
    """Context manager for changing the current working directory"""
    def __init__ (self, newPath):
        self.newPath = os.path.expanduser(newPath)

265
266
267
268
269
270
271
272







273
274
275
276
277
278
279
266
267
268
269
270
271
272

273
274
275
276
277
278
279
280
281
282
283
284
285
286







-
+
+
+
+
+
+
+







        return

    with open(sp+"/data/phonet_simil.txt", 'r', encoding='utf-8') as hSrc:
        # set of homophonic words
        lSet = []
        for sLine in hSrc.readlines():
            if not sLine.startswith("#") and sLine.strip():
                lSet.append(sorted(sLine.strip().split()))
                aWord = set(sLine.strip().split())
                aMore = set()
                for sWord in aWord:
                    if sWord.endswith("er") and conj.isVerb(sWord):
                        aMore = aMore.union(conj.getConjSimilInfiV1(sWord))
                aWord = aWord.union(aMore)
                lSet.append(aWord)
        # dictionary of words
        dWord = {}
        for i, aSet in enumerate(lSet):
            for sWord in aSet:
                if oDict.lookup(sWord):
                    dWord[sWord] = i  # warning, what if word in several sets?
                else:

Modified gc_lang/fr/modules/conj.py from [06f8c4bce7] to [d5dfd58ad9].

93
94
95
96
97
98
99















100
101
102
103
104
105
106
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







        aSugg.add(_getConjWithTags(sInfi, tTags, ":PQ", ":Q4"))
        aSugg.discard("")
        # if there is only one past participle (epi inv), unreliable.
        if len(aSugg) == 1:
            aSugg.clear()
    return aSugg


def getConjSimilInfiV1 (sInfi):
    """Return a set of conjugated forms of the infinitive <sInfi>.

    The forms are looked up with _getConjWithTags for a fixed list of
    (tense tag, person tag) pairs. Lookups that yield an empty string
    are dropped from the result.

    Returns an empty set when <sInfi> is not a key of _dVerb.

    NOTE(review): judging by the name, this is meant for first-group
    infinitives ("V1"); the function itself does not check the verb
    group -- confirm that callers filter accordingly.
    """
    if sInfi not in _dVerb:
        return set()
    tTags = _getTags(sInfi)
    # (tense tag, person tag) pairs, queried in this order
    tTensePerson = (
        (":Iq", ":2s"),
        (":Iq", ":3s"),
        (":Iq", ":3p"),
        (":Is", ":1s"),
        (":Ip", ":2p"),
        (":Iq", ":2p"),
    )
    aSugg = {
        _getConjWithTags(sInfi, tTags, sTense, sWho)
        for sTense, sWho in tTensePerson
    }
    # an empty string stands for a missing form: not a suggestion
    aSugg.discard("")
    return aSugg


def _getTags (sVerb):
    """Return the tuple of tags associated with <sVerb>, or None if unknown.

    The result is usable with the functions _getConjWithTags and
    _hasConjWithTags.

    The second item of the verb's entry in _dVerb is used as the index
    into _lTags. None is returned when <sVerb> is not a key of _dVerb.
    """
    if sVerb in _dVerb:
        nTagsIndex = _dVerb[sVerb][1]
        return _lTags[nTagsIndex]
    return None