Overview
Comment: | [fr] phonet_simil: récupérer automatiquement les homophones des infinitifs du premier groupe |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | fr | new_feature |
Files: | files | file ages | folders |
SHA3-256: | 7c91af2a369676718567ab7812760e06 |
User & Date: | olr on 2017-06-23 22:30:16 |
Other Links: | manifest | tags |
Context
2017-06-23 | | |
22:46 | [build] more options for build_data check-in: 891cbb4e9d user: olr tags: trunk, build | |
22:30 | [fr] phonet_simil: récupérer automatiquement les homophones des infinitifs du premier groupe check-in: 7c91af2a36 user: olr tags: trunk, fr, new_feature | |
22:04 | [fr] phonet_simil: màj + nouvelles entrées check-in: 88bae35a66 user: olr tags: trunk, fr | |
Changes
Modified gc_lang/fr/build_data.py from [9294fbef92] to [79b36913b6].
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 | #!python3 # FRENCH DATA BUILDER # # by Olivier R. # License: MPL 2 import json import os import grammalecte.ibdawg as ibdawg from grammalecte.echo import echo from grammalecte.str_transform import defineSuffixCode class cd: """Context manager for changing the current working directory""" def __init__ (self, newPath): self.newPath = os.path.expanduser(newPath) | > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 | #!python3 # FRENCH DATA BUILDER # # by Olivier R. # License: MPL 2 import json import os import grammalecte.ibdawg as ibdawg from grammalecte.echo import echo from grammalecte.str_transform import defineSuffixCode import grammalecte.fr.conj as conj class cd: """Context manager for changing the current working directory""" def __init__ (self, newPath): self.newPath = os.path.expanduser(newPath) |
︙ | ︙ | |||
265 266 267 268 269 270 271 | return with open(sp+"/data/phonet_simil.txt", 'r', encoding='utf-8') as hSrc: # set of homophonic words lSet = [] for sLine in hSrc.readlines(): if not sLine.startswith("#") and sLine.strip(): | | > > > > > > | 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 | return with open(sp+"/data/phonet_simil.txt", 'r', encoding='utf-8') as hSrc: # set of homophonic words lSet = [] for sLine in hSrc.readlines(): if not sLine.startswith("#") and sLine.strip(): aWord = set(sLine.strip().split()) aMore = set() for sWord in aWord: if sWord.endswith("er") and conj.isVerb(sWord): aMore = aMore.union(conj.getConjSimilInfiV1(sWord)) aWord = aWord.union(aMore) lSet.append(aWord) # dictionary of words dWord = {} for i, aSet in enumerate(lSet): for sWord in aSet: if oDict.lookup(sWord): dWord[sWord] = i # warning, what if word in several sets? else: |
︙ | ︙ |
Modified gc_lang/fr/modules/conj.py from [06f8c4bce7] to [d5dfd58ad9].
︙ | ︙ | |||
93 94 95 96 97 98 99 100 101 102 103 104 105 106 | aSugg.add(_getConjWithTags(sInfi, tTags, ":PQ", ":Q4")) aSugg.discard("") # if there is only one past participle (epi inv), unreliable. if len(aSugg) == 1: aSugg.clear() return aSugg def _getTags (sVerb): "returns tuple of tags (usable with functions _getConjWithTags and _hasConjWithTags)" if sVerb not in _dVerb: return None return _lTags[_dVerb[sVerb][1]] | > > > > > > > > > > > > > > > | 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 | aSugg.add(_getConjWithTags(sInfi, tTags, ":PQ", ":Q4")) aSugg.discard("") # if there is only one past participle (epi inv), unreliable. if len(aSugg) == 1: aSugg.clear() return aSugg def getConjSimilInfiV1 (sInfi): if sInfi not in _dVerb: return set() tTags = _getTags(sInfi) aSugg = set() aSugg.add(_getConjWithTags(sInfi, tTags, ":Iq", ":2s")) aSugg.add(_getConjWithTags(sInfi, tTags, ":Iq", ":3s")) aSugg.add(_getConjWithTags(sInfi, tTags, ":Iq", ":3p")) aSugg.add(_getConjWithTags(sInfi, tTags, ":Is", ":1s")) aSugg.add(_getConjWithTags(sInfi, tTags, ":Ip", ":2p")) aSugg.add(_getConjWithTags(sInfi, tTags, ":Iq", ":2p")) aSugg.discard("") return aSugg def _getTags (sVerb): "returns tuple of tags (usable with functions _getConjWithTags and _hasConjWithTags)" if sVerb not in _dVerb: return None return _lTags[_dVerb[sVerb][1]] |
︙ | ︙ |