Overview
Comment: | [build][fr] phonet simil: merge sets if a word belongs to several sets |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | fr | build |
Files: | files | file ages | folders |
SHA3-256: |
f82c3ce70e793ad3e001a615db95560f |
User & Date: | olr on 2020-04-29 17:16:40 |
Other Links: | manifest | tags |
Context
2020-04-29
| ||
19:05 | [build][fr] build_data.py: fix build for JS check-in: 6bc8dab4c2 user: olr tags: trunk, fr, build | |
17:16 | [build][fr] phonet simil: merge sets if a word belongs to several sets check-in: f82c3ce70e user: olr tags: trunk, fr, build | |
17:08 | [fr] phonet_simil.txt update check-in: 582bf42669 user: olr tags: trunk, fr | |
Changes
Modified gc_lang/fr/build_data.py from [c910fde1c7] to [ce4f084f4e].
︙ | ︙ | |||
12 13 14 15 16 17 18 19 20 21 22 23 24 25 | import platform import graphspell.ibdawg as ibdawg from graphspell.echo import echo from graphspell.str_transform import defineSuffixCode import graphspell.tokenizer as tkz oDict = None class cd: """Context manager for changing the current working directory""" def __init__ (self, newPath): | > > | 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 | import platform import graphspell.ibdawg as ibdawg from graphspell.echo import echo from graphspell.str_transform import defineSuffixCode import graphspell.tokenizer as tkz import gc_lang.fr.modules.conj as conj oDict = None class cd: """Context manager for changing the current working directory""" def __init__ (self, newPath): |
︙ | ︙ | |||
281 282 283 284 285 286 287 | open(sp+"/modules-js/mfsp_data.json", "w", encoding="utf-8", newline="\n").write(sCode) def makePhonetTable (sp, bJS=False): print("> Correspondances phonétiques ", end="") print("(Python et JavaScript)" if bJS else "(Python seulement)") | < < < | < | > > > > | | > > > > > > > | > > > | | 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 | open(sp+"/modules-js/mfsp_data.json", "w", encoding="utf-8", newline="\n").write(sCode) def makePhonetTable (sp, bJS=False): print("> Correspondances phonétiques ", end="") print("(Python et JavaScript)" if bJS else "(Python seulement)") loadDictionary() # set of homophonic words lSet = [] for sLine in readFile(sp+"/data/phonet_simil.txt"): lWord = sLine.split() for sWord in lWord: if sWord.endswith("er") and conj.isVerb(sWord): lWord.extend(conj.getConjSimilInfiV1(sWord)) lSet.append(set(lWord)) # dictionary of words dWord = {} aMultiSetWord = set() lNewSet = [] nAppend = 0 for i, aSet in enumerate(lSet): for sWord in aSet: if oDict.lookup(sWord): if sWord not in dWord: dWord[sWord] = i else: # word in several set aMultiSetWord.add(sWord) iSet = dWord[sWord] lNewSet.append(lSet[iSet].union(aSet)) dWord[sWord] = len(lSet) + nAppend nAppend += 1 else: echo(f" Mot inconnu : <{sWord}>") lSet.extend(lNewSet) print(" Mots appartenant à plusieurs ensembles: ", ", ".join(aMultiSetWord)) # dictionary of morphologies dMorph = {} for sWord in dWord: dMorph[sWord] = oDict.getMorph(sWord) # write file for Python sCode = "# generated data built in build_data.py (do not edit)\n\n" + \ "dWord = " + str(dWord) + "\n\n" + \ "lSet = " + str(lSet) + "\n\n" + \ "dMorph = " + str(dMorph) + "\n" open(sp+"/modules/phonet_data.py", "w", encoding="utf-8", newline="\n").write(sCode) if bJS: ## write file for JavaScript |
︙ | ︙ | |||
363 364 365 366 367 368 369 | print("========== Build Hunspell dictionaries ==========") makeDictionaries(spLaunch, dVars['oxt_version']) def after (spLaunch, dVars, bJS=False): print("========== Build French data ==========") makeMfsp(spLaunch, bJS) | | | | 375 376 377 378 379 380 381 382 383 384 | print("========== Build Hunspell dictionaries ==========") makeDictionaries(spLaunch, dVars['oxt_version']) def after (spLaunch, dVars, bJS=False): print("========== Build French data ==========") makeMfsp(spLaunch, bJS) makePhonetTable(spLaunch, bJS) makeConj(spLaunch, bJS) makeLocutions(spLaunch, bJS) |
Modified gc_lang/fr/modules/conj_data.py from [24f905ee0b] to [348618f642].
cannot compute difference between binary files
Modified gc_lang/fr/modules/phonet_data.py from [fcf5178674] to [30d2b2eb01].
cannot compute difference between binary files