Overview
| Comment: | [build][fr] phonet simil: merge sets if words belong to several sets |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk | fr | build |
| Files: | files | file ages | folders |
| SHA3-256: |
f82c3ce70e793ad3e001a615db95560f |
| User & Date: | olr on 2020-04-29 17:16:40 |
| Other Links: | manifest | tags |
Context
|
2020-04-29
| ||
| 19:05 | [build][fr] build_data.py: fix build for JS check-in: 6bc8dab4c2 user: olr tags: trunk, fr, build | |
| 17:16 | [build][fr] phonet simil: merge sets if words belong to several sets check-in: f82c3ce70e user: olr tags: trunk, fr, build | |
| 17:08 | [fr] phonet_simil.txt update check-in: 582bf42669 user: olr tags: trunk, fr | |
Changes
Modified gc_lang/fr/build_data.py from [c910fde1c7] to [ce4f084f4e].
| ︙ | ︙ | |||
12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
import platform
import graphspell.ibdawg as ibdawg
from graphspell.echo import echo
from graphspell.str_transform import defineSuffixCode
import graphspell.tokenizer as tkz
oDict = None
class cd:
"""Context manager for changing the current working directory"""
def __init__ (self, newPath):
| > > | 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
import platform
import graphspell.ibdawg as ibdawg
from graphspell.echo import echo
from graphspell.str_transform import defineSuffixCode
import graphspell.tokenizer as tkz
import gc_lang.fr.modules.conj as conj
oDict = None
class cd:
"""Context manager for changing the current working directory"""
def __init__ (self, newPath):
|
| ︙ | ︙ | |||
281 282 283 284 285 286 287 |
open(sp+"/modules-js/mfsp_data.json", "w", encoding="utf-8", newline="\n").write(sCode)
def makePhonetTable (sp, bJS=False):
print("> Correspondances phonétiques ", end="")
print("(Python et JavaScript)" if bJS else "(Python seulement)")
| < < < | < | > > > > | | > > > > > > > | > > > | | 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 |
open(sp+"/modules-js/mfsp_data.json", "w", encoding="utf-8", newline="\n").write(sCode)
def makePhonetTable (sp, bJS=False):
print("> Correspondances phonétiques ", end="")
print("(Python et JavaScript)" if bJS else "(Python seulement)")
loadDictionary()
# set of homophonic words
lSet = []
for sLine in readFile(sp+"/data/phonet_simil.txt"):
lWord = sLine.split()
for sWord in lWord:
if sWord.endswith("er") and conj.isVerb(sWord):
lWord.extend(conj.getConjSimilInfiV1(sWord))
lSet.append(set(lWord))
# dictionary of words
dWord = {}
aMultiSetWord = set()
lNewSet = []
nAppend = 0
for i, aSet in enumerate(lSet):
for sWord in aSet:
if oDict.lookup(sWord):
if sWord not in dWord:
dWord[sWord] = i
else:
# word in several set
aMultiSetWord.add(sWord)
iSet = dWord[sWord]
lNewSet.append(lSet[iSet].union(aSet))
dWord[sWord] = len(lSet) + nAppend
nAppend += 1
else:
echo(f" Mot inconnu : <{sWord}>")
lSet.extend(lNewSet)
print(" Mots appartenant à plusieurs ensembles: ", ", ".join(aMultiSetWord))
# dictionary of morphologies
dMorph = {}
for sWord in dWord:
dMorph[sWord] = oDict.getMorph(sWord)
# write file for Python
sCode = "# generated data built in build_data.py (do not edit)\n\n" + \
"dWord = " + str(dWord) + "\n\n" + \
"lSet = " + str(lSet) + "\n\n" + \
"dMorph = " + str(dMorph) + "\n"
open(sp+"/modules/phonet_data.py", "w", encoding="utf-8", newline="\n").write(sCode)
if bJS:
## write file for JavaScript
|
| ︙ | ︙ | |||
363 364 365 366 367 368 369 |
print("========== Build Hunspell dictionaries ==========")
makeDictionaries(spLaunch, dVars['oxt_version'])
def after (spLaunch, dVars, bJS=False):
print("========== Build French data ==========")
makeMfsp(spLaunch, bJS)
| | | | 375 376 377 378 379 380 381 382 383 384 |
print("========== Build Hunspell dictionaries ==========")
makeDictionaries(spLaunch, dVars['oxt_version'])
def after (spLaunch, dVars, bJS=False):
print("========== Build French data ==========")
makeMfsp(spLaunch, bJS)
makePhonetTable(spLaunch, bJS)
makeConj(spLaunch, bJS)
makeLocutions(spLaunch, bJS)
|
Modified gc_lang/fr/modules/conj_data.py from [24f905ee0b] to [348618f642].
cannot compute difference between binary files
Modified gc_lang/fr/modules/phonet_data.py from [fcf5178674] to [30d2b2eb01].
cannot compute difference between binary files