Comment: | [fr] lexicographe: restructuration des données, réduction de l’échantillon de test, élisions dorénavant acceptées |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | fr | Lexicographe |
Files: | files | file ages | folders |
SHA3-256: |
f8019de85cad93654fd7bd83f676fc05 |
User & Date: | olr on 2017-10-30 13:48:49 |
Other Links: | branch diff | manifest | tags |
2017-10-30
| ||
16:44 | [fr] lexicographe: minor changes check-in: 4e84b8a6cf user: olr tags: fr, Lexicographe | |
13:48 | [fr] lexicographe: restructuration des données, réduction de l’échantillon de test, élisions dorénavant acceptées check-in: f8019de85c user: olr tags: fr, Lexicographe | |
12:00 | [fr] lexicographe: clarification de code check-in: 83fbc36b7a user: olr tags: fr, Lexicographe | |
Modified gc_lang/fr/build_data.py from [b7abe812ee] to [f2198525f4].
︙ | ︙ | |||
8 9 10 11 12 13 14 15 16 17 18 19 20 21 | import json import os import grammalecte.ibdawg as ibdawg from grammalecte.echo import echo from grammalecte.str_transform import defineSuffixCode import grammalecte.fr.conj as conj class cd: """Context manager for changing the current working directory""" def __init__ (self, newPath): self.newPath = os.path.expanduser(newPath) | > | 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 | import json import os import grammalecte.ibdawg as ibdawg from grammalecte.echo import echo from grammalecte.str_transform import defineSuffixCode import grammalecte.fr.conj as conj import grammalecte.tokenizer as tkz class cd: """Context manager for changing the current working directory""" def __init__ (self, newPath): self.newPath = os.path.expanduser(newPath) |
︙ | ︙ | |||
308 309 310 311 312 313 314 | def makeLocutions (sp, bJS=False): "compile list of locutions in JSON" print("> Locutions ", end="") print("(Python et JavaScript)" if bJS else "(Python seulement)") with open(sp+"/data/locutions.txt", 'r', encoding='utf-8') as hSrc: | | > < | > > | | < < > | | | 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 | def makeLocutions (sp, bJS=False): "compile list of locutions in JSON" print("> Locutions ", end="") print("(Python et JavaScript)" if bJS else "(Python seulement)") with open(sp+"/data/locutions.txt", 'r', encoding='utf-8') as hSrc: dLocGraph = {} oTokenizer = tkz.Tokenizer("fr") for sLine in hSrc.readlines(): if not sLine.startswith("#") and sLine.strip(): dCur = dLocGraph sLoc, sTag = sLine.strip().split("\t") for oToken in oTokenizer.genTokens(sLoc.strip()): sWord = oToken["sValue"] if sWord not in dCur: dCur[sWord] = {} dCur = dCur[sWord] dCur[":"] = sTag sCode = "# generated data (do not edit)\n\n" + \ "dLocutions = " + str(dLocGraph) + "\n" open(sp+"/modules/locutions_data.py", "w", encoding="utf-8", newline="\n").write(sCode) if bJS: open(sp+"/modules-js/locutions_data.json", "w", encoding="utf-8", newline="\n").write(json.dumps(dLocGraph, ensure_ascii=False)) def before (spLaunch, dVars, bJS=False): print("========== Build Hunspell dictionaries ==========") makeDictionaries(spLaunch, dVars['oxt_version']) def after (spLaunch, dVars, bJS=False): print("========== Build French data ==========") makeMfsp(spLaunch, bJS) makeConj(spLaunch, bJS) makePhonetTable(spLaunch, bJS) makeLocutions(spLaunch, bJS) |
Modified gc_lang/fr/data/locutions.txt from [443f1274df] to [e32c77973b].
more than 10,000 changes
Modified gc_lang/fr/modules-js/lexicographe.js from [57cce4f9d0] to [ffb5515826].
︙ | ︙ | |||
346 347 348 349 350 351 352 | aElem.push(oToken); } } } return aElem; } | < < < < | | | | | | | 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 | aElem.push(oToken); } } } return aElem; } getListOfTokensReduc (sText, bInfo = true) { let aTokenList = this.getListOfTokens(sText.replace("'", "’").trim(), false); let iKey = 0; let aElem = []; do { let oToken = aTokenList[iKey]; let sMorphLoc = ''; let aTokenTempList = [oToken]; if (oToken.sType == "WORD" || oToken.sType == "ELPFX"){ let iKeyTree = iKey + 1; let oLocNode = this.oLocGraph[oToken.sValue.toLowerCase()]; while (oLocNode) { let oTokenNext = aTokenList[iKeyTree]; iKeyTree++; if (oTokenNext) { oLocNode = oLocNode[oTokenNext.sValue.toLowerCase()]; } if (oLocNode && iKeyTree <= aTokenList.length) { sMorphLoc = oLocNode[":"]; aTokenTempList.push(oTokenNext); } else { break; } } } if (sMorphLoc) { let sWord = ''; for (let oTokenWord of aTokenTempList) { sWord += oTokenWord.sValue+' '; } iKey = iKey + aTokenTempList.length-1; let oTokenLocution = { 'nEnd': aTokenTempList[aTokenTempList.length-1].nEnd, 'nStart': aTokenTempList[0].nStart, 'sType': "LOC", 'sValue': sWord.replace('’ ','’').trim() }; if (bInfo) { let aFormatedTag = []; for (let sTagMulti of sMorphLoc.split('|') ){ aFormatedTag.push( this._formatTags(sTagMulti).replace(/( \(él.\))/g,'') ); } aElem.push({ sType: oTokenLocution.sType, sValue: oTokenLocution.sValue, aLabel: aFormatedTag }); |
︙ | ︙ |
Modified gc_lang/fr/modules-js/locutions_data.json from [249fef4e46] to [18410d013e].
cannot compute difference between binary files
Modified gc_lang/fr/modules-js/phonet_data.json from [45669375f1] to [63815e6d96].
cannot compute difference between binary files
Modified gc_lang/fr/modules/locutions_data.py from [cbeadb1ff5] to [02f99ec70a].
cannot compute difference between binary files
Modified gc_lang/fr/modules/phonet_data.py from [497cbd30f5] to [be2bb20c17].
cannot compute difference between binary files