Grammalecte  Diff

Differences From Artifact [c910fde1c7]:

To Artifact [6e865955c0]:


12
13
14
15
16
17
18


19
20
21
22
23
24
25
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27







+
+







import platform

import graphspell.ibdawg as ibdawg
from graphspell.echo import echo
from graphspell.str_transform import defineSuffixCode
import graphspell.tokenizer as tkz

import gc_lang.fr.modules.conj as conj


oDict = None


class cd:
    """Context manager for changing the current working directory"""
    def __init__ (self, newPath):
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299

300
301

302
303
304



305
306
307

308
309
310














311
312
313
314
315
316
317

318
319
320
321
322
323
324
283
284
285
286
287
288
289


290
291
292
293
294
295

296
297

298


299
300
301
302
303
304
305
306
307
308
309



310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329

330
331
332
333
334
335
336
337







-
-






-


-
+
-
-
+



+
+
+



+
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+






-
+







        open(sp+"/modules-js/mfsp_data.json", "w", encoding="utf-8", newline="\n").write(sCode)


def makePhonetTable (sp, bJS=False):
    print("> Correspondances phonétiques ", end="")
    print("(Python et JavaScript)"  if bJS  else "(Python seulement)")

    import gc_lang.fr.modules.conj as conj

    loadDictionary()

    # set of homophonic words
    lSet = []
    for sLine in readFile(sp+"/data/phonet_simil.txt"):
        lWord = sLine.split()
        aMore = set()
        for sWord in lWord:
            if sWord.endswith("er") and conj.isVerb(sWord):
                aMore = aMore.union(conj.getConjSimilInfiV1(sWord))
                lWord.extend(conj.getConjSimilInfiV1(sWord))
        lWord.extend(list(aMore))
        lSet.append(sorted(set(lWord)))
        lSet.append(set(lWord))

    # dictionary of words
    dWord = {}
    aMultiSetWord = set()
    lNewSet = []
    nAppend = 0
    for i, aSet in enumerate(lSet):
        for sWord in aSet:
            if oDict.lookup(sWord):
                if sWord not in dWord:
                dWord[sWord] = i  # warning, what if word in several sets?
            else:
                echo("Mot inconnu : " + sWord)
                    dWord[sWord] = i
                else:
                    # word in several set
                    aMultiSetWord.add(sWord)
                    iSet = dWord[sWord]
                    lNewSet.append(lSet[iSet].union(aSet))
                    dWord[sWord] = len(lSet) + nAppend
                    nAppend += 1
            else:
                echo(f"  Mot inconnu : <{sWord}>")
    lSet.extend(lNewSet)
    lSet = [ sorted(aSet) for aSet in lSet ]
    print("  Mots appartenant à plusieurs ensembles: ", ", ".join(aMultiSetWord))

    # dictionary of morphologies
    dMorph = {}
    for sWord in dWord:
        dMorph[sWord] = oDict.getMorph(sWord)

    # write file for Python
    sCode = "# generated data (do not edit)\n\n" + \
    sCode = "# generated data built in build_data.py (do not edit)\n\n" + \
            "dWord = " + str(dWord) + "\n\n" + \
            "lSet = " + str(lSet) + "\n\n" + \
            "dMorph = " + str(dMorph) + "\n"
    open(sp+"/modules/phonet_data.py", "w", encoding="utf-8", newline="\n").write(sCode)

    if bJS:
        ## write file for JavaScript
363
364
365
366
367
368
369
370
371


372
376
377
378
379
380
381
382


383
384
385







-
-
+
+

    print("========== Build Hunspell dictionaries ==========")
    makeDictionaries(spLaunch, dVars['oxt_version'])


def after (spLaunch, dVars, bJS=False):
    print("========== Build French data ==========")
    makeMfsp(spLaunch, bJS)
    makeConj(spLaunch, bJS)
    makePhonetTable(spLaunch, bJS)
    makePhonetTable(spLaunch, bJS)
    makeConj(spLaunch, bJS)
    makeLocutions(spLaunch, bJS)