Grammalecte  Check-in [854ccdff14]

Overview
Comment:[fr] locutions: màj du build
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fr | Lexicographe
Files: files | file ages | folders
SHA3-256: 854ccdff143b524ebe89d564be9a8daca8752a1ba6b9ff069431979ab933c1e5
User & Date: olr on 2017-11-04 07:31:34
Other Links: branch diff | manifest | tags
Context
2017-11-04
09:49
[fr] locutions: tri et corrections check-in: 68fbc93bb9 user: olr tags: fr, Lexicographe
07:31
[fr] locutions: màj du build check-in: 854ccdff14 user: olr tags: fr, Lexicographe
07:27
[fr] locutions: nouveaux fichiers check-in: 0645dffbea user: olr tags: fr, Lexicographe
Changes

Modified gc_lang/fr/build_data.py from [1e628c0406] to [1c5ad0c32d].

30
31
32
33
34
35
36


37
38
39
40
41
42
43
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45







+
+









def readFile (spf):
    """Yield the meaningful lines of the UTF-8 text file <spf>.

    Each line is stripped of surrounding whitespace. Empty lines and lines
    beginning with "#" are skipped. Reading stops as soon as a line equal to
    "__END__" is met (that line is not yielded).

    As this is a generator, the OSError for a path that is not an existing
    regular file is raised when iteration starts, not when readFile is called.
    """
    # Guard clause: refuse anything that is not an existing regular file.
    if not os.path.isfile(spf):
        raise OSError("# Error. File not found or not loadable: " + spf)
    with open(spf, "r", encoding="utf-8") as hFile:
        for sRaw in hFile:
            sContent = sRaw.strip()
            if sContent == "__END__":
                # End-of-data marker: ignore everything after it.
                return
            if not sContent or sContent.startswith("#"):
                # Blank line or comment line.
                continue
            yield sContent


def makeDictionaries (sp, sVersion):
314
315
316
317
318
319
320
321
322
323








324
325
326
327
328
329
330
316
317
318
319
320
321
322



323
324
325
326
327
328
329
330
331
332
333
334
335
336
337







-
-
-
+
+
+
+
+
+
+
+








def makeLocutions (sp, bJS=False):
    "compile list of locutions in JSON"
    print("> Locutions ", end="")
    print("(Python et JavaScript)"  if bJS  else "(Python seulement)")
    dLocGraph = {}
    oTokenizer = tkz.Tokenizer("fr")
    for sLine in itertools.chain(readFile(sp+"/data/locutions.txt"), readFile(sp+"/data/locutions_vrac.txt")):
        if sLine == "__END__":
            break
    for sLine in itertools.chain(readFile(sp+"/data/locutions_adverbiales.txt"), \
                                 readFile(sp+"/data/locutions_prépositives.txt"), \
                                 readFile(sp+"/data/locutions_conjonctives.txt"), \
                                 readFile(sp+"/data/locutions_pronominales.txt"), \
                                 readFile(sp+"/data/locutions_adjectivales.txt"), \
                                 readFile(sp+"/data/locutions_interjectives.txt"), \
                                 readFile(sp+"/data/locutions_nominales.txt"), \
                                 readFile(sp+"/data/locutions_verbales.txt")):
        dCur = dLocGraph
        sLoc, sTag = sLine.split("\t")
        for oToken in oTokenizer.genTokens(sLoc.strip()):
            sWord = oToken["sValue"]
            if sWord not in dCur:
                dCur[sWord] = {}
            dCur = dCur[sWord]