Grammalecte  Check-in [854ccdff14]

Overview
Comment:[fr] locutions: màj du build
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fr | Lexicographe
Files: files | file ages | folders
SHA3-256: 854ccdff143b524ebe89d564be9a8daca8752a1ba6b9ff069431979ab933c1e5
User & Date: olr on 2017-11-04 07:31:34
Other Links: branch diff | manifest | tags
Context
2017-11-04
09:49
[fr] locutions: tri et corrections check-in: 68fbc93bb9 user: olr tags: fr, Lexicographe
07:31
[fr] locutions: màj du build check-in: 854ccdff14 user: olr tags: fr, Lexicographe
07:27
[fr] locutions: nouveaux fichiers check-in: 0645dffbea user: olr tags: fr, Lexicographe
Changes

Modified gc_lang/fr/build_data.py from [1e628c0406] to [1c5ad0c32d].

30
31
32
33
34
35
36


37
38
39
40
41
42
43


def readFile (spf):
    if os.path.isfile(spf):
        with open(spf, "r", encoding="utf-8") as hSrc:
            for sLine in hSrc:
                sLine = sLine.strip()


                if sLine and not sLine.startswith("#"):
                    yield sLine
    else:
        raise OSError("# Error. File not found or not loadable: " + spf)


def makeDictionaries (sp, sVersion):







>
>







30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45


def readFile (spf):
    if os.path.isfile(spf):
        with open(spf, "r", encoding="utf-8") as hSrc:
            for sLine in hSrc:
                sLine = sLine.strip()
                if sLine == "__END__":
                    break
                if sLine and not sLine.startswith("#"):
                    yield sLine
    else:
        raise OSError("# Error. File not found or not loadable: " + spf)


def makeDictionaries (sp, sVersion):
314
315
316
317
318
319
320
321
322





323
324
325
326
327
328
329
330

def makeLocutions (sp, bJS=False):
    "compile list of locutions in JSON"
    print("> Locutions ", end="")
    print("(Python et JavaScript)"  if bJS  else "(Python seulement)")
    dLocGraph = {}
    oTokenizer = tkz.Tokenizer("fr")
    for sLine in itertools.chain(readFile(sp+"/data/locutions.txt"), readFile(sp+"/data/locutions_vrac.txt")):
        if sLine == "__END__":





            break
        dCur = dLocGraph
        sLoc, sTag = sLine.split("\t")
        for oToken in oTokenizer.genTokens(sLoc.strip()):
            sWord = oToken["sValue"]
            if sWord not in dCur:
                dCur[sWord] = {}
            dCur = dCur[sWord]







|
|
>
>
>
>
>
|







316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337

def makeLocutions (sp, bJS=False):
    "compile list of locutions in JSON"
    print("> Locutions ", end="")
    print("(Python et JavaScript)"  if bJS  else "(Python seulement)")
    dLocGraph = {}
    oTokenizer = tkz.Tokenizer("fr")
    for sLine in itertools.chain(readFile(sp+"/data/locutions_adverbiales.txt"), \
                                 readFile(sp+"/data/locutions_prépositives.txt"), \
                                 readFile(sp+"/data/locutions_conjonctives.txt"), \
                                 readFile(sp+"/data/locutions_pronominales.txt"), \
                                 readFile(sp+"/data/locutions_adjectivales.txt"), \
                                 readFile(sp+"/data/locutions_interjectives.txt"), \
                                 readFile(sp+"/data/locutions_nominales.txt"), \
                                 readFile(sp+"/data/locutions_verbales.txt")):
        dCur = dLocGraph
        sLoc, sTag = sLine.split("\t")
        for oToken in oTokenizer.genTokens(sLoc.strip()):
            sWord = oToken["sValue"]
            if sWord not in dCur:
                dCur[sWord] = {}
            dCur = dCur[sWord]