Grammalecte  Diff

Differences From Artifact [39bc8d4153]:

To Artifact [732462dec8]:


1
2
3
4
5
6
7
8
9

10
11
12
13
14
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15









+





#!python3

# FRENCH DATA BUILDER
#
# by Olivier R.
# License: MPL 2

import json
import os
import itertools

import grammalecte.ibdawg as ibdawg
from grammalecte.echo import echo
from grammalecte.str_transform import defineSuffixCode
import grammalecte.fr.conj as conj
315
316
317
318
319
320

321
322
323
324
325
316
317
318
319
320

321
322
323
324
325
326





-
+





    "compile list of locutions in JSON"
    print("> Locutions ", end="")
    print("(Python et JavaScript)"  if bJS  else "(Python seulement)")
    dLocGraph = {}
    oTokenizer = tkz.Tokenizer("fr")
    for sLine in readFile(sp+"/data/locutions.txt"):
    for sLine in itertools.chain(readFile(sp+"/data/locutions.txt"), readFile(sp+"/data/locutions_vrac.txt")):
        dCur = dLocGraph
        sLoc, sTag = sLine.split("\t")
        for oToken in oTokenizer.genTokens(sLoc.strip()):
            sWord = oToken["sValue"]
            if sWord not in dCur: