1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
|
#!python3
# FRENCH DATA BUILDER
#
# by Olivier R.
# License: MPL 2
import json
import os
import grammalecte.ibdawg as ibdawg
from grammalecte.echo import echo
from grammalecte.str_transform import defineSuffixCode
import grammalecte.fr.conj as conj
import grammalecte.tokenizer as tkz
|
>
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
#!python3
# FRENCH DATA BUILDER
#
# by Olivier R.
# License: MPL 2
import json
import os
import itertools
import grammalecte.ibdawg as ibdawg
from grammalecte.echo import echo
from grammalecte.str_transform import defineSuffixCode
import grammalecte.fr.conj as conj
import grammalecte.tokenizer as tkz
|
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
|
def makeLocutions (sp, bJS=False):
"compile list of locutions in JSON"
print("> Locutions ", end="")
print("(Python et JavaScript)" if bJS else "(Python seulement)")
dLocGraph = {}
oTokenizer = tkz.Tokenizer("fr")
for sLine in readFile(sp+"/data/locutions.txt"):
dCur = dLocGraph
sLoc, sTag = sLine.split("\t")
for oToken in oTokenizer.genTokens(sLoc.strip()):
sWord = oToken["sValue"]
if sWord not in dCur:
dCur[sWord] = {}
dCur = dCur[sWord]
|
|
|
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
|
def makeLocutions (sp, bJS=False):
"compile list of locutions in JSON"
print("> Locutions ", end="")
print("(Python et JavaScript)" if bJS else "(Python seulement)")
dLocGraph = {}
oTokenizer = tkz.Tokenizer("fr")
for sLine in itertools.chain(readFile(sp+"/data/locutions.txt"), readFile(sp+"/data/locutions_vrac.txt")):
dCur = dLocGraph
sLoc, sTag = sLine.split("\t")
for oToken in oTokenizer.genTokens(sLoc.strip()):
sWord = oToken["sValue"]
if sWord not in dCur:
dCur[sWord] = {}
dCur = dCur[sWord]
|