30
31
32
33
34
35
36
37
38
39
40
41
42
43
|
def readFile (spf):
    """Yield the meaningful lines of the UTF-8 text file <spf>.

    Every line is stripped of leading and trailing whitespace. Lines that
    are empty after stripping and lines starting with "#" are skipped.

    Raises:
        OSError: if <spf> cannot be opened for reading (missing file,
            directory, permission problem...). As this is a generator,
            the error is raised when iteration starts, not at call time.
    """
    # Open directly rather than testing os.path.isfile() first: the
    # check-then-open sequence is racy, and it let failures of open()
    # itself escape with a different message than the one below.
    try:
        hSrc = open(spf, "r", encoding="utf-8")
    except OSError as xErr:
        raise OSError("# Error. File not found or not loadable: " + str(spf)) from xErr
    with hSrc:
        for sLine in hSrc:
            sLine = sLine.strip()
            if sLine and not sLine.startswith("#"):
                yield sLine
def makeDictionaries (sp, sVersion):
|
>
>
|
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
|
def readFile (spf):
    """Yield the meaningful lines of the UTF-8 text file <spf>.

    Every line is stripped of leading and trailing whitespace. Lines that
    are empty after stripping and lines starting with "#" are skipped.
    Reading stops at the first line equal to "__END__" (after stripping);
    that line and everything after it are ignored.

    Raises:
        OSError: if <spf> cannot be opened for reading (missing file,
            directory, permission problem...). As this is a generator,
            the error is raised when iteration starts, not at call time.
    """
    # Open directly rather than testing os.path.isfile() first: the
    # check-then-open sequence is racy, and it let failures of open()
    # itself escape with a different message than the one below.
    try:
        hSrc = open(spf, "r", encoding="utf-8")
    except OSError as xErr:
        raise OSError("# Error. File not found or not loadable: " + str(spf)) from xErr
    with hSrc:
        for sLine in hSrc:
            sLine = sLine.strip()
            if sLine == "__END__":
                # explicit end-of-data marker: the rest of the file is ignored
                break
            if sLine and not sLine.startswith("#"):
                yield sLine
def makeDictionaries (sp, sVersion):
|
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
|
def makeLocutions (sp, bJS=False):
"compile list of locutions in JSON"
print("> Locutions ", end="")
print("(Python et JavaScript)" if bJS else "(Python seulement)")
dLocGraph = {}
oTokenizer = tkz.Tokenizer("fr")
for sLine in itertools.chain(readFile(sp+"/data/locutions.txt"), readFile(sp+"/data/locutions_vrac.txt")):
if sLine == "__END__":
break
dCur = dLocGraph
sLoc, sTag = sLine.split("\t")
for oToken in oTokenizer.genTokens(sLoc.strip()):
sWord = oToken["sValue"]
if sWord not in dCur:
dCur[sWord] = {}
dCur = dCur[sWord]
|
|
|
>
>
>
>
>
|
|
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
|
def makeLocutions (sp, bJS=False):
"compile list of locutions in JSON"
print("> Locutions ", end="")
print("(Python et JavaScript)" if bJS else "(Python seulement)")
dLocGraph = {}
oTokenizer = tkz.Tokenizer("fr")
for sLine in itertools.chain(readFile(sp+"/data/locutions_adverbiales.txt"), \
readFile(sp+"/data/locutions_prépositives.txt"), \
readFile(sp+"/data/locutions_conjonctives.txt"), \
readFile(sp+"/data/locutions_pronominales.txt"), \
readFile(sp+"/data/locutions_adjectivales.txt"), \
readFile(sp+"/data/locutions_interjectives.txt"), \
readFile(sp+"/data/locutions_nominales.txt"), \
readFile(sp+"/data/locutions_verbales.txt")):
dCur = dLocGraph
sLoc, sTag = sLine.split("\t")
for oToken in oTokenizer.genTokens(sLoc.strip()):
sWord = oToken["sValue"]
if sWord not in dCur:
dCur[sWord] = {}
dCur = dCur[sWord]
|