Index: gc_lang/fr/build_data.py
==================================================================
--- gc_lang/fr/build_data.py
+++ gc_lang/fr/build_data.py
@@ -32,10 +32,12 @@
 def readFile (spf):
     if os.path.isfile(spf):
         with open(spf, "r", encoding="utf-8") as hSrc:
             for sLine in hSrc:
                 sLine = sLine.strip()
+                if sLine == "__END__":
+                    break
                 if sLine and not sLine.startswith("#"):
                     yield sLine
     else:
         raise OSError("# Error. File not found or not loadable: " + spf)
 
@@ -316,13 +318,18 @@
     "compile list of locutions in JSON"
     print("> Locutions ", end="")
     print("(Python et JavaScript)" if bJS else "(Python seulement)")
     dLocGraph = {}
     oTokenizer = tkz.Tokenizer("fr")
-    for sLine in itertools.chain(readFile(sp+"/data/locutions.txt"), readFile(sp+"/data/locutions_vrac.txt")):
-        if sLine == "__END__":
-            break
+    for sLine in itertools.chain(readFile(sp+"/data/locutions_adverbiales.txt"), \
+                                 readFile(sp+"/data/locutions_prépositives.txt"), \
+                                 readFile(sp+"/data/locutions_conjonctives.txt"), \
+                                 readFile(sp+"/data/locutions_pronominales.txt"), \
+                                 readFile(sp+"/data/locutions_adjectivales.txt"), \
+                                 readFile(sp+"/data/locutions_interjectives.txt"), \
+                                 readFile(sp+"/data/locutions_nominales.txt"), \
+                                 readFile(sp+"/data/locutions_verbales.txt")):
         dCur = dLocGraph
         sLoc, sTag = sLine.split("\t")
         for oToken in oTokenizer.genTokens(sLoc.strip()):
             sWord = oToken["sValue"]
             if sWord not in dCur: