Grammalecte  Diff

Differences From Artifact [1e628c0406]:

To Artifact [1c5ad0c32d]:


30
31
32
33
34
35
36


37
38
39
40
41
42
43


def readFile (spf):
    if os.path.isfile(spf):
        with open(spf, "r", encoding="utf-8") as hSrc:
            for sLine in hSrc:
                sLine = sLine.strip()


                if sLine and not sLine.startswith("#"):
                    yield sLine
    else:
        raise OSError("# Error. File not found or not loadable: " + spf)


def makeDictionaries (sp, sVersion):







>
>







30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45


def readFile (spf):
    if os.path.isfile(spf):
        with open(spf, "r", encoding="utf-8") as hSrc:
            for sLine in hSrc:
                sLine = sLine.strip()
                if sLine == "__END__":
                    break
                if sLine and not sLine.startswith("#"):
                    yield sLine
    else:
        raise OSError("# Error. File not found or not loadable: " + spf)


def makeDictionaries (sp, sVersion):
314
315
316
317
318
319
320
321
322





323
324
325
326
327
328
329
330

def makeLocutions (sp, bJS=False):
    "compile list of locutions in JSON"
    print("> Locutions ", end="")
    print("(Python et JavaScript)"  if bJS  else "(Python seulement)")
    dLocGraph = {}
    oTokenizer = tkz.Tokenizer("fr")
    for sLine in itertools.chain(readFile(sp+"/data/locutions.txt"), readFile(sp+"/data/locutions_vrac.txt")):
        if sLine == "__END__":





            break
        dCur = dLocGraph
        sLoc, sTag = sLine.split("\t")
        for oToken in oTokenizer.genTokens(sLoc.strip()):
            sWord = oToken["sValue"]
            if sWord not in dCur:
                dCur[sWord] = {}
            dCur = dCur[sWord]







|
|
>
>
>
>
>
|







316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337

def makeLocutions (sp, bJS=False):
    "compile list of locutions in JSON"
    print("> Locutions ", end="")
    print("(Python et JavaScript)"  if bJS  else "(Python seulement)")
    dLocGraph = {}
    oTokenizer = tkz.Tokenizer("fr")
    for sLine in itertools.chain(readFile(sp+"/data/locutions_adverbiales.txt"), \
                                 readFile(sp+"/data/locutions_prépositives.txt"), \
                                 readFile(sp+"/data/locutions_conjonctives.txt"), \
                                 readFile(sp+"/data/locutions_pronominales.txt"), \
                                 readFile(sp+"/data/locutions_adjectivales.txt"), \
                                 readFile(sp+"/data/locutions_interjectives.txt"), \
                                 readFile(sp+"/data/locutions_nominales.txt"), \
                                 readFile(sp+"/data/locutions_verbales.txt")):
        dCur = dLocGraph
        sLoc, sTag = sLine.split("\t")
        for oToken in oTokenizer.genTokens(sLoc.strip()):
            sWord = oToken["sValue"]
            if sWord not in dCur:
                dCur[sWord] = {}
            dCur = dCur[sWord]