Grammalecte  Diff

Differences From Artifact [3eb0f89147]:

To Artifact [94a1ff7b31]:


1
2
3
4
5
6
7
8
9
10
11
12

13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


20
21
22
23
24
25
26












+






-
-







#!python3

# FRENCH DATA BUILDER
#
# by Olivier R.
# License: MPL 2

import json
import os
import itertools
import traceback
import platform
import importlib

import graphspell.ibdawg as ibdawg
from graphspell.echo import echo
from graphspell.str_transform import defineSuffixCode
import graphspell.tokenizer as tkz

import gc_lang.fr.modules.conj as conj


oDict = None


class cd:
    """Context manager for changing the current working directory"""
    def __init__ (self, newPath):
70
71
72
73
74
75
76
77

78
79
80

81
82
83
84
85

86
87
88
89
90
91
92
93
94
95
96
97


98
99
100
101
102
103
104
69
70
71
72
73
74
75

76
77
78

79
80
81
82
83

84
85
86
87
88
89
90
91
92
93
94
95

96
97
98
99
100
101
102
103
104







-
+


-
+




-
+











-
+
+







    print("(Python et JavaScript)"  if bJS  else "(Python seulement)")
    dVerb = {}
    lVinfo = []; dVinfo = {}; nVinfo = 0
    lTags = []; dTags = {}; nTags = 0
    dVerbNames = {}

    dPatternList = {
        ":PQ": [], ":Ip": [], ":Iq": [], ":Is": [], ":If": [], ":K": [], ":Sp": [], ":Sq": [], ":E": []
        ":P": [], ":Q": [], ":Ip": [], ":Iq": [], ":Is": [], ":If": [], ":K": [], ":Sp": [], ":Sq": [], ":E": []
    }
    dTrad = {
        "infi": ":Y", "ppre": ":PQ", "ppas": ":PQ",
        "infi": ":Y", "ppre": ":P", "ppas": ":Q",
        "ipre": ":Ip", "iimp": ":Iq", "ipsi": ":Is", "ifut": ":If",
        "spre": ":Sp", "simp": ":Sq",
        "cond": ":K", "impe": ":E",
        "1sg": ":1s", "2sg": ":2s", "3sg": ":3s", "1pl": ":1p", "2pl": ":2p", "3pl": ":3p", "1isg": ":1ś",
        "mas sg": ":Q1", "mas pl": ":Q2", "mas inv": ":Q1", "fem sg": ":Q3", "fem pl": ":Q4", "epi inv": ":Q1"
        "mas sg": ":m:s", "mas pl": ":m:p", "mas inv": ":m:s", "fem sg": ":f:s", "fem pl": ":f:p", "epi inv": ":m:s"
    }

    loadDictionary()

    # read lexicon
    nStop = 0
    for n, sLine in enumerate(readFile(sp+"/data/dictConj.txt")):
        nTab = sLine.count("\t")
        if nTab == 1:
            # new entry
            sLemma, sVinfo = sLine.split("\t")
            dConj = {   ":PQ": { ":P": "", ":Q1": "", ":Q2": "", ":Q3": "", ":Q4": ""},
            dConj = {   ":P": { ":P": "" },
                        ":Q": { ":m:s": "", ":f:s": "", ":m:p": "", ":f:p": "" },
                        ":Ip": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "", ":1ś": "" },
                        ":Iq": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "" },
                        ":Is": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "" },
                        ":If": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "" },
                        ":K":  { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "" },
                        ":Sp": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "", ":1ś": "" },
                        ":Sq": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "", ":1ś": "" },
114
115
116
117
118
119
120
121

122
123
124
125
126
127
128
114
115
116
117
118
119
120

121
122
123
124
125
126
127
128







-
+







                    dVerbNames[sLemma] = { sLemma }
                    break
        elif nTab == 2:
            # flexion
            _, sTag, sFlex = sLine.split("\t")
            if sTag.count(" ") == 0:
                if sTag == "ppre":
                    dConj[":PQ"][":P"] = defineSuffixCode(sLemma, sFlex)
                    dConj[":P"][":P"] = defineSuffixCode(sLemma, sFlex)
            else:
                try:
                    mode, g = sTag.split(maxsplit=1)
                    mode = dTrad[mode]
                    g = dTrad[g]
                    if dConj[mode][g] == "":
                        dConj[mode][g] = defineSuffixCode(sLemma, sFlex)
142
143
144
145
146
147
148
149

150
151
152
153
154
155
156
142
143
144
145
146
147
148

149
150
151
152
153
154
155
156







-
+







        elif sLine == "$":
            # we store the dictionary of rules for this lemma
            if dConj[":Ip"][":1ś"] == "2è":
                dConj[":Ip"][":1ś"] = "2é"
            elif sLemma == "pouvoir":
                dConj[":Ip"][":1ś"] = "6uis"
            lConjTags = []
            for sTense in [":PQ", ":Ip", ":Iq", ":Is", ":If", ":K", ":Sp", ":Sq", ":E"]:
            for sTense in [":P", ":Q", ":Ip", ":Iq", ":Is", ":If", ":K", ":Sp", ":Sq", ":E"]:
                bFound = False
                for i, d in enumerate(dPatternList[sTense]):
                    if dConj[sTense] == d:
                        bFound = True
                        lConjTags.append(i)
                        break
                if not bFound:
285
286
287
288
289
290
291


292
293
294
295
296
297
298
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300







+
+








def makePhonetTable (sp, bJS=False):
    print("> Correspondances phonétiques ", end="")
    print("(Python et JavaScript)"  if bJS  else "(Python seulement)")

    loadDictionary()

    conj = importlib.import_module("gc_lang.fr.modules.conj")

    # set of homophonic words
    lSet = []
    for sLine in readFile(sp+"/data/phonet_simil.txt"):
        lWord = sLine.split()
        for sWord in lWord:
            if sWord.endswith("er") and conj.isVerb(sWord):
                lWord.extend(conj.getConjSimilInfiV1(sWord))
376
377
378
379
380
381
382
383
384


385
378
379
380
381
382
383
384


385
386
387







-
-
+
+

    print("========== Build Hunspell dictionaries ==========")
    makeDictionaries(spLaunch, dVars['oxt_version'])


def after (spLaunch, dVars, bJS=False):
    print("========== Build French data ==========")
    makeMfsp(spLaunch, bJS)
    makePhonetTable(spLaunch, bJS)
    makeConj(spLaunch, bJS)
    makeConj(spLaunch, bJS)
    makePhonetTable(spLaunch, bJS)
    #makeLocutions(spLaunch, bJS)