Grammalecte  Diff

Differences From Artifact [3eb0f89147]:

To Artifact [94a1ff7b31]:


1
2
3
4
5
6
7
8
9
10
11
12

13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!python3

# FRENCH DATA BUILDER
#
# by Olivier R.
# License: MPL 2

import json
import os
import itertools
import traceback
import platform


import graphspell.ibdawg as ibdawg
from graphspell.echo import echo
from graphspell.str_transform import defineSuffixCode
import graphspell.tokenizer as tkz

import gc_lang.fr.modules.conj as conj


oDict = None


class cd:
    """Context manager for changing the current working directory"""
    def __init__ (self, newPath):












>






<
<







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


20
21
22
23
24
25
26
#!python3

# FRENCH DATA BUILDER
#
# by Olivier R.
# License: MPL 2

import json
import os
import itertools
import traceback
import platform
import importlib

import graphspell.ibdawg as ibdawg
from graphspell.echo import echo
from graphspell.str_transform import defineSuffixCode
import graphspell.tokenizer as tkz




oDict = None


class cd:
    """Context manager for changing the current working directory"""
    def __init__ (self, newPath):
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97

98
99
100
101
102
103
104
    print("(Python et JavaScript)"  if bJS  else "(Python seulement)")
    dVerb = {}
    lVinfo = []; dVinfo = {}; nVinfo = 0
    lTags = []; dTags = {}; nTags = 0
    dVerbNames = {}

    dPatternList = {
        ":PQ": [], ":Ip": [], ":Iq": [], ":Is": [], ":If": [], ":K": [], ":Sp": [], ":Sq": [], ":E": []
    }
    dTrad = {
        "infi": ":Y", "ppre": ":PQ", "ppas": ":PQ",
        "ipre": ":Ip", "iimp": ":Iq", "ipsi": ":Is", "ifut": ":If",
        "spre": ":Sp", "simp": ":Sq",
        "cond": ":K", "impe": ":E",
        "1sg": ":1s", "2sg": ":2s", "3sg": ":3s", "1pl": ":1p", "2pl": ":2p", "3pl": ":3p", "1isg": ":1ś",
        "mas sg": ":Q1", "mas pl": ":Q2", "mas inv": ":Q1", "fem sg": ":Q3", "fem pl": ":Q4", "epi inv": ":Q1"
    }

    loadDictionary()

    # read lexicon
    nStop = 0
    for n, sLine in enumerate(readFile(sp+"/data/dictConj.txt")):
        nTab = sLine.count("\t")
        if nTab == 1:
            # new entry
            sLemma, sVinfo = sLine.split("\t")
            dConj = {   ":PQ": { ":P": "", ":Q1": "", ":Q2": "", ":Q3": "", ":Q4": ""},

                        ":Ip": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "", ":1ś": "" },
                        ":Iq": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "" },
                        ":Is": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "" },
                        ":If": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "" },
                        ":K":  { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "" },
                        ":Sp": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "", ":1ś": "" },
                        ":Sq": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "", ":1ś": "" },







|


|




|











|
>







69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
    print("(Python et JavaScript)"  if bJS  else "(Python seulement)")
    dVerb = {}
    lVinfo = []; dVinfo = {}; nVinfo = 0
    lTags = []; dTags = {}; nTags = 0
    dVerbNames = {}

    dPatternList = {
        ":P": [], ":Q": [], ":Ip": [], ":Iq": [], ":Is": [], ":If": [], ":K": [], ":Sp": [], ":Sq": [], ":E": []
    }
    dTrad = {
        "infi": ":Y", "ppre": ":P", "ppas": ":Q",
        "ipre": ":Ip", "iimp": ":Iq", "ipsi": ":Is", "ifut": ":If",
        "spre": ":Sp", "simp": ":Sq",
        "cond": ":K", "impe": ":E",
        "1sg": ":1s", "2sg": ":2s", "3sg": ":3s", "1pl": ":1p", "2pl": ":2p", "3pl": ":3p", "1isg": ":1ś",
        "mas sg": ":m:s", "mas pl": ":m:p", "mas inv": ":m:s", "fem sg": ":f:s", "fem pl": ":f:p", "epi inv": ":m:s"
    }

    loadDictionary()

    # read lexicon
    nStop = 0
    for n, sLine in enumerate(readFile(sp+"/data/dictConj.txt")):
        nTab = sLine.count("\t")
        if nTab == 1:
            # new entry
            sLemma, sVinfo = sLine.split("\t")
            dConj = {   ":P": { ":P": "" },
                        ":Q": { ":m:s": "", ":f:s": "", ":m:p": "", ":f:p": "" },
                        ":Ip": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "", ":1ś": "" },
                        ":Iq": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "" },
                        ":Is": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "" },
                        ":If": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "" },
                        ":K":  { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "" },
                        ":Sp": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "", ":1ś": "" },
                        ":Sq": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "", ":1ś": "" },
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
                    dVerbNames[sLemma] = { sLemma }
                    break
        elif nTab == 2:
            # flexion
            _, sTag, sFlex = sLine.split("\t")
            if sTag.count(" ") == 0:
                if sTag == "ppre":
                    dConj[":PQ"][":P"] = defineSuffixCode(sLemma, sFlex)
            else:
                try:
                    mode, g = sTag.split(maxsplit=1)
                    mode = dTrad[mode]
                    g = dTrad[g]
                    if dConj[mode][g] == "":
                        dConj[mode][g] = defineSuffixCode(sLemma, sFlex)







|







114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
                    dVerbNames[sLemma] = { sLemma }
                    break
        elif nTab == 2:
            # flexion
            _, sTag, sFlex = sLine.split("\t")
            if sTag.count(" ") == 0:
                if sTag == "ppre":
                    dConj[":P"][":P"] = defineSuffixCode(sLemma, sFlex)
            else:
                try:
                    mode, g = sTag.split(maxsplit=1)
                    mode = dTrad[mode]
                    g = dTrad[g]
                    if dConj[mode][g] == "":
                        dConj[mode][g] = defineSuffixCode(sLemma, sFlex)
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
        elif sLine == "$":
            # we store the dictionary of rules for this lemma
            if dConj[":Ip"][":1ś"] == "2è":
                dConj[":Ip"][":1ś"] = "2é"
            elif sLemma == "pouvoir":
                dConj[":Ip"][":1ś"] = "6uis"
            lConjTags = []
            for sTense in [":PQ", ":Ip", ":Iq", ":Is", ":If", ":K", ":Sp", ":Sq", ":E"]:
                bFound = False
                for i, d in enumerate(dPatternList[sTense]):
                    if dConj[sTense] == d:
                        bFound = True
                        lConjTags.append(i)
                        break
                if not bFound:







|







142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
        elif sLine == "$":
            # we store the dictionary of rules for this lemma
            if dConj[":Ip"][":1ś"] == "2è":
                dConj[":Ip"][":1ś"] = "2é"
            elif sLemma == "pouvoir":
                dConj[":Ip"][":1ś"] = "6uis"
            lConjTags = []
            for sTense in [":P", ":Q", ":Ip", ":Iq", ":Is", ":If", ":K", ":Sp", ":Sq", ":E"]:
                bFound = False
                for i, d in enumerate(dPatternList[sTense]):
                    if dConj[sTense] == d:
                        bFound = True
                        lConjTags.append(i)
                        break
                if not bFound:
285
286
287
288
289
290
291


292
293
294
295
296
297
298

def makePhonetTable (sp, bJS=False):
    print("> Correspondances phonétiques ", end="")
    print("(Python et JavaScript)"  if bJS  else "(Python seulement)")

    loadDictionary()



    # set of homophonic words
    lSet = []
    for sLine in readFile(sp+"/data/phonet_simil.txt"):
        lWord = sLine.split()
        for sWord in lWord:
            if sWord.endswith("er") and conj.isVerb(sWord):
                lWord.extend(conj.getConjSimilInfiV1(sWord))







>
>







285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300

def makePhonetTable (sp, bJS=False):
    print("> Correspondances phonétiques ", end="")
    print("(Python et JavaScript)"  if bJS  else "(Python seulement)")

    loadDictionary()

    conj = importlib.import_module("gc_lang.fr.modules.conj")

    # set of homophonic words
    lSet = []
    for sLine in readFile(sp+"/data/phonet_simil.txt"):
        lWord = sLine.split()
        for sWord in lWord:
            if sWord.endswith("er") and conj.isVerb(sWord):
                lWord.extend(conj.getConjSimilInfiV1(sWord))
376
377
378
379
380
381
382
383
384
385
    print("========== Build Hunspell dictionaries ==========")
    makeDictionaries(spLaunch, dVars['oxt_version'])


def after (spLaunch, dVars, bJS=False):
    print("========== Build French data ==========")
    makeMfsp(spLaunch, bJS)
    makePhonetTable(spLaunch, bJS)
    makeConj(spLaunch, bJS)
    #makeLocutions(spLaunch, bJS)







|
|

378
379
380
381
382
383
384
385
386
387
    print("========== Build Hunspell dictionaries ==========")
    makeDictionaries(spLaunch, dVars['oxt_version'])


def after (spLaunch, dVars, bJS=False):
    print("========== Build French data ==========")
    makeMfsp(spLaunch, bJS)
    makeConj(spLaunch, bJS)
    makePhonetTable(spLaunch, bJS)
    #makeLocutions(spLaunch, bJS)