Differences From Artifact [3eb0f89147]:
- File gc_lang/fr/build_data.py — part of check-in [c4896b5dd8] at 2020-11-16 22:51:48 on branch trunk — [fr] mise à jour du dictionnaire (user: olr, size: 15398) [annotate] [blame] [check-ins using]
To Artifact [94a1ff7b31]:
- File gc_lang/fr/build_data.py — part of check-in [a56f46447a] at 2021-02-05 21:07:08 on branch trunk — [build][core][lo][fx][fr] change data structure for conjugation tool (user: olr, size: 15493) [annotate] [blame] [check-ins using]
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 | #!python3 # FRENCH DATA BUILDER # # by Olivier R. # License: MPL 2 import json import os import itertools import traceback import platform import graphspell.ibdawg as ibdawg from graphspell.echo import echo from graphspell.str_transform import defineSuffixCode import graphspell.tokenizer as tkz | > < < | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
#!python3
# FRENCH DATA BUILDER
#
# by Olivier R.
# License: MPL 2
import json
import os
import itertools
import traceback
import platform
import importlib
import graphspell.ibdawg as ibdawg
from graphspell.echo import echo
from graphspell.str_transform import defineSuffixCode
import graphspell.tokenizer as tkz
# Module-level dictionary handle, unset at import time.
# NOTE(review): presumably assigned by loadDictionary() before use — confirm against its definition.
oDict = None
class cd:
"""Context manager for changing the current working directory"""
def __init__ (self, newPath):
|
| ︙ | ︙ | |||
70 71 72 73 74 75 76 |
print("(Python et JavaScript)" if bJS else "(Python seulement)")
dVerb = {}
lVinfo = []; dVinfo = {}; nVinfo = 0
lTags = []; dTags = {}; nTags = 0
dVerbNames = {}
dPatternList = {
| | | | | > | 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
print("(Python et JavaScript)" if bJS else "(Python seulement)")
dVerb = {}
lVinfo = []; dVinfo = {}; nVinfo = 0
lTags = []; dTags = {}; nTags = 0
dVerbNames = {}
dPatternList = {
":P": [], ":Q": [], ":Ip": [], ":Iq": [], ":Is": [], ":If": [], ":K": [], ":Sp": [], ":Sq": [], ":E": []
}
dTrad = {
"infi": ":Y", "ppre": ":P", "ppas": ":Q",
"ipre": ":Ip", "iimp": ":Iq", "ipsi": ":Is", "ifut": ":If",
"spre": ":Sp", "simp": ":Sq",
"cond": ":K", "impe": ":E",
"1sg": ":1s", "2sg": ":2s", "3sg": ":3s", "1pl": ":1p", "2pl": ":2p", "3pl": ":3p", "1isg": ":1ś",
"mas sg": ":m:s", "mas pl": ":m:p", "mas inv": ":m:s", "fem sg": ":f:s", "fem pl": ":f:p", "epi inv": ":m:s"
}
loadDictionary()
# read lexicon
nStop = 0
for n, sLine in enumerate(readFile(sp+"/data/dictConj.txt")):
nTab = sLine.count("\t")
if nTab == 1:
# new entry
sLemma, sVinfo = sLine.split("\t")
dConj = { ":P": { ":P": "" },
":Q": { ":m:s": "", ":f:s": "", ":m:p": "", ":f:p": "" },
":Ip": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "", ":1ś": "" },
":Iq": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "" },
":Is": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "" },
":If": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "" },
":K": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "" },
":Sp": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "", ":1ś": "" },
":Sq": { ":1s": "", ":2s": "", ":3s": "", ":1p": "", ":2p": "", ":3p": "", ":1ś": "" },
|
| ︙ | ︙ | |||
114 115 116 117 118 119 120 |
dVerbNames[sLemma] = { sLemma }
break
elif nTab == 2:
# flexion
_, sTag, sFlex = sLine.split("\t")
if sTag.count(" ") == 0:
if sTag == "ppre":
| | | 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
dVerbNames[sLemma] = { sLemma }
break
elif nTab == 2:
# flexion
_, sTag, sFlex = sLine.split("\t")
if sTag.count(" ") == 0:
if sTag == "ppre":
dConj[":P"][":P"] = defineSuffixCode(sLemma, sFlex)
else:
try:
mode, g = sTag.split(maxsplit=1)
mode = dTrad[mode]
g = dTrad[g]
if dConj[mode][g] == "":
dConj[mode][g] = defineSuffixCode(sLemma, sFlex)
|
| ︙ | ︙ | |||
142 143 144 145 146 147 148 |
elif sLine == "$":
# we store the dictionary of rules for this lemma
if dConj[":Ip"][":1ś"] == "2è":
dConj[":Ip"][":1ś"] = "2é"
elif sLemma == "pouvoir":
dConj[":Ip"][":1ś"] = "6uis"
lConjTags = []
| | | 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
elif sLine == "$":
# we store the dictionary of rules for this lemma
if dConj[":Ip"][":1ś"] == "2è":
dConj[":Ip"][":1ś"] = "2é"
elif sLemma == "pouvoir":
dConj[":Ip"][":1ś"] = "6uis"
lConjTags = []
for sTense in [":P", ":Q", ":Ip", ":Iq", ":Is", ":If", ":K", ":Sp", ":Sq", ":E"]:
bFound = False
for i, d in enumerate(dPatternList[sTense]):
if dConj[sTense] == d:
bFound = True
lConjTags.append(i)
break
if not bFound:
|
| ︙ | ︙ | |||
285 286 287 288 289 290 291 292 293 294 295 296 297 298 |
def makePhonetTable (sp, bJS=False):
print("> Correspondances phonétiques ", end="")
print("(Python et JavaScript)" if bJS else "(Python seulement)")
loadDictionary()
# set of homophonic words
lSet = []
for sLine in readFile(sp+"/data/phonet_simil.txt"):
lWord = sLine.split()
for sWord in lWord:
if sWord.endswith("er") and conj.isVerb(sWord):
lWord.extend(conj.getConjSimilInfiV1(sWord))
| > > | 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 |
def makePhonetTable (sp, bJS=False):
print("> Correspondances phonétiques ", end="")
print("(Python et JavaScript)" if bJS else "(Python seulement)")
loadDictionary()
conj = importlib.import_module("gc_lang.fr.modules.conj")
# set of homophonic words
lSet = []
for sLine in readFile(sp+"/data/phonet_simil.txt"):
lWord = sLine.split()
for sWord in lWord:
if sWord.endswith("er") and conj.isVerb(sWord):
lWord.extend(conj.getConjSimilInfiV1(sWord))
|
| ︙ | ︙ | |||
376 377 378 379 380 381 382 |
print("========== Build Hunspell dictionaries ==========")
makeDictionaries(spLaunch, dVars['oxt_version'])
def after (spLaunch, dVars, bJS=False):
print("========== Build French data ==========")
makeMfsp(spLaunch, bJS)
| | | | 378 379 380 381 382 383 384 385 386 387 |
print("========== Build Hunspell dictionaries ==========")
makeDictionaries(spLaunch, dVars['oxt_version'])
def after (spLaunch, dVars, bJS=False):
    """Build the French data sets.

    Prints a banner, then runs each data builder in turn, forwarding
    <spLaunch> as first argument and <bJS> as second argument.
    <dVars> is accepted but not used in this function.
    """
    print("========== Build French data ==========")
    # the builders are run strictly in this order
    for fBuilder in (makeMfsp, makeConj, makePhonetTable):
        fBuilder(spLaunch, bJS)
    # makeLocutions(spLaunch, bJS) is currently not run
|