Grammalecte  Check-in [b71cbd8aad]

Overview
Comment:[build][graphspell] dawg builder: use data from lexicon when found
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | build | graphspell | comdic
Files: files | file ages | folders
SHA3-256: b71cbd8aad86f86d8b287442577c893998477f3d1f10b02ce7fa5d2ff67a4619
User & Date: olr on 2019-01-16 15:57:20
Other Links: branch diff | manifest | tags
Context
2019-01-22
10:12
[build][fx] Firefox Nightly has a new installation folder check-in: 3fc5e58719 user: olr tags: build, fx, comdic
2019-01-16
15:57
[build][graphspell] dawg builder: use data from lexicon when found check-in: b71cbd8aad user: olr tags: build, graphspell, comdic
2019-01-11
16:37
[build][fx][tb][lo] description field for dictionaries check-in: 831b79d96c user: olr tags: build, tb, fx, lo, comdic
Changes

Modified gc_lang/fr/dictionnaire/genfrdic.py from [321fa8c26f] to [fef905a8c3].

522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
522
523
524
525
526
527
528





529
530
531
532
533
534
535







-
-
-
-
-







                hDst.write(oFlex.__str__(oStatsLex))

    def writeGrammarCheckerLexicon (self, spfDst, version):
        "write the simplified lexicon: same path as <spfDst> with its last 4 characters replaced by .lex"
        echo(' * Lexique simplifié >> [ {} ] '.format(spfDst))
        spfLexicon = spfDst[:-4] + ".lex"
        with open(spfLexicon, 'w', encoding='utf-8', newline="\n") as hDst:
            # license header, then title with the version number
            hDst.write(MPLHEADER)
            hDst.write("# Lexique simplifié pour Grammalecte v{}\n# Licence : MPL v2.0\n\n".format(version))
            # dictionary metadata, one "## Key: value" line each
            for sMetadata in (
                "## LangCode: fr\n",
                "## LangName: Français\n",
                "## DicName: fr.commun\n",
                "## Description: Français commun (toutes variantes)\n",
                "## Author: Olivier R.\n\n",
            ):
                hDst.write(sMetadata)
            # column legend, then one representation per flexion, in list order
            hDst.write(Flexion.simpleHeader())
            hDst.writelines(oFlex.getGrammarCheckerRepr()  for oFlex in self.lFlexions)

    def createFiles (self, spDst, lDictVars, nMode, bSimplified):
        sDicName = PREFIX_DICT_PATH + self.sVersion
        spDic = spDst + '/' + sDicName

Modified graphspell/dawg.py from [70e7a3c81c] to [12eff6a146].

18
19
20
21
22
23
24


25
26
27
28
29

30
31
32




33

34



35
36
37
38
39
40
41
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39

40
41
42
43
44
45
46
47
48
49
50
51







+
+





+



+
+
+
+
-
+

+
+
+







import re
import traceback

from . import str_transform as st
from .progressbar import ProgressBar



# metadata found in the last lexicon read by readFile(): {"LangCode": "fr", ...}
dLexiconData = {}

def readFile (spf):
    """generator: read the lexicon file <spf> and yield, for each entry line, the list of its tab-separated fields.

    Lines beginning with "##" are metadata lines ("## Key: value"): when they match this
    pattern, they are stored in the module-level dictionary <dLexiconData> (emptied each
    time a file is read); they are never yielded.
    Empty lines and other lines beginning with "#" are ignored.

    As this is a generator, nothing is done (not even the existence check) before the first iteration.
    Raises OSError if <spf> is not an existing file.
    """
    print(" < Read lexicon: " + spf)
    if not os.path.isfile(spf):
        raise OSError("# Error. File not found or not loadable: " + spf)
    dLexiconData.clear()
    with open(spf, "r", encoding="utf-8") as hSrc:
        for sLine in hSrc:
            sLine = sLine.strip()
            if sLine.startswith("##"):
                # metadata line: keep key and value, ignore it if malformed
                m = re.match(r"## *(\w+) *:(.*)$", sLine)
                if m:
                    dLexiconData[m.group(1)] = m.group(2).strip()
            elif sLine and not sLine.startswith("#"):
                yield sLine.split("\t")
    # reached only when the file has been read to the end
    if dLexiconData:
        print("Data from dictionary:")
        print(dLexiconData)



class DAWG:
    """DIRECTED ACYCLIC WORD GRAPH"""
118
119
120
121
122
123
124





125
126
127
128
129
130
131
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146







+
+
+
+
+







                        + [ (dTag[tag]+nChar+nAff, dTagOccur[tag]) for tag in dTag ] )

        self.sFileName = src  if type(src) is str  else "[None]"
        self.sLangCode = sLangCode
        self.sLangName = sLangName
        self.sDicName = sDicName
        self.sDescription = sDescription
        if dLexiconData:
            self.sLangCode = dLexiconData.get("LangCode", self.sLangCode)
            self.sLangName = dLexiconData.get("LangName", self.sLangName)
            self.sDicName = dLexiconData.get("DicName", self.sDicName)
            self.sDescription = dLexiconData.get("Description", self.sDescription)
        self.nEntry = len(lWord)
        self.aPreviousEntry = []
        DawgNode.resetNextId()
        self.oRoot = DawgNode()
        self.lUncheckedNodes = []  # list of nodes that have not been checked for duplication.
        self.lMinimizedNodes = {}  # list of unique nodes that have been checked for duplication.
        self.lSortedNodes = []     # version 2 and 3

Modified lexicons/French.lex from [b69ae271b0] to [af55bd82c0].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
1
2
3
4
5
6
7






8
9
10
11
12
13
14







-
-
-
-
-
-







# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Lexique simplifié pour Grammalecte v7.0
# Licence : MPL v2.0

## LangCode: fr
## LangName: Français
## DicName: fr.commun
## Description: Français commun (toutes variantes)
## Author: Olivier R.

# :POS ;LEX ~SEM =FQ /DIC
de	de	:G:D:e:i/*
de	de	:G:R:Rv/*
et	et	:G:Cc/*
à	à	:G:R:Rv/*
des	des	:G:D:e:p/*
du	du	:G:D:m:s/*