Grammalecte  Check-in [b71cbd8aad]

Overview
Comment:[build][graphspell] dawg builder: use data from lexicon when found
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | build | graphspell | comdic
Files: files | file ages | folders
SHA3-256: b71cbd8aad86f86d8b287442577c893998477f3d1f10b02ce7fa5d2ff67a4619
User & Date: olr on 2019-01-16 15:57:20
Other Links: branch diff | manifest | tags
Context
2019-01-22
10:12
[build][fx] Firefox Nightly has a new installation folder check-in: 3fc5e58719 user: olr tags: build, fx, comdic
2019-01-16
15:57
[build][graphspell] dawg builder: use data from lexicon when found check-in: b71cbd8aad user: olr tags: build, graphspell, comdic
2019-01-11
16:37
[build][fx][tb][lo] description field for dictionaries check-in: 831b79d96c user: olr tags: build, tb, fx, lo, comdic
Changes

Modified gc_lang/fr/dictionnaire/genfrdic.py from [321fa8c26f] to [fef905a8c3].

522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
                hDst.write(oFlex.__str__(oStatsLex))

    def writeGrammarCheckerLexicon (self, spfDst, version):
        echo(' * Lexique simplifié >> [ {} ] '.format(spfDst))
        with open(spfDst[:-4]+".lex", 'w', encoding='utf-8', newline="\n") as hDst:
            hDst.write(MPLHEADER)
            hDst.write("# Lexique simplifié pour Grammalecte v{}\n# Licence : MPL v2.0\n\n".format(version))
            hDst.write("## LangCode: fr\n")
            hDst.write("## LangName: Français\n")
            hDst.write("## DicName: fr.commun\n")
            hDst.write("## Description: Français commun (toutes variantes)\n")
            hDst.write("## Author: Olivier R.\n\n")
            hDst.write(Flexion.simpleHeader())
            for oFlex in self.lFlexions:
                hDst.write(oFlex.getGrammarCheckerRepr())

    def createFiles (self, spDst, lDictVars, nMode, bSimplified):
        sDicName = PREFIX_DICT_PATH + self.sVersion
        spDic = spDst + '/' + sDicName







<
<
<
<
<







522
523
524
525
526
527
528





529
530
531
532
533
534
535
                hDst.write(oFlex.__str__(oStatsLex))

    def writeGrammarCheckerLexicon (self, spfDst, version):
        echo(' * Lexique simplifié >> [ {} ] '.format(spfDst))
        with open(spfDst[:-4]+".lex", 'w', encoding='utf-8', newline="\n") as hDst:
            hDst.write(MPLHEADER)
            hDst.write("# Lexique simplifié pour Grammalecte v{}\n# Licence : MPL v2.0\n\n".format(version))





            hDst.write(Flexion.simpleHeader())
            for oFlex in self.lFlexions:
                hDst.write(oFlex.getGrammarCheckerRepr())

    def createFiles (self, spDst, lDictVars, nMode, bSimplified):
        sDicName = PREFIX_DICT_PATH + self.sVersion
        spDic = spDst + '/' + sDicName

Modified graphspell/dawg.py from [70e7a3c81c] to [12eff6a146].

18
19
20
21
22
23
24


25
26
27
28
29

30
31
32




33
34



35
36
37
38
39
40
41
import re
import traceback

from . import str_transform as st
from .progressbar import ProgressBar





def readFile (spf):
    "generator: read file <spf> and return for each line a list of elements separated by a tabulation."
    print(" < Read lexicon: " + spf)
    if os.path.isfile(spf):

        with open(spf, "r", encoding="utf-8") as hSrc:
            for sLine in hSrc:
                sLine = sLine.strip()




                if sLine and not sLine.startswith("#"):
                    yield sLine.split("\t")



    else:
        raise OSError("# Error. File not found or not loadable: " + spf)



class DAWG:
    """DIRECT ACYCLIC WORD GRAPH"""







>
>





>



>
>
>
>
|

>
>
>







18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import re
import traceback

from . import str_transform as st
from .progressbar import ProgressBar



dLexiconData = {}

def readFile (spf):
    "generator: read file <spf> and return for each line a list of elements separated by a tabulation."
    print(" < Read lexicon: " + spf)
    if os.path.isfile(spf):
        dLexiconData.clear()
        with open(spf, "r", encoding="utf-8") as hSrc:
            for sLine in hSrc:
                sLine = sLine.strip()
                if sLine.startswith("##") :
                    m = re.match("## *(\\w+) *:(.*)$", sLine)
                    if m:
                        dLexiconData[m.group(1)] = m.group(2).strip()
                elif sLine and not sLine.startswith("#"):
                    yield sLine.split("\t")
        if dLexiconData:
            print("Data from dictionary:")
            print(dLexiconData)
    else:
        raise OSError("# Error. File not found or not loadable: " + spf)



class DAWG:
    """DIRECT ACYCLIC WORD GRAPH"""
118
119
120
121
122
123
124





125
126
127
128
129
130
131
                        + [ (dTag[tag]+nChar+nAff, dTagOccur[tag]) for tag in dTag ] )

        self.sFileName = src  if type(src) is str  else "[None]"
        self.sLangCode = sLangCode
        self.sLangName = sLangName
        self.sDicName = sDicName
        self.sDescription = sDescription





        self.nEntry = len(lWord)
        self.aPreviousEntry = []
        DawgNode.resetNextId()
        self.oRoot = DawgNode()
        self.lUncheckedNodes = []  # list of nodes that have not been checked for duplication.
        self.lMinimizedNodes = {}  # list of unique nodes that have been checked for duplication.
        self.lSortedNodes = []     # version 2 and 3







>
>
>
>
>







128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
                        + [ (dTag[tag]+nChar+nAff, dTagOccur[tag]) for tag in dTag ] )

        self.sFileName = src  if type(src) is str  else "[None]"
        self.sLangCode = sLangCode
        self.sLangName = sLangName
        self.sDicName = sDicName
        self.sDescription = sDescription
        if dLexiconData:
            self.sLangCode = dLexiconData.get("LangCode", self.sLangCode)
            self.sLangName = dLexiconData.get("LangName", self.sLangName)
            self.sDicName = dLexiconData.get("DicName", self.sDicName)
            self.sDescription = dLexiconData.get("Description", self.sDescription)
        self.nEntry = len(lWord)
        self.aPreviousEntry = []
        DawgNode.resetNextId()
        self.oRoot = DawgNode()
        self.lUncheckedNodes = []  # list of nodes that have not been checked for duplication.
        self.lMinimizedNodes = {}  # list of unique nodes that have been checked for duplication.
        self.lSortedNodes = []     # version 2 and 3

Modified lexicons/French.lex from [b69ae271b0] to [af55bd82c0].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Lexique simplifié pour Grammalecte v7.0
# Licence : MPL v2.0

## LangCode: fr
## LangName: Français
## DicName: fr.commun
## Description: Français commun (toutes variantes)
## Author: Olivier R.

# :POS ;LEX ~SEM =FQ /DIC
de	de	:G:D:e:i/*
de	de	:G:R:Rv/*
et	et	:G:Cc/*
à	à	:G:R:Rv/*
des	des	:G:D:e:p/*
du	du	:G:D:m:s/*







<
<
<
<
<
<







1
2
3
4
5
6
7






8
9
10
11
12
13
14
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Lexique simplifié pour Grammalecte v7.0
# Licence : MPL v2.0







# :POS ;LEX ~SEM =FQ /DIC
de	de	:G:D:e:i/*
de	de	:G:R:Rv/*
et	et	:G:Cc/*
à	à	:G:R:Rv/*
des	des	:G:D:e:p/*
du	du	:G:D:m:s/*