Overview
| Comment: | [build][graphspell] dawg builder: use data from lexicon when found |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | build | graphspell | comdic |
| Files: | files | file ages | folders |
| SHA3-256: |
b71cbd8aad86f86d8b287442577c8939 |
| User & Date: | olr on 2019-01-16 15:57:20 |
| Other Links: | branch diff | manifest | tags |
Context
|
2019-01-22
| ||
| 10:12 | [build][fx] Firefox Nightly has a new installation folder check-in: 3fc5e58719 user: olr tags: build, fx, comdic | |
|
2019-01-16
| ||
| 15:57 | [build][graphspell] dawg builder: use data from lexicon when found check-in: b71cbd8aad user: olr tags: build, graphspell, comdic | |
|
2019-01-11
| ||
| 16:37 | [build][fx][tb][lo] description field for dictionaries check-in: 831b79d96c user: olr tags: build, tb, fx, lo, comdic | |
Changes
Modified gc_lang/fr/dictionnaire/genfrdic.py from [321fa8c26f] to [fef905a8c3].
| ︙ | ︙ | |||
522 523 524 525 526 527 528 |
hDst.write(oFlex.__str__(oStatsLex))
def writeGrammarCheckerLexicon (self, spfDst, version):
echo(' * Lexique simplifié >> [ {} ] '.format(spfDst))
with open(spfDst[:-4]+".lex", 'w', encoding='utf-8', newline="\n") as hDst:
hDst.write(MPLHEADER)
hDst.write("# Lexique simplifié pour Grammalecte v{}\n# Licence : MPL v2.0\n\n".format(version))
| < < < < < | 522 523 524 525 526 527 528 529 530 531 532 533 534 535 |
hDst.write(oFlex.__str__(oStatsLex))
def writeGrammarCheckerLexicon (self, spfDst, version):
echo(' * Lexique simplifié >> [ {} ] '.format(spfDst))
with open(spfDst[:-4]+".lex", 'w', encoding='utf-8', newline="\n") as hDst:
hDst.write(MPLHEADER)
hDst.write("# Lexique simplifié pour Grammalecte v{}\n# Licence : MPL v2.0\n\n".format(version))
hDst.write(Flexion.simpleHeader())
for oFlex in self.lFlexions:
hDst.write(oFlex.getGrammarCheckerRepr())
def createFiles (self, spDst, lDictVars, nMode, bSimplified):
sDicName = PREFIX_DICT_PATH + self.sVersion
spDic = spDst + '/' + sDicName
|
| ︙ | ︙ |
Modified graphspell/dawg.py from [70e7a3c81c] to [12eff6a146].
| ︙ | ︙ | |||
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
import re
import traceback
from . import str_transform as st
from .progressbar import ProgressBar
def readFile (spf):
"generator: read file <spf> and return for each line a list of elements separated by a tabulation."
print(" < Read lexicon: " + spf)
if os.path.isfile(spf):
with open(spf, "r", encoding="utf-8") as hSrc:
for sLine in hSrc:
sLine = sLine.strip()
| > > > > > > > | > > > | 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
import re
import traceback
from . import str_transform as st
from .progressbar import ProgressBar
dLexiconData = {}
def readFile (spf):
"generator: read file <spf> and return for each line a list of elements separated by a tabulation."
print(" < Read lexicon: " + spf)
if os.path.isfile(spf):
dLexiconData.clear()
with open(spf, "r", encoding="utf-8") as hSrc:
for sLine in hSrc:
sLine = sLine.strip()
if sLine.startswith("##") :
m = re.match("## *(\\w+) *:(.*)$", sLine)
if m:
dLexiconData[m.group(1)] = m.group(2).strip()
elif sLine and not sLine.startswith("#"):
yield sLine.split("\t")
if dLexiconData:
print("Data from dictionary:")
print(dLexiconData)
else:
raise OSError("# Error. File not found or not loadable: " + spf)
class DAWG:
"""DIRECT ACYCLIC WORD GRAPH"""
|
| ︙ | ︙ | |||
118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
+ [ (dTag[tag]+nChar+nAff, dTagOccur[tag]) for tag in dTag ] )
self.sFileName = src if type(src) is str else "[None]"
self.sLangCode = sLangCode
self.sLangName = sLangName
self.sDicName = sDicName
self.sDescription = sDescription
self.nEntry = len(lWord)
self.aPreviousEntry = []
DawgNode.resetNextId()
self.oRoot = DawgNode()
self.lUncheckedNodes = [] # list of nodes that have not been checked for duplication.
self.lMinimizedNodes = {} # list of unique nodes that have been checked for duplication.
self.lSortedNodes = [] # version 2 and 3
| > > > > > | 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
+ [ (dTag[tag]+nChar+nAff, dTagOccur[tag]) for tag in dTag ] )
self.sFileName = src if type(src) is str else "[None]"
self.sLangCode = sLangCode
self.sLangName = sLangName
self.sDicName = sDicName
self.sDescription = sDescription
if dLexiconData:
self.sLangCode = dLexiconData.get("LangCode", self.sLangCode)
self.sLangName = dLexiconData.get("LangName", self.sLangName)
self.sDicName = dLexiconData.get("DicName", self.sDicName)
self.sDescription = dLexiconData.get("Description", self.sDescription)
self.nEntry = len(lWord)
self.aPreviousEntry = []
DawgNode.resetNextId()
self.oRoot = DawgNode()
self.lUncheckedNodes = [] # list of nodes that have not been checked for duplication.
self.lMinimizedNodes = {} # list of unique nodes that have been checked for duplication.
self.lSortedNodes = [] # version 2 and 3
|
| ︙ | ︙ |
Modified lexicons/French.lex from [b69ae271b0] to [af55bd82c0].
1 2 3 4 5 6 7 | # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. # Lexique simplifié pour Grammalecte v7.0 # Licence : MPL v2.0 | < < < < < < | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 | # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. # Lexique simplifié pour Grammalecte v7.0 # Licence : MPL v2.0 # :POS ;LEX ~SEM =FQ /DIC de de :G:D:e:i/* de de :G:R:Rv/* et et :G:Cc/* à à :G:R:Rv/* des des :G:D:e:p/* du du :G:D:m:s/* |
| ︙ | ︙ |