Grammalecte: Check-in [831b79d96c]

Overview
Comment: [build][fx][tb][lo] description field for dictionaries
SHA3-256: 831b79d96cbb5b06ca6695b7fc5dc1915c2fd2a7ca74ad526d2b9f8b9e3b40cf
User & Date: olr on 2019-01-11 16:37:03
Context
2019-01-16 15:57  [build][graphspell] dawg builder: use data from lexicon when found (check-in: b71cbd8aad, user: olr, tags: build, graphspell, comdic)
2019-01-11 16:37  [build][fx][tb][lo] description field for dictionaries (check-in: 831b79d96c, user: olr, tags: build, tb, fx, lo, comdic)
2019-01-09 17:22  merge trunk (check-in: 096875de48, user: olr, tags: comdic)
Changes

Modified gc_lang/fr/config.ini from [aee238173d] to [c145aeaae2].

Before (lines 13-42):
description = Correcteur grammatical pour le français.
extras = README_fr.txt
logo = logo.png

# main dictionary
lexicon_src = lexicons/French.lex
dic_filenames = fr-allvars,fr-classic,fr-reform

dic_name = Français,Français (Classique/Moderne),Français (Réforme 1990)
dic_filter = ,[*CMPX]$,[*RPX]$
dic_default_filename_py = fr-allvars
dic_default_filename_js = fr-allvars
# extended dictionary
lexicon_extended_src = lexicons/French.extended.lex
dic_extended_filename = fr.extended
dic_extended_name = Français - dictionnaire étendu

# community dictionary
lexicon_community_src = lexicons/French.community.lex
dic_community_filename = fr.community
dic_community_name = Français - dictionnaire communautaire

# personal dictionary
lexicon_personal_src = lexicons/French.personal.lex
dic_personal_filename = fr.personal
dic_personal_name = Français - dictionnaire personnel

# Finite state automaton compression: 1, 2 (experimental) or 3 (experimental)
fsa_method = 1
# stemming method: S for suffixes only, A for prefixes and suffixes
stemming_method = S

# LibreOffice
unopkg = C:/Program Files/LibreOffice/program/unopkg.com

After (lines 13-46):
description = Correcteur grammatical pour le français.
extras = README_fr.txt
logo = logo.png

# main dictionary
lexicon_src = lexicons/French.lex
dic_filenames = fr-allvars,fr-classic,fr-reform
dic_name = fr-allvars,fr-classic,fr-reform
dic_description = Français (Toutes variantes),Français (Classique),Français (Réforme 1990)
dic_filter = ,[*CMPX]$,[*RPX]$
dic_default_filename_py = fr-allvars
dic_default_filename_js = fr-allvars
# extended dictionary
lexicon_extended_src = lexicons/French.extended.lex
dic_extended_filename = fr.extended
dic_extended_name = fr.extended
dic_extended_description = Français - dictionnaire étendu
# community dictionary
lexicon_community_src = lexicons/French.community.lex
dic_community_filename = fr.community
dic_community_name = fr.community
dic_community_description = Français - dictionnaire communautaire
# personal dictionary
lexicon_personal_src = lexicons/French.personal.lex
dic_personal_filename = fr.personal
dic_personal_name = fr.personal
dic_personal_description = Français - dictionnaire personnel
# Finite state automaton compression: 1, 2 (experimental) or 3 (experimental)
fsa_method = 1
# stemming method: S for suffixes only, A for prefixes and suffixes
stemming_method = S

# LibreOffice
unopkg = C:/Program Files/LibreOffice/program/unopkg.com
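
In the new layout, dic_name holds the internal dictionary names and the human-readable labels move to the new dic_description field; the entries of dic_filenames, dic_name, dic_description and dic_filter are matched positionally. A minimal standalone sketch of how the lists line up, using the literal values from this hunk (the actual parsing happens in make.py, further down):

# illustrative pairing of the comma-separated fields shown above
dic_filenames   = "fr-allvars,fr-classic,fr-reform"
dic_name        = "fr-allvars,fr-classic,fr-reform"
dic_description = "Français (Toutes variantes),Français (Classique),Français (Réforme 1990)"
dic_filter      = ",[*CMPX]$,[*RPX]$"

for sFile, sName, sDesc, sFilter in zip(dic_filenames.split(","), dic_name.split(","),
                                        dic_description.split(","), dic_filter.split(",")):
    print(f"{sFile}: name={sName}, description={sDesc}, filter={sFilter or '(none)'}")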

Modified gc_lang/fr/oxt/DictOptions/LexiconEditor.py from [30177fb38e] to [4e150dc058].

Before (lines 407-421):
    @_waitPointer
    def saveLexicon (self):
        xGridDataModel = self.xGridModelLex.GridDataModel
        lEntry = []
        for i in range(xGridDataModel.RowCount):
            lEntry.append(xGridDataModel.getRowData(i))
        if lEntry:
            oDAWG = dawg.DAWG(lEntry, "S", "fr", "Français", "fr.personal")
            self.oPersonalDicJSON = oDAWG.getBinaryAsJSON()
            self.xOptionNode.setPropertyValue("personal_dic", json.dumps(self.oPersonalDicJSON, ensure_ascii=False))
            self.xSettingNode.commitChanges()
            self.xNumDic.Label = str(self.oPersonalDicJSON["nEntry"])
            self.xDateDic.Label = self.oPersonalDicJSON["sDate"]
        else:
            self.xOptionNode.setPropertyValue("personal_dic", "")

After (lines 407-421):
    @_waitPointer
    def saveLexicon (self):
        xGridDataModel = self.xGridModelLex.GridDataModel
        lEntry = []
        for i in range(xGridDataModel.RowCount):
            lEntry.append(xGridDataModel.getRowData(i))
        if lEntry:
            oDAWG = dawg.DAWG(lEntry, "S", "fr", "Français", "fr.personal", "Dictionnaire personnel")
            self.oPersonalDicJSON = oDAWG.getBinaryAsJSON()
            self.xOptionNode.setPropertyValue("personal_dic", json.dumps(self.oPersonalDicJSON, ensure_ascii=False))
            self.xSettingNode.commitChanges()
            self.xNumDic.Label = str(self.oPersonalDicJSON["nEntry"])
            self.xDateDic.Label = self.oPersonalDicJSON["sDate"]
        else:
            self.xOptionNode.setPropertyValue("personal_dic", "")

Modified gc_lang/fr/oxt/Dictionnaires/dictionaries/fr-classique.aff from [a9c2422fab] to [5be819083a].

Before (lines 1-14):
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# AFFIXES DU DICTIONNAIRE ORTHOGRAPHIQUE FRANÇAIS “CLASSIQUE” v7.0
# par Olivier R. -- licence MPL 2.0
# Généré le 09-01-2019 à 18:08
# Pour améliorer le dictionnaire, allez sur https://grammalecte.net/



SET UTF-8

WORDCHARS -’'1234567890.

After (lines 1-14):
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# AFFIXES DU DICTIONNAIRE ORTHOGRAPHIQUE FRANÇAIS “CLASSIQUE” v7.0
# par Olivier R. -- licence MPL 2.0
# Généré le 11-01-2019 à 17:14
# Pour améliorer le dictionnaire, allez sur https://grammalecte.net/



SET UTF-8

WORDCHARS -’'1234567890.

Modified gc_lang/fr/oxt/Dictionnaires/dictionaries/fr-moderne.aff from [cf1eb7064c] to [93c2cdd8ee].

Before (lines 1-14):
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# AFFIXES DU DICTIONNAIRE ORTHOGRAPHIQUE FRANÇAIS “MODERNE” v7.0
# par Olivier R. -- licence MPL 2.0
# Généré le 09-01-2019 à 18:08
# Pour améliorer le dictionnaire, allez sur https://grammalecte.net/



SET UTF-8

WORDCHARS -’'1234567890.

After (lines 1-14):
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# AFFIXES DU DICTIONNAIRE ORTHOGRAPHIQUE FRANÇAIS “MODERNE” v7.0
# par Olivier R. -- licence MPL 2.0
# Généré le 11-01-2019 à 17:14
# Pour améliorer le dictionnaire, allez sur https://grammalecte.net/



SET UTF-8

WORDCHARS -’'1234567890.

Modified gc_lang/fr/oxt/Dictionnaires/dictionaries/fr-reforme1990.aff from [b9b9132d08] to [84824df4fa].

Before (lines 1-14):
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# AFFIXES DU DICTIONNAIRE ORTHOGRAPHIQUE FRANÇAIS “RÉFORME 1990” v7.0
# par Olivier R. -- licence MPL 2.0
# Généré le 09-01-2019 à 18:08
# Pour améliorer le dictionnaire, allez sur https://grammalecte.net/



SET UTF-8

WORDCHARS -’'1234567890.

After (lines 1-14):
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# AFFIXES DU DICTIONNAIRE ORTHOGRAPHIQUE FRANÇAIS “RÉFORME 1990” v7.0
# par Olivier R. -- licence MPL 2.0
# Généré le 11-01-2019 à 17:14
# Pour améliorer le dictionnaire, allez sur https://grammalecte.net/



SET UTF-8

WORDCHARS -’'1234567890.

Modified gc_lang/fr/oxt/Dictionnaires/dictionaries/fr-toutesvariantes.aff from [b23332b58c] to [3297933a4a].

Before (lines 1-14):
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# AFFIXES DU DICTIONNAIRE ORTHOGRAPHIQUE FRANÇAIS “TOUTES VARIANTES” v7.0
# par Olivier R. -- licence MPL 2.0
# Généré le 09-01-2019 à 18:08
# Pour améliorer le dictionnaire, allez sur https://grammalecte.net/



SET UTF-8

WORDCHARS -’'1234567890.

After (lines 1-14):
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# AFFIXES DU DICTIONNAIRE ORTHOGRAPHIQUE FRANÇAIS “TOUTES VARIANTES” v7.0
# par Olivier R. -- licence MPL 2.0
# Généré le 11-01-2019 à 17:14
# Pour améliorer le dictionnaire, allez sur https://grammalecte.net/



SET UTF-8

WORDCHARS -’'1234567890.

Modified gc_lang/fr/tb/content/lex_editor.js from [0814cc49a1] to [52568b3e47].

Before (lines 474-488):
        document.getElementById("import_button").addEventListener("click", () => { this.import(); }, false);
    },

    build: function () {
        let xProgressNode = document.getElementById("wait_progress");
        let lEntry = oLexiconTable.getEntries();
        if (lEntry.length > 0) {
            let oDAWG = new DAWG(lEntry, "S", "fr", "Français", "fr.personal", xProgressNode);
            let oJSON = oDAWG.createBinaryJSON(1);
            oFileHandler.saveFile("fr.personal.json", JSON.stringify(oJSON));
            this.oIBDAWG = new IBDAWG(oJSON);
            this.setDictData(this.oIBDAWG.nEntry, this.oIBDAWG.sDate);
            //browser.runtime.sendMessage({ sCommand: "setDictionary", dParam: {sType: "personal", oDict: oJSON}, dInfo: {} });
            enableElement("export_button");
        } else {

After (lines 474-488):
        document.getElementById("import_button").addEventListener("click", () => { this.import(); }, false);
    },

    build: function () {
        let xProgressNode = document.getElementById("wait_progress");
        let lEntry = oLexiconTable.getEntries();
        if (lEntry.length > 0) {
            let oDAWG = new DAWG(lEntry, "S", "fr", "Français", "fr.personal", "Dictionnaire personnel", xProgressNode);
            let oJSON = oDAWG.createBinaryJSON(1);
            oFileHandler.saveFile("fr.personal.json", JSON.stringify(oJSON));
            this.oIBDAWG = new IBDAWG(oJSON);
            this.setDictData(this.oIBDAWG.nEntry, this.oIBDAWG.sDate);
            //browser.runtime.sendMessage({ sCommand: "setDictionary", dParam: {sType: "personal", oDict: oJSON}, dInfo: {} });
            enableElement("export_button");
        } else {

Modified gc_lang/fr/webext/panel/lex_editor.js from [e8232416a9] to [380093a31f].

Before (lines 541-555):
        browser.storage.local.set({ "dictionaries": this.oDictionaries });
    }
}

const oBinaryDict = {

    oIBDAWG: null,
    sName: null,


    load: function (sName) {
        console.log("lexicon editor, load: " + sName);
        this.sName = sName;
        let oJSON = oDictHandler.getDictionary(sName);
        if (oJSON) {
            this.parseDict(oJSON);

After (lines 541-556):
        browser.storage.local.set({ "dictionaries": this.oDictionaries });
    }
}

const oBinaryDict = {

    oIBDAWG: null,
    sName: "",
    sDescription: "",

    load: function (sName) {
        console.log("lexicon editor, load: " + sName);
        this.sName = sName;
        let oJSON = oDictHandler.getDictionary(sName);
        if (oJSON) {
            this.parseDict(oJSON);

Before (lines 617-631):
        document.getElementById("import_input").addEventListener("change", () => { this.import(); }, false);
    },

    build: function () {
        let xProgressNode = document.getElementById("wait_progress");
        let lEntry = oLexiconTable.getEntries();
        if (lEntry.length > 0) {
            let oDAWG = new DAWG(lEntry, "S", "fr", "Français", this.sName, xProgressNode);
            let oJSON = oDAWG.createBinaryJSON(1);
            oDictHandler.saveDictionary(this.sName, oJSON);
            this.oIBDAWG = new IBDAWG(oJSON);
            this.setDictData(this.oIBDAWG.nEntry, this.oIBDAWG.sDate);
        } else {
            oDictHandler.saveDictionary(this.sName, null);
            this.setDictData(0, "[néant]");

After (lines 618-632):
        document.getElementById("import_input").addEventListener("change", () => { this.import(); }, false);
    },

    build: function () {
        let xProgressNode = document.getElementById("wait_progress");
        let lEntry = oLexiconTable.getEntries();
        if (lEntry.length > 0) {
            let oDAWG = new DAWG(lEntry, "S", "fr", "Français", this.sName, this.sDescription, xProgressNode);
            let oJSON = oDAWG.createBinaryJSON(1);
            oDictHandler.saveDictionary(this.sName, oJSON);
            this.oIBDAWG = new IBDAWG(oJSON);
            this.setDictData(this.oIBDAWG.nEntry, this.oIBDAWG.sDate);
        } else {
            oDictHandler.saveDictionary(this.sName, null);
            this.setDictData(0, "[néant]");

Modified lex_build.py from [346704203c] to [72263209a8].

Before (lines 1-38):
#!python3

# Lexicon builder

import argparse
from distutils import dir_util

import graphspell.dawg as fsa
from graphspell.ibdawg import IBDAWG


def build (spfSrc, sLangCode, sLangName, sfDict, bJSON=False, sDicName="", sFilter="", cStemmingMethod="S", nCompressMethod=1):
    "transform a text lexicon as a binary indexable dictionary"
    oDAWG = fsa.DAWG(spfSrc, cStemmingMethod, sLangCode, sLangName, sDicName, sFilter)
    dir_util.mkpath("graphspell/_dictionaries")
    oDAWG.writeInfo("graphspell/_dictionaries/" + sfDict + ".info.txt")
    oDAWG.writeBinary("graphspell/_dictionaries/" + sfDict + ".bdic", int(nCompressMethod))
    if bJSON:
        dir_util.mkpath("graphspell-js/_dictionaries")
        oDic = IBDAWG(sfDict + ".bdic")
        oDic.writeAsJSObject("graphspell-js/_dictionaries/" + sfDict + ".json", bBinaryDictAsHexString=True)


def main ():
    xParser = argparse.ArgumentParser()
    xParser.add_argument("src_lexicon", type=str, help="path and file name of the source lexicon")
    xParser.add_argument("lang_code", type=str, help="language code")
    xParser.add_argument("lang_name", type=str, help="language name")
    xParser.add_argument("dic_filename", type=str, help="dictionary file name (without extension)")
    xParser.add_argument("-js", "--json", help="Build dictionary in JSON", action="store_true")
    xParser.add_argument("-s", "--stemming", help="stemming method: S=suffixes, A=affixes, N=no stemming", type=str, choices=["S", "A", "N"], default="S")
    xParser.add_argument("-c", "--compress", help="compression method: 1, 2 (beta), 3, (beta)", type=int, choices=[1, 2, 3], default=1)
    xArgs = xParser.parse_args()
    build(xArgs.src_lexicon, xArgs.lang_code, xArgs.lang_name, xArgs.dic_filename, xArgs.json)
    

if __name__ == '__main__':
    main()

After (lines 1-38):
#!python3

# Lexicon builder

import argparse
from distutils import dir_util

import graphspell.dawg as fsa
from graphspell.ibdawg import IBDAWG


def build (spfSrc, sLangCode, sLangName, sfDict, bJSON=False, sDicName="", sDescription="", sFilter="", cStemmingMethod="S", nCompressMethod=1):
    "transform a text lexicon as a binary indexable dictionary"
    oDAWG = fsa.DAWG(spfSrc, cStemmingMethod, sLangCode, sLangName, sDicName, sDescription, sFilter)
    dir_util.mkpath("graphspell/_dictionaries")
    oDAWG.writeInfo("graphspell/_dictionaries/" + sfDict + ".info.txt")
    oDAWG.writeBinary("graphspell/_dictionaries/" + sfDict + ".bdic", int(nCompressMethod))
    if bJSON:
        dir_util.mkpath("graphspell-js/_dictionaries")
        oDic = IBDAWG(sfDict + ".bdic")
        oDic.writeAsJSObject("graphspell-js/_dictionaries/" + sfDict + ".json", bBinaryDictAsHexString=True)


def main ():
    xParser = argparse.ArgumentParser()
    xParser.add_argument("src_lexicon", type=str, help="path and file name of the source lexicon")
    xParser.add_argument("lang_code", type=str, help="language code")
    xParser.add_argument("lang_name", type=str, help="language name")
    xParser.add_argument("dic_filename", type=str, help="dictionary file name (without extension)")
    xParser.add_argument("-js", "--json", help="Build dictionary in JSON", action="store_true")
    xParser.add_argument("-s", "--stemming", help="stemming method: S=suffixes, A=affixes, N=no stemming", type=str, choices=["S", "A", "N"], default="S")
    xParser.add_argument("-c", "--compress", help="compression method: 1, 2 (beta), 3, (beta)", type=int, choices=[1, 2, 3], default=1)
    xArgs = xParser.parse_args()
    build(xArgs.src_lexicon, xArgs.lang_code, xArgs.lang_name, xArgs.dic_filename, xArgs.json)


if __name__ == '__main__':
    main()
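
With the extra sDescription parameter, a caller that wants the description embedded in the generated dictionary has to pass it explicitly; main() above still forwards only the positional arguments. A hedged sketch of a direct call using the personal-dictionary values from config.ini (the lexicon path and strings come from that file and may need adjusting to the working directory; this call is not part of the check-in):

import lex_build

# build the personal dictionary with an explicit name and description
lex_build.build("lexicons/French.personal.lex", "fr", "Français", "fr.personal",
                bJSON=True,
                sDicName="fr.personal",
                sDescription="Français - dictionnaire personnel",
                cStemmingMethod="S",
                nCompressMethod=1)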

Modified make.py from [7a5afc5f1b] to [67b200350b].

Before (lines 346-376):

def buildDictionary (dVars, sType, bJavaScript=False):
    "build binary dictionary for Graphspell from lexicons"
    if sType == "main":
        spfLexSrc = dVars['lexicon_src']
        lSfDictDst = dVars['dic_filenames'].split(",")
        lDicName = dVars['dic_name'].split(",")

        lFilter = dVars['dic_filter'].split(",")
        for sfDictDst, sDicName, sFilter in zip(lSfDictDst, lDicName, lFilter):
            lex_build.build(spfLexSrc, dVars['lang'], dVars['lang_name'], sfDictDst, bJavaScript, sDicName, sFilter, dVars['stemming_method'], int(dVars['fsa_method']))
    else:
        if sType == "extended":
            spfLexSrc = dVars['lexicon_extended_src']
            sfDictDst = dVars['dic_extended_filename']
            sDicName = dVars['dic_extended_name']

        elif sType == "community":
            spfLexSrc = dVars['lexicon_community_src']
            sfDictDst = dVars['dic_community_filename']
            sDicName = dVars['dic_community_name']

        elif sType == "personal":
            spfLexSrc = dVars['lexicon_personal_src']
            sfDictDst = dVars['dic_personal_filename']
            sDicName = dVars['dic_personal_name']

        lex_build.build(spfLexSrc, dVars['lang'], dVars['lang_name'], sfDictDst, bJavaScript, sDicName, "", dVars['stemming_method'], int(dVars['fsa_method']))



def main ():
    "build Grammalecte with requested options"
    print("Python: " + sys.version)
    xParser = argparse.ArgumentParser()

After (lines 346-380):

def buildDictionary (dVars, sType, bJavaScript=False):
    "build binary dictionary for Graphspell from lexicons"
    if sType == "main":
        spfLexSrc = dVars['lexicon_src']
        lSfDictDst = dVars['dic_filenames'].split(",")
        lDicName = dVars['dic_name'].split(",")
        lDescription = dVars['dic_description'].split(",")
        lFilter = dVars['dic_filter'].split(",")
        for sfDictDst, sDicName, sDescription, sFilter in zip(lSfDictDst, lDicName, lDescription, lFilter):
            lex_build.build(spfLexSrc, dVars['lang'], dVars['lang_name'], sfDictDst, bJavaScript, sDicName, sDescription, sFilter, dVars['stemming_method'], int(dVars['fsa_method']))
    else:
        if sType == "extended":
            spfLexSrc = dVars['lexicon_extended_src']
            sfDictDst = dVars['dic_extended_filename']
            sDicName = dVars['dic_extended_name']
            sDescription = dVars['dic_extended_description']
        elif sType == "community":
            spfLexSrc = dVars['lexicon_community_src']
            sfDictDst = dVars['dic_community_filename']
            sDicName = dVars['dic_community_name']
            sDescription = dVars['dic_community_description']
        elif sType == "personal":
            spfLexSrc = dVars['lexicon_personal_src']
            sfDictDst = dVars['dic_personal_filename']
            sDicName = dVars['dic_personal_name']
            sDescription = dVars['dic_personal_description']
        lex_build.build(spfLexSrc, dVars['lang'], dVars['lang_name'], sfDictDst, bJavaScript, sDicName, sDescription, "", dVars['stemming_method'], int(dVars['fsa_method']))



def main ():
    "build Grammalecte with requested options"
    print("Python: " + sys.version)
    xParser = argparse.ArgumentParser()
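
Note that zip() silently stops at the shortest list, so if dic_description (or any of the other comma-separated fields) has fewer entries than dic_filenames, the remaining dictionaries are skipped without an error. A quick illustration, not part of the check-in:

lNames = ["fr-allvars", "fr-classic", "fr-reform"]
lDescriptions = ["Français (Toutes variantes)", "Français (Classique)"]  # one entry short
print(list(zip(lNames, lDescriptions)))
# [('fr-allvars', 'Français (Toutes variantes)'), ('fr-classic', 'Français (Classique)')]
# 'fr-reform' is silently dropped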