Grammalecte  Changes On Branch cf5aecc33fd56317

Changes In Branch graphspell Through [cf5aecc33f] Excluding Merge-Ins

This is equivalent to a diff from d27b7d08ff to cf5aecc33f

2017-12-25
13:19
[build] merge graphspell: graphspell as independant package check-in: 37ee1a1b0d user: olr tags: trunk, build
2017-12-24
18:39
[build][py] move files from gc_core to graphspell check-in: bb8356bd7d user: olr tags: build, graphspell
18:15
[build] modify imports check-in: cf5aecc33f user: olr tags: build, graphspell
17:50
[build] graphspell as separate package check-in: ab436f24fc user: olr tags: build, graphspell
2017-12-22
15:08
[fr] formateur de texte: tiret en début de paragraphe avant points check-in: d27b7d08ff user: olr tags: trunk, fr
09:12
[fx] new description for WebExtension (necessary to be found via search engine) check-in: c008f61541 user: olr tags: trunk, fx

Modified gc_core/js/lang_core/gc_engine.js from [a71cf52ea3] to [5d510d771f].

318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
    },

    //// Initialization

    load: function (sContext="JavaScript", sPath="") {
        try {
            if (typeof(require) !== 'undefined') {
                var ibdawg = require("resource://grammalecte/ibdawg.js");
                _oDict = new ibdawg.IBDAWG("${dic_name}.json");
            } else {
                _oDict = new IBDAWG("${dic_name}.json", sPath);
            }
            _sAppContext = sContext;
            _dOptions = gc_options.getOptions(sContext).gl_shallowCopy();     // duplication necessary, to be able to reset to default
        }







|







318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
    },

    //// Initialization

    load: function (sContext="JavaScript", sPath="") {
        try {
            if (typeof(require) !== 'undefined') {
                var ibdawg = require("resource://grammalecte/graphspell/ibdawg.js");
                _oDict = new ibdawg.IBDAWG("${dic_name}.json");
            } else {
                _oDict = new IBDAWG("${dic_name}.json", sPath);
            }
            _sAppContext = sContext;
            _dOptions = gc_options.getOptions(sContext).gl_shallowCopy();     // duplication necessary, to be able to reset to default
        }

Modified gc_core/py/lang_core/gc_engine.py from [f3dfb392d0] to [e1c3ad1859].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Grammalecte
# Grammar checker engine

import re
import sys
import os
import traceback
#import unicodedata
from itertools import chain

from ..ibdawg import IBDAWG
from ..echo import echo
from . import gc_options


__all__ = [ "lang", "locales", "pkg", "name", "version", "author", \
            "load", "parse", "getDictionary", \
            "setOption", "setOptions", "getOptions", "getDefaultOptions", "getOptionsLabels", "resetOptions", "displayOptions", \
            "ignoreRule", "resetIgnoreRules", "reactivateRule", "listRules", "displayRules" ]










|
|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Grammalecte
# Grammar checker engine

import re
import sys
import os
import traceback
#import unicodedata
from itertools import chain

from ..graphspell.ibdawg import IBDAWG
from ..graphspell.echo import echo
from . import gc_options


__all__ = [ "lang", "locales", "pkg", "name", "version", "author", \
            "load", "parse", "getDictionary", \
            "setOption", "setOptions", "getOptions", "getDefaultOptions", "getOptionsLabels", "resetOptions", "displayOptions", \
            "ignoreRule", "resetIgnoreRules", "reactivateRule", "listRules", "displayRules" ]

Modified gc_lang/fr/webext/gce_worker.js from [666eb5f433] to [af9c897bd6].

29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
"use strict";


//console.log("[Worker] GC Engine Worker [start]");
//console.log(self);

importScripts("grammalecte/helpers.js");
importScripts("grammalecte/str_transform.js");
importScripts("grammalecte/char_player.js");
importScripts("grammalecte/ibdawg.js");
importScripts("grammalecte/text.js");
importScripts("grammalecte/tokenizer.js");
importScripts("grammalecte/fr/conj.js");
importScripts("grammalecte/fr/mfsp.js");
importScripts("grammalecte/fr/phonet.js");
importScripts("grammalecte/fr/cregex.js");
importScripts("grammalecte/fr/gc_options.js");
importScripts("grammalecte/fr/gc_rules.js");
importScripts("grammalecte/fr/gc_engine.js");







|
|
|

|







29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
"use strict";


//console.log("[Worker] GC Engine Worker [start]");
//console.log(self);

importScripts("grammalecte/helpers.js");
importScripts("grammalecte/graphspell/str_transform.js");
importScripts("grammalecte/graphspell/char_player.js");
importScripts("grammalecte/graphspell/ibdawg.js");
importScripts("grammalecte/text.js");
importScripts("grammalecte/graphspell/tokenizer.js");
importScripts("grammalecte/fr/conj.js");
importScripts("grammalecte/fr/mfsp.js");
importScripts("grammalecte/fr/phonet.js");
importScripts("grammalecte/fr/cregex.js");
importScripts("grammalecte/fr/gc_options.js");
importScripts("grammalecte/fr/gc_rules.js");
importScripts("grammalecte/fr/gc_engine.js");

Modified grammalecte-cli.py from [7a36d16e28] to [07800caa2b].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#!/usr/bin/env python3

import sys
import os.path
import argparse
import json

import grammalecte.fr as gce
import grammalecte.fr.lexicographe as lxg
import grammalecte.fr.textformatter as tf
import grammalecte.text as txt
import grammalecte.tokenizer as tkz
from grammalecte.echo import echo


_EXAMPLE = "Quoi ? Racontes ! Racontes-moi ! Bon sangg, parles ! Oui. Il y a des menteur partout. " \
           "Je suit sidéré par la brutales arrogance de cette homme-là. Quelle salopard ! Un escrocs de la pire espece. " \
           "Quant sera t’il châtiés pour ses mensonge ?             Merde ! J’en aie marre."

_HELP = """











|
|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#!/usr/bin/env python3

import sys
import os.path
import argparse
import json

import grammalecte.fr as gce
import grammalecte.fr.lexicographe as lxg
import grammalecte.fr.textformatter as tf
import grammalecte.text as txt
import grammalecte.graphspell.tokenizer as tkz
from grammalecte.graphspell.echo import echo


_EXAMPLE = "Quoi ? Racontes ! Racontes-moi ! Bon sangg, parles ! Oui. Il y a des menteur partout. " \
           "Je suit sidéré par la brutales arrogance de cette homme-là. Quelle salopard ! Un escrocs de la pire espece. " \
           "Quant sera t’il châtiés pour ses mensonge ?             Merde ! J’en aie marre."

_HELP = """

Modified grammalecte-server.py from [a304d7cf85] to [6dbdf10c60].

10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25

from bottle import Bottle, run, request, response, template, static_file

import grammalecte.fr as gce
import grammalecte.fr.lexicographe as lxg
import grammalecte.fr.textformatter as tf
import grammalecte.text as txt
import grammalecte.tokenizer as tkz
from grammalecte.echo import echo


HOMEPAGE = """
<!DOCTYPE HTML>
<html>
    <head>
        <meta http-equiv="content-type" content="text/html; charset=UTF-8" />







|
|







10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25

from bottle import Bottle, run, request, response, template, static_file

import grammalecte.fr as gce
import grammalecte.fr.lexicographe as lxg
import grammalecte.fr.textformatter as tf
import grammalecte.text as txt
import grammalecte.graphspell.tokenizer as tkz
from grammalecte.graphspell.echo import echo


HOMEPAGE = """
<!DOCTYPE HTML>
<html>
    <head>
        <meta http-equiv="content-type" content="text/html; charset=UTF-8" />

Modified helpers.py from [4468e2a847] to [b55cd82bf9].

1
2
3
4

5
6
7
8
9
10
11
# Useful tools

import os
import shutil

import zipfile

from string import Template


class cd:
    "Context manager for changing the current working directory"




>







1
2
3
4
5
6
7
8
9
10
11
12
# Useful tools

import os
import shutil
import errno
import zipfile

from string import Template


class cd:
    "Context manager for changing the current working directory"
53
54
55
56
57
58
59










60
61
62
63
64
65
66
def createCleanFolder (sp):
    "make an empty folder at <sp>: create it when missing, otherwise call eraseFolder on it"
    if os.path.exists(sp):
        # something is already there: delegate the cleaning to eraseFolder
        eraseFolder(sp)
        return
    os.makedirs(sp, exist_ok=True)












def fileFile (spf, dVars):
    """Return the content of file <spf> with its template variables filled from <dVars>.

    The file is read as UTF-8 and processed with string.Template.safe_substitute,
    so placeholders that have no entry in <dVars> are left as they are instead of
    raising KeyError.
    """
    # context manager: close the handle deterministically instead of relying on garbage collection
    with open(spf, "r", encoding="utf-8") as hSrc:
        sText = hSrc.read()
    return Template(sText).safe_substitute(dVars)


def copyAndFileTemplate (spfSrc, spfDst, dVars):







>
>
>
>
>
>
>
>
>
>







54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
def createCleanFolder (sp):
    "make an empty folder at <sp>: create it when missing, otherwise call eraseFolder on it"
    if os.path.exists(sp):
        # something is already there: delegate the cleaning to eraseFolder
        eraseFolder(sp)
        return
    os.makedirs(sp, exist_ok=True)


def copyFolderContent (spSrc, spDst):
    "copy the tree <spSrc> to <spDst>; when <spSrc> turns out not to be a directory, copy it as a single file"
    try:
        shutil.copytree(spSrc, spDst)
    except OSError as xErr:
        # only the “not a directory” case is recovered; any other failure goes back to the caller
        if xErr.errno != errno.ENOTDIR:
            raise
        shutil.copy(spSrc, spDst)


def fileFile (spf, dVars):
    """Return the content of file <spf> with its template variables filled from <dVars>.

    The file is read as UTF-8 and processed with string.Template.safe_substitute,
    so placeholders that have no entry in <dVars> are left as they are instead of
    raising KeyError.
    """
    # context manager: close the handle deterministically instead of relying on garbage collection
    with open(spf, "r", encoding="utf-8") as hSrc:
        sText = hSrc.read()
    return Template(sText).safe_substitute(dVars)


def copyAndFileTemplate (spfSrc, spfDst, dVars):

Modified lex_build.py from [c786502779] to [57c70320f0].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#!python3

# Lexicon builder

import argparse
from distutils import dir_util

import grammalecte.dawg as fsa
from grammalecte.ibdawg import IBDAWG


def build (spfSrc, sLangName, sDicName, bJSON=False, cStemmingMethod="S", nCompressMethod=1):
    """Transform the text lexicon <spfSrc> into a binary indexable dictionary.

    A DAWG is built from the lexicon, then asked to write <sDicName>.info.txt and
    <sDicName>.bdic in grammalecte/_dictionaries (<nCompressMethod> is cast to int).
    When <bJSON> is true, the dictionary is reloaded through IBDAWG and exported
    as <sDicName>.json in grammalecte-js/_dictionaries.
    """
    spPyDict = "grammalecte/_dictionaries"
    xDawg = fsa.DAWG(spfSrc, sLangName, cStemmingMethod)
    dir_util.mkpath(spPyDict)
    xDawg.writeInfo(spPyDict + "/" + sDicName + ".info.txt")
    xDawg.createBinary(spPyDict + "/" + sDicName + ".bdic", int(nCompressMethod))
    if not bJSON:
        return
    # JavaScript export requested
    spJsDict = "grammalecte-js/_dictionaries"
    dir_util.mkpath(spJsDict)
    IBDAWG(sDicName + ".bdic").writeAsJSObject(spJsDict + "/" + sDicName + ".json", bBinaryDictAsHexString=True)


def main ():
    xParser = argparse.ArgumentParser()
    xParser.add_argument("src_lexicon", type=str, help="path and file name of the source lexicon")
    xParser.add_argument("lang_name", type=str, help="language name")
    xParser.add_argument("dic_name", type=str, help="dictionary file name (without extension)")







|
|





|
|
|

|

|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#!python3

# Lexicon builder

import argparse
from distutils import dir_util

import graphspell.dawg as fsa
from graphspell.ibdawg import IBDAWG


def build (spfSrc, sLangName, sDicName, bJSON=False, cStemmingMethod="S", nCompressMethod=1):
    """Transform the text lexicon <spfSrc> into a binary indexable dictionary.

    A DAWG is built from the lexicon, then asked to write <sDicName>.info.txt and
    <sDicName>.bdic in graphspell/_dictionaries (<nCompressMethod> is cast to int).
    When <bJSON> is true, the dictionary is reloaded through IBDAWG and exported
    as <sDicName>.json in graphspell-js/_dictionaries.
    """
    spPyDict = "graphspell/_dictionaries"
    xDawg = fsa.DAWG(spfSrc, sLangName, cStemmingMethod)
    dir_util.mkpath(spPyDict)
    xDawg.writeInfo(spPyDict + "/" + sDicName + ".info.txt")
    xDawg.createBinary(spPyDict + "/" + sDicName + ".bdic", int(nCompressMethod))
    if not bJSON:
        return
    # JavaScript export requested
    spJsDict = "graphspell-js/_dictionaries"
    dir_util.mkpath(spJsDict)
    IBDAWG(sDicName + ".bdic").writeAsJSObject(spJsDict + "/" + sDicName + ".json", bBinaryDictAsHexString=True)


def main ():
    xParser = argparse.ArgumentParser()
    xParser.add_argument("src_lexicon", type=str, help="path and file name of the source lexicon")
    xParser.add_argument("lang_name", type=str, help="language name")
    xParser.add_argument("dic_name", type=str, help="dictionary file name (without extension)")

Modified make.py from [29a87bfcf1] to [b659589000].

272
273
274
275
276
277
278



















279
280
281
282
283
284
285
        except ImportError:
            print("# No complementary builder <build.py> in folder gc_lang/"+sLang)
        else:
            build_module.build(sLang, dVars, spLangPack)

    return dVars['version']





















def main ():
    print("Python: " + sys.version)
    xParser = argparse.ArgumentParser()
    xParser.add_argument("lang", type=str, nargs='+', help="lang project to generate (name of folder in /lang)")
    xParser.add_argument("-b", "--build_data", help="launch build_data.py (part 1 and 2)", action="store_true")
    xParser.add_argument("-bb", "--build_data_before", help="launch build_data.py (only part 1: before dictionary building)", action="store_true")







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
        except ImportError:
            print("# No complementary builder <build.py> in folder gc_lang/"+sLang)
        else:
            build_module.build(sLang, dVars, spLangPack)

    return dVars['version']


def copyGraphspellCore ():
    """Rebuild the graphspell subfolders of grammalecte and grammalecte-js.

    Both destination folders go through helpers.createCleanFolder, get a
    _dictionaries subfolder, then receive every entry of graphspell / graphspell-js
    that is not itself a directory (subfolders are not copied).
    """
    # (source folder, destination folder) for the Python and the JavaScript flavours
    lFolders = (
        ("graphspell", "grammalecte/graphspell"),
        ("graphspell-js", "grammalecte-js/graphspell"),
    )
    for _, spDst in lFolders:
        helpers.createCleanFolder(spDst)
    for _, spDst in lFolders:
        dir_util.mkpath(spDst + "/_dictionaries")
    for spSrc, spDst in lFolders:
        for sEntry in os.listdir(spSrc):
            spfSrc = spSrc + "/" + sEntry
            if os.path.isdir(spfSrc):
                continue
            file_util.copy_file(spfSrc, spDst)


def copyGraphspellDictionary (sDicName):
    """Copy the files of dictionary <sDicName> from Graphspell into the Grammalecte folders.

    <sDicName> is stripped of surrounding whitespace first. The .bdic and .info.txt
    files go to grammalecte/graphspell/_dictionaries, the .json file goes to
    grammalecte-js/graphspell/_dictionaries.
    """
    sName = sDicName.strip()
    spfPySrc = "graphspell/_dictionaries/" + sName
    spPyDst = "grammalecte/graphspell/_dictionaries"
    file_util.copy_file(spfPySrc + ".bdic", spPyDst)
    file_util.copy_file(spfPySrc + ".info.txt", spPyDst)
    file_util.copy_file("graphspell-js/_dictionaries/" + sName + ".json", "grammalecte-js/graphspell/_dictionaries")


def main ():
    print("Python: " + sys.version)
    xParser = argparse.ArgumentParser()
    xParser.add_argument("lang", type=str, nargs='+', help="lang project to generate (name of folder in /lang)")
    xParser.add_argument("-b", "--build_data", help="launch build_data.py (part 1 and 2)", action="store_true")
    xParser.add_argument("-bb", "--build_data_before", help="launch build_data.py (only part 1: before dictionary building)", action="store_true")
298
299
300
301
302
303
304


305
306
307
308
309
310
311
    if xArgs.build_data:
        xArgs.build_data_before = True
        xArgs.build_data_after = True

    dir_util.mkpath("_build")
    dir_util.mkpath("grammalecte")
    dir_util.mkpath("grammalecte-js")



    for sLang in xArgs.lang:
        if os.path.exists("gc_lang/"+sLang) and os.path.isdir("gc_lang/"+sLang):
            xConfig = getConfig(sLang)
            dVars = xConfig._sections['args']

            # copy gc_core common file in Python now to be able to compile dictionary if required







>
>







317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
    if xArgs.build_data:
        xArgs.build_data_before = True
        xArgs.build_data_after = True

    dir_util.mkpath("_build")
    dir_util.mkpath("grammalecte")
    dir_util.mkpath("grammalecte-js")

    copyGraphspellCore()

    for sLang in xArgs.lang:
        if os.path.exists("gc_lang/"+sLang) and os.path.isdir("gc_lang/"+sLang):
            xConfig = getConfig(sLang)
            dVars = xConfig._sections['args']

            # copy gc_core common file in Python now to be able to compile dictionary if required
320
321
322
323
324
325
326
327
328
329
330
331
332




333
334
335
336
337
338
339
                # lang data
                try:
                    build_data_module = importlib.import_module("gc_lang."+sLang+".build_data")
                except ImportError:
                    print("# Error. Couldn’t import file build_data.py in folder gc_lang/"+sLang)
            if build_data_module and xArgs.build_data_before:
                build_data_module.before('gc_lang/'+sLang, dVars, xArgs.javascript)
            if xArgs.dict or not os.path.exists("grammalecte/_dictionaries"):
                import lex_build
                lex_build.build(dVars['lexicon_src'], dVars['lang_name'], dVars['dic_name'], xArgs.javascript, dVars['stemming_method'], int(dVars['fsa_method']))
            if build_data_module and xArgs.build_data_after:
                build_data_module.after('gc_lang/'+sLang, dVars, xArgs.javascript)





            # make
            sVersion = create(sLang, xConfig, xArgs.install, xArgs.javascript, )

            # tests
            if xArgs.tests or xArgs.perf or xArgs.perf_memo:
                print("> Running tests")
                try:







|





>
>
>
>







341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
                # lang data
                try:
                    build_data_module = importlib.import_module("gc_lang."+sLang+".build_data")
                except ImportError:
                    print("# Error. Couldn’t import file build_data.py in folder gc_lang/"+sLang)
            if build_data_module and xArgs.build_data_before:
                build_data_module.before('gc_lang/'+sLang, dVars, xArgs.javascript)
            if xArgs.dict:
                import lex_build
                lex_build.build(dVars['lexicon_src'], dVars['lang_name'], dVars['dic_name'], xArgs.javascript, dVars['stemming_method'], int(dVars['fsa_method']))
            if build_data_module and xArgs.build_data_after:
                build_data_module.after('gc_lang/'+sLang, dVars, xArgs.javascript)

            # copy dictionaries from Graphspell
            for sDicName in dVars['dic_name'].split(","):
                copyGraphspellDictionary(sDicName)

            # make
            sVersion = create(sLang, xConfig, xArgs.install, xArgs.javascript, )

            # tests
            if xArgs.tests or xArgs.perf or xArgs.perf_memo:
                print("> Running tests")
                try:

Modified reader.py from [0c60e1da75] to [66f5eb17ae].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#!python3
# Just a file for one-shot scripts

import os
import sys
import re

import grammalecte.ibdawg as ibdawg

oDict = ibdawg.IBDAWG("French.bdic")


def readFile (spf):
    if os.path.isfile(spf):
        with open(spf, "r", encoding="utf-8") as hSrc:







|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#!python3
# Just a file for one-shot scripts

import os
import sys
import re

import graphspell.ibdawg as ibdawg

oDict = ibdawg.IBDAWG("French.bdic")


def readFile (spf):
    if os.path.isfile(spf):
        with open(spf, "r", encoding="utf-8") as hSrc: