Grammalecte  Check-in [62304c0cd5]

Overview
Comment:[core] use spellchecker instead of ibdawg directly
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | multid
Files: files | file ages | folders
SHA3-256: 62304c0cd5d17eab2062f1c643570ae3ce324f1f84c70c44986d5b9c243fc2b4
User & Date: olr on 2018-02-13 14:26:26
Other Links: branch diff | manifest | tags
Context
2018-02-13
15:44
[core][cli][server][graphspell][fx] use spellchecker instead of ibdawg check-in: 18db5d65f0 user: olr tags: cli, core, server, fx, graphspell, multid
14:26
[core] use spellchecker instead of ibdawg directly check-in: 62304c0cd5 user: olr tags: core, multid
14:25
[graphspell][js] ibdawg: remove useless var in constructor check-in: 1b2e823757 user: olr tags: graphspell, multid
Changes

Modified compile_rules.py from [7fa2e820bf] to [9bd1433006].

52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
    s = re.sub(r"textarea0\(\s*", 'look(sx, ', s)                                           # textarea0(s)
    s = re.sub(r"before0_chk1\(\s*", 'look_chk1(dDA, sx[:m.start()], 0, ', s)               # before0_chk1(s)
    s = re.sub(r"after0_chk1\(\s*", 'look_chk1(dDA, sx[m.end():], m.end(), ', s)            # after0_chk1(s)
    s = re.sub(r"textarea0_chk1\(\s*", 'look_chk1(dDA, sx, 0, ', s)                         # textarea0_chk1(s)
    s = re.sub(r"isEndOfNG\(\s*\)", 'isEndOfNG(dDA, s[m.end():], m.end())', s)              # isEndOfNG(s)
    s = re.sub(r"isNextNotCOD\(\s*\)", 'isNextNotCOD(dDA, s[m.end():], m.end())', s)        # isNextNotCOD(s)
    s = re.sub(r"isNextVerb\(\s*\)", 'isNextVerb(dDA, s[m.end():], m.end())', s)            # isNextVerb(s)
    s = re.sub(r"\bspell *[(]", '_oDict.isValid(', s)
    s = re.sub(r"[\\](\d+)", 'm.group(\\1)', s)
    return s


def uppercase (s, sLang):
    "(flag i is not enough): converts regex to uppercase regex: 'foo' becomes '[Ff][Oo][Oo]', but 'Bar' becomes 'B[Aa][Rr]'."
    sUp = ""







|







52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
    s = re.sub(r"textarea0\(\s*", 'look(sx, ', s)                                           # textarea0(s)
    s = re.sub(r"before0_chk1\(\s*", 'look_chk1(dDA, sx[:m.start()], 0, ', s)               # before0_chk1(s)
    s = re.sub(r"after0_chk1\(\s*", 'look_chk1(dDA, sx[m.end():], m.end(), ', s)            # after0_chk1(s)
    s = re.sub(r"textarea0_chk1\(\s*", 'look_chk1(dDA, sx, 0, ', s)                         # textarea0_chk1(s)
    s = re.sub(r"isEndOfNG\(\s*\)", 'isEndOfNG(dDA, s[m.end():], m.end())', s)              # isEndOfNG(s)
    s = re.sub(r"isNextNotCOD\(\s*\)", 'isNextNotCOD(dDA, s[m.end():], m.end())', s)        # isNextNotCOD(s)
    s = re.sub(r"isNextVerb\(\s*\)", 'isNextVerb(dDA, s[m.end():], m.end())', s)            # isNextVerb(s)
    s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s)
    s = re.sub(r"[\\](\d+)", 'm.group(\\1)', s)
    return s


def uppercase (s, sLang):
    "(flag i is not enough): converts regex to uppercase regex: 'foo' becomes '[Ff][Oo][Oo]', but 'Bar' becomes 'B[Aa][Rr]'."
    sUp = ""

Modified gc_core/js/lang_core/gc_engine.js from [08b8dd1900] to [dcb651b444].

34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
}


// data
let _sAppContext = "";                                  // what software is running
let _dOptions = null;
let _aIgnoredRules = new Set();
let _oDict = null;
let _oSpellChecker = null;
let _dAnalyses = new Map();                             // cache for data from dictionary


var gc_engine = {

    //// Informations







<







34
35
36
37
38
39
40

41
42
43
44
45
46
47
}


// data
let _sAppContext = "";                                  // what software is running
let _dOptions = null;
let _aIgnoredRules = new Set();

let _oSpellChecker = null;
let _dAnalyses = new Map();                             // cache for data from dictionary


var gc_engine = {

    //// Informations
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
    },

    //// Initialization

    load: function (sContext="JavaScript", sPath="") {
        try {
            if (typeof(require) !== 'undefined') {
                //var ibdawg = require("resource://grammalecte/graphspell/ibdawg.js");
                //_oDict = new ibdawg.IBDAWG("${dic_filename}.json");
                console.log("<resource:>");
                var spellchecker = require("resource://grammalecte/graphspell/spellchecker.js");
                _oSpellChecker = new spellchecker.Spellchecker("${lang}", "${dic_filename}.json");
            } else {
                //_oDict = new IBDAWG("${dic_filename}.json", sPath);
                console.log("no <resource:>");
                _oSpellChecker = new Spellchecker("${lang}", "${dic_filename}.json", sPath);
            }
            _sAppContext = sContext;
            _dOptions = gc_options.getOptions(sContext).gl_shallowCopy();     // duplication necessary, to be able to reset to default
        }
        catch (e) {
            helpers.logerror(e);
        }







<
<
<

|

<
<
|







318
319
320
321
322
323
324



325
326
327


328
329
330
331
332
333
334
335
    },

    //// Initialization

    load: function (sContext="JavaScript", sPath="") {
        try {
            if (typeof(require) !== 'undefined') {



                var spellchecker = require("resource://grammalecte/graphspell/spellchecker.js");
                _oSpellChecker = new spellchecker.SpellChecker("${lang}", "", "${dic_filename}.json");
            } else {


                _oSpellChecker = new SpellChecker("${lang}", sPath, "${dic_filename}.json");
            }
            _sAppContext = sContext;
            _dOptions = gc_options.getOptions(sContext).gl_shallowCopy();     // duplication necessary, to be able to reset to default
        }
        catch (e) {
            helpers.logerror(e);
        }

Modified gc_core/py/lang_core/gc_engine.py from [9fc11201d4] to [e36972dc16].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# Grammalecte
# Grammar checker engine

import re
import sys
import os
import traceback
#import unicodedata
from itertools import chain

from ..graphspell.ibdawg import IBDAWG
from ..graphspell.echo import echo
from . import gc_options


__all__ = [ "lang", "locales", "pkg", "name", "version", "author", \
            "load", "parse", "getDictionary", \
            "setOption", "setOptions", "getOptions", "getDefaultOptions", "getOptionsLabels", "resetOptions", "displayOptions", \










|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# Grammalecte
# Grammar checker engine

import re
import sys
import os
import traceback
#import unicodedata
from itertools import chain

from ..graphspell.spellchecker import SpellChecker
from ..graphspell.echo import echo
from . import gc_options


__all__ = [ "lang", "locales", "pkg", "name", "version", "author", \
            "load", "parse", "getDictionary", \
            "setOption", "setOptions", "getOptions", "getDefaultOptions", "getOptionsLabels", "resetOptions", "displayOptions", \
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44

_rules = None                               # module gc_rules

# data
_sAppContext = ""                           # what software is running
_dOptions = None
_aIgnoredRules = set()
_oDict = None
_dAnalyses = {}                             # cache for data from dictionary



#### Parsing

def parse (sText, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):







|







30
31
32
33
34
35
36
37
38
39
40
41
42
43
44

_rules = None                               # module gc_rules

# data
_sAppContext = ""                           # what software is running
_dOptions = None
_aIgnoredRules = set()
_oSpellChecker = None
_dAnalyses = {}                             # cache for data from dictionary



#### Parsing

def parse (sText, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
    #import lightproof_handler_${implname} as opt
    _createError = _createWriterError
except ImportError:
    _createError = _createDictError


def load (sContext="Python"):
    global _oDict
    global _sAppContext
    global _dOptions
    try:
        _oDict = IBDAWG("${dic_filename}.bdic")
        _sAppContext = sContext
        _dOptions = dict(gc_options.getOptions(sContext))   # duplication necessary, to be able to reset to default
    except:
        traceback.print_exc()


def setOption (sOpt, bVal):







|



|







284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
    #import lightproof_handler_${implname} as opt
    _createError = _createWriterError
except ImportError:
    _createError = _createDictError


def load (sContext="Python"):
    global _oSpellChecker
    global _sAppContext
    global _dOptions
    try:
        _oSpellChecker = SpellChecker("${lang}", "${dic_filename}.bdic")
        _sAppContext = sContext
        _dOptions = dict(gc_options.getOptions(sContext))   # duplication necessary, to be able to reset to default
    except:
        traceback.print_exc()


def setOption (sOpt, bVal):
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344

def resetOptions ():
    global _dOptions
    _dOptions = dict(gc_options.getOptions(_sAppContext))


def getDictionary ():
    return _oDict


def _getRules (bParagraph):
    try:
        if not bParagraph:
            return _rules.lSentenceRules
        return _rules.lParagraphRules







|







330
331
332
333
334
335
336
337
338
339
340
341
342
343
344

def resetOptions ():
    global _dOptions
    _dOptions = dict(gc_options.getOptions(_sAppContext))


def getDictionary ():
    return _oSpellChecker


def _getRules (bParagraph):
    try:
        if not bParagraph:
            return _rules.lSentenceRules
        return _rules.lParagraphRules
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
    if tWord[0] in dDA:
        echo("DA: " + str(dDA[tWord[0]]))
    echo("FSA: " + str(_dAnalyses[tWord[1]]))
    return True


def _storeMorphFromFSA (sWord):
    "retrieves morphologies list from _oDict -> _dAnalyses"
    global _dAnalyses
    _dAnalyses[sWord] = _oDict.getMorph(sWord)
    return True  if _dAnalyses[sWord]  else False


def morph (dDA, tWord, sPattern, bStrict=True, bNoWord=False):
    "analyse a tuple (position, word), return True if sPattern in morphologies (disambiguation on)"
    if not tWord:
        return bNoWord







|

|







394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
    if tWord[0] in dDA:
        echo("DA: " + str(dDA[tWord[0]]))
    echo("FSA: " + str(_dAnalyses[tWord[1]]))
    return True


def _storeMorphFromFSA (sWord):
    "retrieves morphologies list from _oSpellChecker -> _dAnalyses"
    global _dAnalyses
    _dAnalyses[sWord] = _oSpellChecker.getMorph(sWord)
    return True  if _dAnalyses[sWord]  else False


def morph (dDA, tWord, sPattern, bStrict=True, bNoWord=False):
    "analyse a tuple (position, word), return True if sPattern in morphologies (disambiguation on)"
    if not tWord:
        return bNoWord

Modified gc_lang/fr/modules-js/gce_suggestions.js from [d955978cbc] to [0c31bc1a27].

206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
        } else if (sGender == ":f") {
            return suggFemPlur(sFlex);
        }
    }
    let aSugg = new Set();
    if (!sFlex.includes("-")) {
        if (sFlex.endsWith("l")) {
            if (sFlex.endsWith("al") && sFlex.length > 2 && _oDict.isValid(sFlex.slice(0,-1)+"ux")) {
                aSugg.add(sFlex.slice(0,-1)+"ux");
            }
            if (sFlex.endsWith("ail") && sFlex.length > 3 && _oDict.isValid(sFlex.slice(0,-2)+"ux")) {
                aSugg.add(sFlex.slice(0,-2)+"ux");
            }
        }
        if (_oDict.isValid(sFlex+"s")) {
            aSugg.add(sFlex+"s");
        }
        if (_oDict.isValid(sFlex+"x")) {
            aSugg.add(sFlex+"x");
        }
    }
    if (mfsp.hasMiscPlural(sFlex)) {
        mfsp.getMiscPlural(sFlex).forEach(function(x) { aSugg.add(x); });
    }
    if (aSugg.size > 0) {
        return Array.from(aSugg).join("|");
    }
    return "";
}

function suggSing (sFlex) {
    // returns singular forms assuming sFlex is plural
    if (sFlex.includes("-")) {
        return "";
    }
    let aSugg = new Set();
    if (sFlex.endsWith("ux")) {
        if (_oDict.isValid(sFlex.slice(0,-2)+"l")) {
            aSugg.add(sFlex.slice(0,-2)+"l");
        }
        if (_oDict.isValid(sFlex.slice(0,-2)+"il")) {
            aSugg.add(sFlex.slice(0,-2)+"il");
        }
    }
    if (_oDict.isValid(sFlex.slice(0,-1))) {
        aSugg.add(sFlex.slice(0,-1));
    }
    if (aSugg.size > 0) {
        return Array.from(aSugg).join("|");
    }
    return "";
}







|


|



|


|



















|


|



|







206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
        } else if (sGender == ":f") {
            return suggFemPlur(sFlex);
        }
    }
    let aSugg = new Set();
    if (!sFlex.includes("-")) {
        if (sFlex.endsWith("l")) {
            if (sFlex.endsWith("al") && sFlex.length > 2 && _oSpellChecker.isValid(sFlex.slice(0,-1)+"ux")) {
                aSugg.add(sFlex.slice(0,-1)+"ux");
            }
            if (sFlex.endsWith("ail") && sFlex.length > 3 && _oSpellChecker.isValid(sFlex.slice(0,-2)+"ux")) {
                aSugg.add(sFlex.slice(0,-2)+"ux");
            }
        }
        if (_oSpellChecker.isValid(sFlex+"s")) {
            aSugg.add(sFlex+"s");
        }
        if (_oSpellChecker.isValid(sFlex+"x")) {
            aSugg.add(sFlex+"x");
        }
    }
    if (mfsp.hasMiscPlural(sFlex)) {
        mfsp.getMiscPlural(sFlex).forEach(function(x) { aSugg.add(x); });
    }
    if (aSugg.size > 0) {
        return Array.from(aSugg).join("|");
    }
    return "";
}

function suggSing (sFlex) {
    // returns singular forms assuming sFlex is plural
    if (sFlex.includes("-")) {
        return "";
    }
    let aSugg = new Set();
    if (sFlex.endsWith("ux")) {
        if (_oSpellChecker.isValid(sFlex.slice(0,-2)+"l")) {
            aSugg.add(sFlex.slice(0,-2)+"l");
        }
        if (_oSpellChecker.isValid(sFlex.slice(0,-2)+"il")) {
            aSugg.add(sFlex.slice(0,-2)+"il");
        }
    }
    if (_oSpellChecker.isValid(sFlex.slice(0,-1))) {
        aSugg.add(sFlex.slice(0,-1));
    }
    if (aSugg.size > 0) {
        return Array.from(aSugg).join("|");
    }
    return "";
}

Modified gc_lang/fr/modules/gce_suggestions.py from [50fbeb414d] to [79835965e4].

155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
        if sGender == ":m":
            return suggMasPlur(sFlex)
        elif sGender == ":f":
            return suggFemPlur(sFlex)
    aSugg = set()
    if "-" not in sFlex:
        if sFlex.endswith("l"):
            if sFlex.endswith("al") and len(sFlex) > 2 and _oDict.isValid(sFlex[:-1]+"ux"):
                aSugg.add(sFlex[:-1]+"ux")
            if sFlex.endswith("ail") and len(sFlex) > 3 and _oDict.isValid(sFlex[:-2]+"ux"):
                aSugg.add(sFlex[:-2]+"ux")
        if _oDict.isValid(sFlex+"s"):
            aSugg.add(sFlex+"s")
        if _oDict.isValid(sFlex+"x"):
            aSugg.add(sFlex+"x")
    if mfsp.hasMiscPlural(sFlex):
        aSugg.update(mfsp.getMiscPlural(sFlex))
    if aSugg:
        return "|".join(aSugg)
    return ""


def suggSing (sFlex):
    "returns singular forms assuming sFlex is plural"
    if "-" in sFlex:
        return ""
    aSugg = set()
    if sFlex.endswith("ux"):
        if _oDict.isValid(sFlex[:-2]+"l"):
            aSugg.add(sFlex[:-2]+"l")
        if _oDict.isValid(sFlex[:-2]+"il"):
            aSugg.add(sFlex[:-2]+"il")
    if _oDict.isValid(sFlex[:-1]):
        aSugg.add(sFlex[:-1])
    if aSugg:
        return "|".join(aSugg)
    return ""


def suggMasSing (sFlex, bSuggSimil=False):







|

|

|

|














|

|

|







155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
        if sGender == ":m":
            return suggMasPlur(sFlex)
        elif sGender == ":f":
            return suggFemPlur(sFlex)
    aSugg = set()
    if "-" not in sFlex:
        if sFlex.endswith("l"):
            if sFlex.endswith("al") and len(sFlex) > 2 and _oSpellChecker.isValid(sFlex[:-1]+"ux"):
                aSugg.add(sFlex[:-1]+"ux")
            if sFlex.endswith("ail") and len(sFlex) > 3 and _oSpellChecker.isValid(sFlex[:-2]+"ux"):
                aSugg.add(sFlex[:-2]+"ux")
        if _oSpellChecker.isValid(sFlex+"s"):
            aSugg.add(sFlex+"s")
        if _oSpellChecker.isValid(sFlex+"x"):
            aSugg.add(sFlex+"x")
    if mfsp.hasMiscPlural(sFlex):
        aSugg.update(mfsp.getMiscPlural(sFlex))
    if aSugg:
        return "|".join(aSugg)
    return ""


def suggSing (sFlex):
    "returns singular forms assuming sFlex is plural"
    if "-" in sFlex:
        return ""
    aSugg = set()
    if sFlex.endswith("ux"):
        if _oSpellChecker.isValid(sFlex[:-2]+"l"):
            aSugg.add(sFlex[:-2]+"l")
        if _oSpellChecker.isValid(sFlex[:-2]+"il"):
            aSugg.add(sFlex[:-2]+"il")
    if _oSpellChecker.isValid(sFlex[:-1]):
        aSugg.add(sFlex[:-1])
    if aSugg:
        return "|".join(aSugg)
    return ""


def suggMasSing (sFlex, bSuggSimil=False):