Grammalecte  Check-in [78b5950c87]

Overview
Comment:[core][fr][graphspell] code cleaning: pylint
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | fr | core | graphspell
Files: files | file ages | folders
SHA3-256: 78b5950c87d2dfdbdce793c7f94a63fe1f1345f27945f4f22195be045df540f0
User & Date: olr on 2021-02-10 11:39:52
Other Links: manifest | tags
Context
2021-02-10
12:11
[fr] mise à jour du dictionnaire check-in: 53cc1dda5b user: olr tags: trunk, fr, v2.1.0
11:39
[core][fr][graphspell] code cleaning: pylint check-in: 78b5950c87 user: olr tags: trunk, fr, core, graphspell
10:14
[fr] faux positifs et ajustements check-in: 4698a4f99c user: olr tags: trunk, fr
Changes

Modified gc_core/py/grammar_checker.py from [50b054f72f] to [73cbb643ff].

Modified gc_core/py/lang_core/gc_engine.py from [ce7092fc44] to [2b36e73536].

233
234
235
236
237
238
239

240
241
242
243
244
245
246
    def __init__ (self, sText):
        "store <sText> and create the empty working containers"
        # the text is kept under two attributes which start with the same value
        self.sText = self.sText0 = sText
        # likewise for the sentence, empty at creation
        self.sSentence = self.sSentence0 = ""
        self.nOffsetWithinParagraph = 0
        self.lTokens = []
        # dictionaries indexed by position
        self.dTokenPos = {}         # {position: token}
        self.dTags = {}             # {position: tags}
        self.dError = {}            # {position: error}
        self.dSentenceError = {}    # {position: error} (for the current sentence only)
        self.dErrorPriority = {}    # {position: priority of the current error}

    def __str__ (self):







>







233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
    def __init__ (self, sText):
        "store <sText> and create the empty working containers"
        # the text is kept under two attributes which start with the same value
        self.sText = self.sText0 = sText
        # likewise for the sentence, empty at creation
        self.sSentence = self.sSentence0 = ""
        self.nOffsetWithinParagraph = 0
        # two independent token lists, both empty at creation
        self.lTokens = []
        self.lTokens0 = []
        # dictionaries indexed by position
        self.dTokenPos = {}         # {position: token}
        self.dTags = {}             # {position: tags}
        self.dError = {}            # {position: error}
        self.dSentenceError = {}    # {position: error} (for the current sentence only)
        self.dErrorPriority = {}    # {position: priority of the current error}

    def __str__ (self):
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
        dOpt = dOptions or gc_options.dOptions
        bShowRuleId = gc_options.dOptions.get('idrule', False)
        # parse paragraph
        try:
            self.parseText(self.sText, self.sText0, True, 0, sCountry, dOpt, bShowRuleId, bDebug, bContext)
        except:
            raise
        self.lTokens = None
        self.lTokens0 = None
        if bFullInfo:
            lParagraphErrors = list(self.dError.values())
            lSentences = []
            self.dSentenceError.clear()
        # parse sentences
        sText = self._getCleanText()
        for iStart, iEnd in text.getSentenceBoundaries(sText):







<
<







265
266
267
268
269
270
271


272
273
274
275
276
277
278
        dOpt = dOptions or gc_options.dOptions
        bShowRuleId = gc_options.dOptions.get('idrule', False)
        # parse paragraph
        try:
            self.parseText(self.sText, self.sText0, True, 0, sCountry, dOpt, bShowRuleId, bDebug, bContext)
        except:
            raise


        if bFullInfo:
            lParagraphErrors = list(self.dError.values())
            lSentences = []
            self.dSentenceError.clear()
        # parse sentences
        sText = self._getCleanText()
        for iStart, iEnd in text.getSentenceBoundaries(sText):
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
                        })
                        self.dSentenceError.clear()
                except:
                    raise
        if bFullInfo:
            # Grammar checking and sentence analysis
            return lParagraphErrors, lSentences
        else:
            # Grammar checking only
            return self.dError.values() # this is a view (iterable)

    def _getCleanText (self):
        sText = self.sText
        if " " in sText:
            sText = sText.replace(" ", ' ') # nbsp
        if " " in sText:
            sText = sText.replace(" ", ' ') # nnbsp







<
|
|







298
299
300
301
302
303
304

305
306
307
308
309
310
311
312
313
                        })
                        self.dSentenceError.clear()
                except:
                    raise
        if bFullInfo:
            # Grammar checking and sentence analysis
            return lParagraphErrors, lSentences

        # Grammar checking only
        return self.dError.values() # this is a view (iterable)

    def _getCleanText (self):
        sText = self.sText
        if " " in sText:
            sText = sText.replace(" ", ' ') # nbsp
        if " " in sText:
            sText = sText.replace(" ", ' ') # nnbsp
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
                if bUppercase:
                    sWhat = sWhat[0:1].upper() + sWhat[1:]
                self.lTokens[nTokenRewriteStart]["sNewValue"] = sWhat
            else:
                # several tokens
                lTokenValue = sWhat.split("|")
                if len(lTokenValue) != (nTokenRewriteEnd - nTokenRewriteStart + 1):
                    if (bDebug):
                        echo("Error. Text processor: number of replacements != number of tokens.")
                    return
                for i, sValue in zip(range(nTokenRewriteStart, nTokenRewriteEnd+1), lTokenValue):
                    if not sValue or sValue == "*":
                        self.lTokens[i]["bToRemove"] = True
                    else:
                        if bUppercase:
                            sValue = sValue[0:1].upper() + sValue[1:]
                        self.lTokens[i]["sNewValue"] = sValue

    def rewriteFromTags (self, bDebug=False):
        "rewrite the sentence, modify tokens, purge the token list"
        if bDebug:
            echo("REWRITE")
        lNewTokens = []
        lNewTokens0 = []
        nMergeUntil = 0
        dTokenMerger = {}
        for iToken, dToken in enumerate(self.lTokens):
            bKeepToken = True
            if dToken["sType"] != "INFO":
                if nMergeUntil and iToken <= nMergeUntil:
                    # token to merge







|















<







809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831

832
833
834
835
836
837
838
                if bUppercase:
                    sWhat = sWhat[0:1].upper() + sWhat[1:]
                self.lTokens[nTokenRewriteStart]["sNewValue"] = sWhat
            else:
                # several tokens
                lTokenValue = sWhat.split("|")
                if len(lTokenValue) != (nTokenRewriteEnd - nTokenRewriteStart + 1):
                    if bDebug:
                        echo("Error. Text processor: number of replacements != number of tokens.")
                    return
                for i, sValue in zip(range(nTokenRewriteStart, nTokenRewriteEnd+1), lTokenValue):
                    if not sValue or sValue == "*":
                        self.lTokens[i]["bToRemove"] = True
                    else:
                        if bUppercase:
                            sValue = sValue[0:1].upper() + sValue[1:]
                        self.lTokens[i]["sNewValue"] = sValue

    def rewriteFromTags (self, bDebug=False):
        "rewrite the sentence, modify tokens, purge the token list"
        if bDebug:
            echo("REWRITE")
        lNewTokens = []

        nMergeUntil = 0
        dTokenMerger = {}
        for iToken, dToken in enumerate(self.lTokens):
            bKeepToken = True
            if dToken["sType"] != "INFO":
                if nMergeUntil and iToken <= nMergeUntil:
                    # token to merge

Modified gc_core/py/lang_core/gc_functions.py from [66b95f87c6] to [069bca6c44].

1
2
3
4
5
6
7
8
9
10

11
12
13
14
15
16
17
18
19
20

21
22
23
24
25
26
27
"""
Grammar checking functions
"""

# generated code, do not edit
# template: <gc_core/py/lang_core/gc_functions.py>
# variables generated in <compile_rules.py>


import re


from . import gc_options
from ..graphspell.echo import echo


_sAppContext = "Python"         # what software is running
_oSpellChecker = None


def load (sContext, oSpellChecker):
    "initialize the module variables: application context and spellchecker"
    global _sAppContext, _oSpellChecker
    # <sContext> names the software which is running, <oSpellChecker> is stored as is
    _sAppContext, _oSpellChecker = sContext, oSpellChecker


#### common functions










>










>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
"""
Grammar checking functions
"""

# generated code, do not edit
# template: <gc_core/py/lang_core/gc_functions.py>
# variables generated in <compile_rules.py>


import re
import traceback

from . import gc_options
from ..graphspell.echo import echo


_sAppContext = "Python"         # what software is running
_oSpellChecker = None


def load (sContext, oSpellChecker):
    "mandatory first function to call: stores the application context and the spellchecker in the module variables"
    global _sAppContext, _oSpellChecker
    # <sContext> names the software which is running, <oSpellChecker> is stored as is
    _sAppContext, _oSpellChecker = sContext, oSpellChecker


#### common functions

Modified gc_core/py/lang_core/gc_options.py from [cafc10d4fb] to [dd3e857111].

12
13
14
15
16
17
18

19
20
21
22
23
24
25

dOptions = {}

_sAppContext = "Python"


def load (sContext="Python"):
    "initialize the module variables: application context and options"
    global dOptions, _sAppContext
    # the context is stored first, then <dOptions> is replaced by the default options for this context
    _sAppContext = sContext
    dOptions = getDefaultOptions(sContext)


def setOption (sOpt, bVal):







>







12
13
14
15
16
17
18
19
20
21
22
23
24
25
26

dOptions = {}

_sAppContext = "Python"


def load (sContext="Python"):
    "mandatory first function to call: sets the application context and the default options"
    global dOptions, _sAppContext
    # the context is stored first, then <dOptions> is replaced by the default options for this context
    _sAppContext = sContext
    dOptions = getDefaultOptions(sContext)


def setOption (sOpt, bVal):

Modified gc_core/py/lang_core/tests_core.py from [be13fd30fe] to [b93cf9942d].

73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
        if not os.path.exists(spfParsingTest):
            print(f"No file <gc_test.txt> in <{spHere}>")
            return
        with open(spfParsingTest, "r", encoding="utf-8") as hSrc:
            nUnexpectedErrors = 0
            nTestWithExpectedError = 0
            nTestWithExpectedErrorAndSugg = 0
            for i, sLine in enumerate( s for s in hSrc if not s.startswith("#") and s.strip() ):
                sLineNum = sLine[:10].strip()
                sLine = sLine[10:].strip()
                sOption = None
                m = zOption.search(sLine)
                if m:
                    sLine = sLine[m.end():]
                    sOption = m.group(1)







|







73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
        if not os.path.exists(spfParsingTest):
            print(f"No file <gc_test.txt> in <{spHere}>")
            return
        with open(spfParsingTest, "r", encoding="utf-8") as hSrc:
            nUnexpectedErrors = 0
            nTestWithExpectedError = 0
            nTestWithExpectedErrorAndSugg = 0
            for sLine in ( s for s in hSrc if not s.startswith("#") and s.strip() ):
                sLineNum = sLine[:10].strip()
                sLine = sLine[10:].strip()
                sOption = None
                m = zOption.search(sLine)
                if m:
                    sLine = sLine[m.end():]
                    sOption = m.group(1)
181
182
183
184
185
186
187

188
189
190
191
192
193
194
            lFoundSuggs = sFoundSuggs.split("|")
            if len(lExpectedSuggs) != len(lFoundSuggs) or set(lExpectedSuggs) != set(lFoundSuggs):
                return False
        return True


def purgeMessage (sMessage):
    "returns <sMessage> without the space following elided French words (l’, d’, qu’, etc.)"
    # pairs (elided word followed by a space, elided word alone), applied in this order
    tElisions = (
        ("l’ ", "l’"), ("d’ ", "d’"), ("n’ ", "n’"), ("j’ ", "j’"), ("m’ ", "m’"), ("t’ ", "t’"), ("s’ ", "s’"), ("qu’ ", "qu’"),
        ("L’ ", "L’"), ("D’ ", "D’"), ("N’ ", "N’"), ("J’ ", "J’"), ("M’ ", "M’"), ("T’ ", "T’"), ("S’ ", "S’"), ("QU’ ", "QU’")
    )
    for sWithSpace, sWithoutSpace in tElisions:
        if sWithSpace in sMessage:
            sMessage = sMessage.replace(sWithSpace, sWithoutSpace)
    return sMessage








>







181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
            lFoundSuggs = sFoundSuggs.split("|")
            if len(lExpectedSuggs) != len(lFoundSuggs) or set(lExpectedSuggs) != set(lFoundSuggs):
                return False
        return True


def purgeMessage (sMessage):
    "returns <sMessage> without the space following elided French words (l’, d’, qu’, etc.)"
    # pairs (elided word followed by a space, elided word alone), applied in this order
    tElisions = (
        ("l’ ", "l’"), ("d’ ", "d’"), ("n’ ", "n’"), ("j’ ", "j’"), ("m’ ", "m’"), ("t’ ", "t’"), ("s’ ", "s’"), ("qu’ ", "qu’"),
        ("L’ ", "L’"), ("D’ ", "D’"), ("N’ ", "N’"), ("J’ ", "J’"), ("M’ ", "M’"), ("T’ ", "T’"), ("S’ ", "S’"), ("QU’ ", "QU’")
    )
    for sWithSpace, sWithoutSpace in tElisions:
        if sWithSpace in sMessage:
            sMessage = sMessage.replace(sWithSpace, sWithoutSpace)
    return sMessage

Modified gc_lang/fr/modules-js/gce_suggestions.js from [dddc903a64] to [fd0125da80].

168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
                if (conj.hasConj(sStem, sTense, sWho)) {
                    aSugg.add(conj.getConj(sStem, sTense, sWho));
                }
            }
        }
        else {
            for (let [sTense, ] of lTenses) {
                for (let [sWho, ] of [ ...sMorph.matchAll(/:(?:[123][sp]|P|Y)/g) ]) {
                    if (conj.hasConj(sStem, sTense, sWho)) {
                        aSugg.add(conj.getConj(sStem, sTense, sWho));
                    }
                }
            }
        }
    }
    if (aSugg.size > 0) {
        return Array.from(aSugg).join("|");







|
|
|







168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
                if (conj.hasConj(sStem, sTense, sWho)) {
                    aSugg.add(conj.getConj(sStem, sTense, sWho));
                }
            }
        }
        else {
            for (let [sTense, ] of lTenses) {
                for (let [sWho2, ] of [ ...sMorph.matchAll(/:(?:[123][sp]|P|Y)/g) ]) {
                    if (conj.hasConj(sStem, sTense, sWho2)) {
                        aSugg.add(conj.getConj(sStem, sTense, sWho2));
                    }
                }
            }
        }
    }
    if (aSugg.size > 0) {
        return Array.from(aSugg).join("|");

Modified gc_lang/fr/modules/conj.py from [bb0326be30] to [c6776ced05].

63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90


def getNamesFrom (sVerb):
    "returns the list of names derived from <sVerb>, or its past participles if no name is known"
    if sVerb in _dVerbNames:
        # names derived from the verb are known: a new list is returned
        return list(_dVerbNames[sVerb])
    # no known name: past participles are suggested instead
    tTags = _getTags(sVerb)
    if not tTags:
        return []
    # the masculine singular is always taken, the other forms only if they exist
    lSugg = [ _getConjWithTags(sVerb, tTags, ":Q", ":m:s") ]
    for sGenderNumber in (":f:s", ":m:p", ":f:p"):
        if _hasConjWithTags(tTags, ":Q", sGenderNumber):
            lSugg.append(_getConjWithTags(sVerb, tTags, ":Q", sGenderNumber))
    # if there is only one past participle (epi inv), it is considered unreliable
    if len(lSugg) > 1:
        return lSugg
    return []


def getConjSimilInfiV1 (sInfi):
    "returns verbal forms phonetically similar to infinitive form (for verb in group 1)"
    if sInfi not in _dVerb:
        return []
    aSugg = []







<
|
|
|
|
|
|
|
|
|
|
|
|
|







63
64
65
66
67
68
69

70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89


def getNamesFrom (sVerb):
    "returns the list of names derived from <sVerb>, or its past participles if no name is known"
    if sVerb in _dVerbNames:
        # names derived from the verb are known: a new list is returned
        return list(_dVerbNames[sVerb])
    # no known name: past participles are suggested instead
    tTags = _getTags(sVerb)
    if not tTags:
        return []
    # the masculine singular is always taken, the other forms only if they exist
    lSugg = [ _getConjWithTags(sVerb, tTags, ":Q", ":m:s") ]
    for sGenderNumber in (":f:s", ":m:p", ":f:p"):
        if _hasConjWithTags(tTags, ":Q", sGenderNumber):
            lSugg.append(_getConjWithTags(sVerb, tTags, ":Q", sGenderNumber))
    # if there is only one past participle (epi inv), it is considered unreliable
    if len(lSugg) > 1:
        return lSugg
    return []


def getConjSimilInfiV1 (sInfi):
    "returns verbal forms phonetically similar to infinitive form (for verb in group 1)"
    if sInfi not in _dVerb:
        return []
    aSugg = []

Modified gc_lang/fr/modules/gce_suggestions.py from [fdf32f5da9] to [c065ff780e].

37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60

61
62
63
64
65
66
67
                            aTense[":Ip"] = ""
                            aTense[":Iq"] = ""
                            aTense[":Is"] = ""
                        elif m.group(1) == ":P":
                            aTense[":Ip"] = ""
                        else:
                            aTense[m.group(1)] = ""
            for sTense in aTense.keys():
                if sWho == ":1ś" and not conj._hasConjWithTags(tTags, sTense, ":1ś"):
                    sWho = ":1s"
                if conj._hasConjWithTags(tTags, sTense, sWho):
                    dSugg[conj._getConjWithTags(sStem, tTags, sTense, sWho)] = ""
    if funcSugg2:
        sSugg2 = funcSugg2(*args)  if args  else funcSugg2(sFlex)
        if sSugg2:
            dSugg[sSugg2] = ""
    if dSugg:
        if bVC:
            return "|".join([ joinVerbAndSuffix(sSugg, sSfx)  for sSugg in dSugg.keys() ])
        return "|".join(dSugg.keys())
    return ""


def joinVerbAndSuffix (sFlex, sSfx):

    if sSfx.startswith(("-t-", "-T-")) and sFlex.endswith(("t", "d", "T", "D")):
        return sFlex + sSfx[2:]
    if sFlex.endswith(("e", "a", "c", "E", "A", "C")):
        if re.match("(?i)-(?:en|y)$", sSfx):
            return sFlex + "s" + sSfx
        if re.match("(?i)-(?:ie?l|elle|on)$", sSfx):
            return sFlex + "-t" + sSfx







|










|
|




>







37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
                            aTense[":Ip"] = ""
                            aTense[":Iq"] = ""
                            aTense[":Is"] = ""
                        elif m.group(1) == ":P":
                            aTense[":Ip"] = ""
                        else:
                            aTense[m.group(1)] = ""
            for sTense in aTense:
                if sWho == ":1ś" and not conj._hasConjWithTags(tTags, sTense, ":1ś"):
                    sWho = ":1s"
                if conj._hasConjWithTags(tTags, sTense, sWho):
                    dSugg[conj._getConjWithTags(sStem, tTags, sTense, sWho)] = ""
    if funcSugg2:
        sSugg2 = funcSugg2(*args)  if args  else funcSugg2(sFlex)
        if sSugg2:
            dSugg[sSugg2] = ""
    if dSugg:
        if bVC:
            return "|".join([ joinVerbAndSuffix(sSugg, sSfx)  for sSugg in dSugg ])
        return "|".join(dSugg)
    return ""


def joinVerbAndSuffix (sFlex, sSfx):
    "join <sFlex> verb with <sSfx> suffix, modifying <sFlex> to prevent irregular forms"
    if sSfx.startswith(("-t-", "-T-")) and sFlex.endswith(("t", "d", "T", "D")):
        return sFlex + sSfx[2:]
    if sFlex.endswith(("e", "a", "c", "E", "A", "C")):
        if re.match("(?i)-(?:en|y)$", sSfx):
            return sFlex + "s" + sSfx
        if re.match("(?i)-(?:ie?l|elle|on)$", sSfx):
            return sFlex + "-t" + sSfx
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
                    dSugg[conj._getConjWithTags(sStem, tTags, ":Q", ":m:s")] = ""
                dSugg[conj._getConjWithTags(sStem, tTags, ":Q", ":f:p")] = ""
            else:
                dSugg[conj._getConjWithTags(sStem, tTags, ":Q", ":m:s")] = ""
    if "" in dSugg:
        del dSugg[""]
    if dSugg:
        return "|".join(dSugg.keys())
    return ""


def suggVerbTense (sFlex, sTense, sWho):
    "returns the conjugations at <sTense> and <sWho> of the lemmas of <sFlex>, joined with '|' (empty string if none)"
    # a dictionary is used as an ordered set: duplicates are dropped, insertion order is kept
    dForms = {
        conj.getConj(sLemma, sTense, sWho): ""
        for sLemma in _oSpellChecker.getLemma(sFlex)
        if conj.hasConj(sLemma, sTense, sWho)
    }
    # joining an empty dictionary gives the empty string
    return "|".join(dForms)


def suggVerbFrom (sStem, sFlex, sWho=""):
    """conjugate <sStem> according to <sFlex> (and eventually <sWho>)

    For each morphology of <sFlex>, the tenses found in it are applied to <sStem>:
    - with the person <sWho> if it is given,
    - otherwise with each person found in the same morphology.
    Returns the existing conjugations joined with '|', or an empty string if there is none.
    """
    dSugg = {}  # used as an ordered set of suggestions
    for sMorph in _oSpellChecker.getMorph(sFlex):
        lTenses = [ m.group(0)  for m in re.finditer(":(?:Y|I[pqsf]|S[pq]|K|P)", sMorph) ]
        if sWho:
            for sTense in lTenses:
                if conj.hasConj(sStem, sTense, sWho):
                    dSugg[conj.getConj(sStem, sTense, sWho)] = ""
        else:
            for sTense in lTenses:
                # the loop variable must not be named <sWho>: rebinding the parameter would make
                # the test <if sWho> true for the next morphologies, which would then be
                # conjugated with the last person of a previous morphology
                for sWho2 in [ m.group(0)  for m in re.finditer(":(?:[123][sp]|P|Y)", sMorph) ]:
                    if conj.hasConj(sStem, sTense, sWho2):
                        dSugg[conj.getConj(sStem, sTense, sWho2)] = ""
    if dSugg:
        return "|".join(dSugg)
    return ""


def suggVerbImpe (sFlex, bVC=False):
    "change <sFlex> to a verb at imperative form"
    if bVC:
        sFlex, sSfx = splitVerb(sFlex)







|










|














|
|
|

|







107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
                    dSugg[conj._getConjWithTags(sStem, tTags, ":Q", ":m:s")] = ""
                dSugg[conj._getConjWithTags(sStem, tTags, ":Q", ":f:p")] = ""
            else:
                dSugg[conj._getConjWithTags(sStem, tTags, ":Q", ":m:s")] = ""
    if "" in dSugg:
        del dSugg[""]
    if dSugg:
        return "|".join(dSugg)
    return ""


def suggVerbTense (sFlex, sTense, sWho):
    "returns the conjugations at <sTense> and <sWho> of the lemmas of <sFlex>, joined with '|' (empty string if none)"
    # a dictionary is used as an ordered set: duplicates are dropped, insertion order is kept
    dForms = {
        conj.getConj(sLemma, sTense, sWho): ""
        for sLemma in _oSpellChecker.getLemma(sFlex)
        if conj.hasConj(sLemma, sTense, sWho)
    }
    # joining an empty dictionary gives the empty string
    return "|".join(dForms)


def suggVerbFrom (sStem, sFlex, sWho=""):
    "conjugate <sStem> with the tenses found in the morphologies of <sFlex>, for <sWho> or, if empty, for the persons found in each morphology"
    dForms = {}  # used as an ordered set of suggestions
    for sMorph in _oSpellChecker.getMorph(sFlex):
        lTenses = re.findall(":(?:Y|I[pqsf]|S[pq]|K|P)", sMorph)
        # persons to try: the requested one, or every person tagged in this morphology
        lPersons = [ sWho ]  if sWho  else re.findall(":(?:[123][sp]|P|Y)", sMorph)
        for sTense in lTenses:
            for sPerson in lPersons:
                if conj.hasConj(sStem, sTense, sPerson):
                    dForms[conj.getConj(sStem, sTense, sPerson)] = ""
    # joining an empty dictionary gives the empty string
    return "|".join(dForms)


def suggVerbImpe (sFlex, bVC=False):
    "change <sFlex> to a verb at imperative form"
    if bVC:
        sFlex, sSfx = splitVerb(sFlex)
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
        if tTags:
            for sTense in lMode:
                if conj._hasConjWithTags(tTags, sTense, sWho):
                    dSugg[conj._getConjWithTags(sStem, tTags, sTense, sWho)] = ""
    if sFlex in _dModeSugg:
        dSugg[_dModeSugg[sFlex]] = ""
    if dSugg:
        return "|".join(dSugg.keys())
    return ""


## Nouns and adjectives

def suggPlur (sFlex, bSelfSugg=False):
    "returns plural forms assuming sFlex is singular"







|







193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
        if tTags:
            for sTense in lMode:
                if conj._hasConjWithTags(tTags, sTense, sWho):
                    dSugg[conj._getConjWithTags(sStem, tTags, sTense, sWho)] = ""
    if sFlex in _dModeSugg:
        dSugg[_dModeSugg[sFlex]] = ""
    if dSugg:
        return "|".join(dSugg)
    return ""


## Nouns and adjectives

def suggPlur (sFlex, bSelfSugg=False):
    "returns plural forms assuming sFlex is singular"
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
                # We also check if the verb has a feminine form.
                # If not, we consider it’s better to not suggest the masculine one, as it can be considered invariable.
                dSugg[conj.getConj(sVerb, ":Q", ":m:s")] = ""
    if bSuggSimil:
        for e in phonet.selectSimil(sFlex, ":m:[si]"):
            dSugg[e] = ""
    if dSugg:
        return "|".join(dSugg.keys())
    return ""


def suggMasPlur (sFlex, bSuggSimil=False):
    "returns masculine plural forms"
    dSugg = {}
    for sMorph in _oSpellChecker.getMorph(sFlex):







|







276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
                # We also check if the verb has a feminine form.
                # If not, we consider it’s better to not suggest the masculine one, as it can be considered invariable.
                dSugg[conj.getConj(sVerb, ":Q", ":m:s")] = ""
    if bSuggSimil:
        for e in phonet.selectSimil(sFlex, ":m:[si]"):
            dSugg[e] = ""
    if dSugg:
        return "|".join(dSugg)
    return ""


def suggMasPlur (sFlex, bSuggSimil=False):
    "returns masculine plural forms"
    dSugg = {}
    for sMorph in _oSpellChecker.getMorph(sFlex):
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
                # it is necessary to filter these flexions, like “succédé” or “agi” that are not masculine plural.
                if sSugg.endswith("s"):
                    dSugg[sSugg] = ""
    if bSuggSimil:
        for e in phonet.selectSimil(sFlex, ":m:[pi]"):
            dSugg[e] = ""
    if dSugg:
        return "|".join(dSugg.keys())
    return ""


def suggFemSing (sFlex, bSuggSimil=False):
    "returns feminine singular forms"
    dSugg = {}
    for sMorph in _oSpellChecker.getMorph(sFlex):







|







306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
                # it is necessary to filter these flexions, like “succédé” or “agi” that are not masculine plural.
                if sSugg.endswith("s"):
                    dSugg[sSugg] = ""
    if bSuggSimil:
        for e in phonet.selectSimil(sFlex, ":m:[pi]"):
            dSugg[e] = ""
    if dSugg:
        return "|".join(dSugg)
    return ""


def suggFemSing (sFlex, bSuggSimil=False):
    "returns feminine singular forms"
    dSugg = {}
    for sMorph in _oSpellChecker.getMorph(sFlex):
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
            sVerb = cr.getLemmaOfMorph(sMorph)
            if conj.hasConj(sVerb, ":Q", ":f:s"):
                dSugg[conj.getConj(sVerb, ":Q", ":f:s")] = ""
    if bSuggSimil:
        for e in phonet.selectSimil(sFlex, ":f:[si]"):
            dSugg[e] = ""
    if dSugg:
        return "|".join(dSugg.keys())
    return ""


def suggFemPlur (sFlex, bSuggSimil=False):
    "returns feminine plural forms"
    dSugg = {}
    for sMorph in _oSpellChecker.getMorph(sFlex):







|







331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
            sVerb = cr.getLemmaOfMorph(sMorph)
            if conj.hasConj(sVerb, ":Q", ":f:s"):
                dSugg[conj.getConj(sVerb, ":Q", ":f:s")] = ""
    if bSuggSimil:
        for e in phonet.selectSimil(sFlex, ":f:[si]"):
            dSugg[e] = ""
    if dSugg:
        return "|".join(dSugg)
    return ""


def suggFemPlur (sFlex, bSuggSimil=False):
    "returns feminine plural forms"
    dSugg = {}
    for sMorph in _oSpellChecker.getMorph(sFlex):
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
    lMorphSrc = _oSpellChecker.getMorph(sFlexSrc)
    if not lMorphSrc:
        return ""
    sGender, sNumber = cr.getGenderNumber(lMorphSrc)
    if sGender == ":m":
        if sNumber == ":s":
            return suggMasSing(sFlexDest)
        elif sNumber == ":p":
            return suggMasPlur(sFlexDest)
        return suggMasSing(sFlexDest)
    elif sGender == ":f":
        if sNumber == ":s":
            return suggFemSing(sFlexDest)
        elif sNumber == ":p":
            return suggFemPlur(sFlexDest)
        return suggFemSing(sFlexDest)
    elif sGender == ":e":
        if sNumber == ":s":
            return suggSing(sFlexDest)
        elif sNumber == ":p":
            return suggPlur(sFlexDest)
        return sFlexDest
    return ""


def g_suggAgree (dTokenDst, dTokenSrc):
    "returns suggestions for the value of <dTokenDst> according to the gender and number of <dTokenSrc>"
    # use the morphologies stored on the source token if any, otherwise ask the spellchecker
    if "lMorph" in dTokenSrc:
        lMorphSrc = dTokenSrc["lMorph"]
    else:
        lMorphSrc = _oSpellChecker.getMorph(dTokenSrc["sValue"])
    if not lMorphSrc:
        return ""
    sGender, sNumber = cr.getGenderNumber(lMorphSrc)
    if sGender == ":m":
        # any number other than plural leads to the singular form
        fSugg = suggMasPlur  if sNumber == ":p"  else suggMasSing
    elif sGender == ":f":
        fSugg = suggFemPlur  if sNumber == ":p"  else suggFemSing
    elif sGender == ":e":
        if sNumber == ":s":
            fSugg = suggSing
        elif sNumber == ":p":
            fSugg = suggPlur
        else:
            # neither singular nor plural: the value is returned unchanged
            return dTokenDst["sValue"]
    else:
        # other gender: no suggestion
        return ""
    return fSugg(dTokenDst["sValue"])


def hasFemForm (sFlex):
    "return True if there is a feminine form of <sFlex>"







|


|


|


|


|














|


|


|


|


|







369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
    lMorphSrc = _oSpellChecker.getMorph(sFlexSrc)
    if not lMorphSrc:
        return ""
    sGender, sNumber = cr.getGenderNumber(lMorphSrc)
    if sGender == ":m":
        if sNumber == ":s":
            return suggMasSing(sFlexDest)
        if sNumber == ":p":
            return suggMasPlur(sFlexDest)
        return suggMasSing(sFlexDest)
    if sGender == ":f":
        if sNumber == ":s":
            return suggFemSing(sFlexDest)
        if sNumber == ":p":
            return suggFemPlur(sFlexDest)
        return suggFemSing(sFlexDest)
    if sGender == ":e":
        if sNumber == ":s":
            return suggSing(sFlexDest)
        if sNumber == ":p":
            return suggPlur(sFlexDest)
        return sFlexDest
    return ""


def g_suggAgree (dTokenDst, dTokenSrc):
    "returns suggestions for the value of <dTokenDst> according to the gender and number of <dTokenSrc>"
    # use the morphologies stored on the source token if any, otherwise ask the spellchecker
    if "lMorph" in dTokenSrc:
        lMorphSrc = dTokenSrc["lMorph"]
    else:
        lMorphSrc = _oSpellChecker.getMorph(dTokenSrc["sValue"])
    if not lMorphSrc:
        return ""
    sGender, sNumber = cr.getGenderNumber(lMorphSrc)
    if sGender == ":m":
        # any number other than plural leads to the singular form
        fSugg = suggMasPlur  if sNumber == ":p"  else suggMasSing
    elif sGender == ":f":
        fSugg = suggFemPlur  if sNumber == ":p"  else suggFemSing
    elif sGender == ":e":
        if sNumber == ":s":
            fSugg = suggSing
        elif sNumber == ":p":
            fSugg = suggPlur
        else:
            # neither singular nor plural: the value is returned unchanged
            return dTokenDst["sValue"]
    else:
        # other gender: no suggestion
        return ""
    return fSugg(dTokenDst["sValue"])


def hasFemForm (sFlex):
    "return True if there is a feminine form of <sFlex>"
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
    else:
        for sMorph in _oSpellChecker.getMorph(sFlex):
            if ":f" in sMorph:
                dSugg[suggMasSing(sFlex)] = ""
            elif ":m" in sMorph:
                dSugg[suggFemSing(sFlex)] = ""
    if dSugg:
        return "|".join(dSugg.keys())
    return ""


def switchPlural (sFlex):
    "return the form(s) of <sFlex> with the opposite number, joined with “|”, or an empty string"
    dSugg = {}      # dict used as a set of suggestions: only the keys matter
    for sMorph in _oSpellChecker.getMorph(sFlex):
        if ":s" in sMorph:
            # singular reading: suggest the plural
            sSugg = suggPlur(sFlex)
        elif ":p" in sMorph:
            # plural reading: suggest the singular
            sSugg = suggSing(sFlex)
        else:
            continue
        dSugg[sSugg] = ""
    return "|".join(dSugg.keys())  if dSugg  else ""


def hasSimil (sWord, sPattern=None):
    "return True if there are words phonetically similar to <sWord> (according to <sPattern> if required)"
    # thin wrapper: the lookup is delegated to phonet.hasSimil with the same arguments
    return phonet.hasSimil(sWord, sPattern)








|












|







469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
    else:
        for sMorph in _oSpellChecker.getMorph(sFlex):
            if ":f" in sMorph:
                dSugg[suggMasSing(sFlex)] = ""
            elif ":m" in sMorph:
                dSugg[suggFemSing(sFlex)] = ""
    if dSugg:
        return "|".join(dSugg)
    return ""


def switchPlural (sFlex):
    "return the form(s) of <sFlex> with the opposite number, joined with “|”, or an empty string"
    dSugg = {}      # dict used as a set of suggestions: only the keys matter
    for sMorph in _oSpellChecker.getMorph(sFlex):
        if ":s" in sMorph:
            # singular reading: suggest the plural
            sSugg = suggPlur(sFlex)
        elif ":p" in sMorph:
            # plural reading: suggest the singular
            sSugg = suggSing(sFlex)
        else:
            continue
        dSugg[sSugg] = ""
    # joining an empty dict gives an empty string, so no special case is needed
    return "|".join(dSugg)


def hasSimil (sWord, sPattern=None):
    "return True if there are words phonetically similar to <sWord> (according to <sPattern> if required)"
    # thin wrapper: the lookup is delegated to phonet.hasSimil with the same arguments
    return phonet.hasSimil(sWord, sPattern)

508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
                        if any(re.search(sPattern, sMorph2)  for sMorph2 in _oSpellChecker.getMorph(sName)):
                            dSugg[sName] = ""
                else:
                    dSugg.update(dict.fromkeys(conj.getNamesFrom(sInfi), ""))
                break
    if dSugg:
        if bVC:
            return "|".join([ joinVerbAndSuffix(sSugg, sSfx)  for sSugg in dSugg.keys() ])
        return "|".join(dSugg.keys())
    return ""


def suggCeOrCet (sWord):
    "suggest “ce” or “cet” or both according to the first letter of <sWord>"
    if re.match("(?i)[aeéèêiouyâîï]", sWord):
        return "cet"







|
|







509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
                        if any(re.search(sPattern, sMorph2)  for sMorph2 in _oSpellChecker.getMorph(sName)):
                            dSugg[sName] = ""
                else:
                    dSugg.update(dict.fromkeys(conj.getNamesFrom(sInfi), ""))
                break
    if dSugg:
        if bVC:
            return "|".join([ joinVerbAndSuffix(sSugg, sSfx)  for sSugg in dSugg ])
        return "|".join(dSugg)
    return ""


def suggCeOrCet (sWord):
    "suggest “ce” or “cet” or both according to the first letter of <sWord>"
    if re.match("(?i)[aeéèêiouyâîï]", sWord):
        return "cet"

Modified gc_lang/fr/modules/tests_modules.py from [f7e92c1ab0] to [5c8bb6ae99].

53
54
55
56
57
58
59
60
61
62
63

64
65
66
67
68
69





70
71
72
73
74
75
76
    def test_isvalid_failed (self):
        # every word listed here must be rejected by <isValid>
        lRejected = [
            "BranchE", "BRanche", "BRAnCHE", "émilie", "éMILIE", "émiLie", "aujourd'hui", "Aujourd'hui"
        ]
        for sWord in lRejected:
            bValid = self.oSpellChecker.isValid(sWord)
            self.assertFalse(bValid, sWord)     # the word is the failure message

    def test_suggest (self):
        # Every item produced by <suggest> for a misspelled word must be non-empty.
        # The result is actually consumed and checked (it used to be stored and ignored),
        # and the word is given as message so that a failure tells which word is at fault.
        # NOTE(review): assumes <suggest> produces sized collections of suggestions — confirm against SpellChecker.suggest
        for sWord in [
            "déelirranttesss", "vallidasion", "Emilie", "exibission", "ditirembique", "jai", "email",
            "fatiqué", "coeur", "trèèèèèèèèès", "vraaaaiiiimeeeeennnt", "apele", "Co2",
            "emmppâiiiller", "testt", "apelaion", "exsepttion", "sintaxik", "ebriete", "ennormmement"
        ]:
            for lSugg in self.oSpellChecker.suggest(sWord):
                self.assertGreater(len(lSugg), 0, sWord)
            # manual timing, kept disabled:
            #with timeblock(sWord):
            #    aSugg = self.oSpellChecker.suggest(sWord)
            #    print(sWord, "->", " ".join(aSugg))

    def test_lemmas (self):
        for sWord, sInfi in [





            ("jetez",       "jeter"),
            ("finit",       "finir"),
            ("mangé",       "manger"),
            ("oubliait",    "oublier"),
            ("arrivais",    "arriver"),
            ("venait",      "venir"),
            ("prendre",     "prendre")







|


|
>






>
>
>
>
>







53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
    def test_isvalid_failed (self):
        # every word listed here must be rejected by <isValid>
        lRejected = [
            "BranchE", "BRanche", "BRAnCHE", "émilie", "éMILIE", "émiLie", "aujourd'hui", "Aujourd'hui"
        ]
        for sWord in lRejected:
            bValid = self.oSpellChecker.isValid(sWord)
            self.assertFalse(bValid, sWord)     # the word is the failure message

    def test_suggest (self):
        # Every item produced by <suggest> for a misspelled word must be non-empty.
        # The word is given as message so that a failure tells which word is at fault.
        for sWord in [
            "déelirranttesss", "vallidasion", "Emilie", "exibission", "ditirembique", "jai", "email",
            "fatiqué", "coeur", "trèèèèèèèèès", "vraaaaiiiimeeeeennnt", "apele", "Co2",
            "emmppâiiiller", "testt", "apelaion", "exsepttion", "sintaxik", "ebriete", "ennormmement"
        ]:
            for lSugg in self.oSpellChecker.suggest(sWord):
                self.assertGreater(len(lSugg), 0, sWord)
            # manual timing, kept disabled:
            #with timeblock(sWord):
            #    aSugg = self.oSpellChecker.suggest(sWord)
            #    print(sWord, "->", " ".join(aSugg))

    def test_lemmas (self):
        for sWord, sInfi in [
            ("suis",        "suivre"),
            ("suis",        "être"),
            ("a",           "avoir"),
            ("a",           "a"),
            ("irai",        "aller"),
            ("jetez",       "jeter"),
            ("finit",       "finir"),
            ("mangé",       "manger"),
            ("oubliait",    "oublier"),
            ("arrivais",    "arriver"),
            ("venait",      "venir"),
            ("prendre",     "prendre")

Modified graphspell-js/dawg.js from [1c01c104ea] to [743b532572].

138
139
140
141
142
143
144




145
146
147
148
149
150
151
        if (cStemming == "A") {
            this.funcStemming = str_transform.changeWordWithAffixCode;
        } else if (cStemming == "S") {
            this.funcStemming = str_transform.changeWordWithSuffixCode;
        } else {
            this.funcStemming = str_transform.noStemming;
        }





        // build
        lWord.sort();
        if (xProgressBarNode) {
            xProgressBarNode.value = 0;
            xProgressBarNode.max = lWord.length;
        }







>
>
>
>







138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
        if (cStemming == "A") {
            this.funcStemming = str_transform.changeWordWithAffixCode;
        } else if (cStemming == "S") {
            this.funcStemming = str_transform.changeWordWithSuffixCode;
        } else {
            this.funcStemming = str_transform.noStemming;
        }

        // binary dictionary
        this.sByDic = "";
        this.lByDic = [];

        // build
        lWord.sort();
        if (xProgressBarNode) {
            xProgressBarNode.value = 0;
            xProgressBarNode.max = lWord.length;
        }

Modified graphspell-js/ibdawg.js from [34ce7a8182] to [44a920520f].

106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
        }
        else if (this.sWord.slice(0,1).gl_isUpperCase()) {
            lRes = lRes.map((sSugg) => { return sSugg.slice(0,1).toUpperCase() + sSugg.slice(1); });
            lRes = [...new Set(lRes)];
        }
        return lRes.slice(0, this.nSuggLimit);
    }

    reset () {
        this.dSugg.clear();
        this.dGoodSugg.clear();
        this.dBestSugg.clear();
    }
}


class IBDAWG {
    // INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH

    constructor (source, sPath="") {







<
<
<
<
<
<







106
107
108
109
110
111
112






113
114
115
116
117
118
119
        }
        else if (this.sWord.slice(0,1).gl_isUpperCase()) {
            lRes = lRes.map((sSugg) => { return sSugg.slice(0,1).toUpperCase() + sSugg.slice(1); });
            lRes = [...new Set(lRes)];
        }
        return lRes.slice(0, this.nSuggLimit);
    }






}


class IBDAWG {
    // INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH

    constructor (source, sPath="") {

Modified graphspell/dawg.py from [729715ac89] to [e9390e8710].

161
162
163
164
165
166
167




168
169
170
171
172
173
174

        # calculated later
        self.nBytesNodeAddress = 1
        self.nBytesArc = 0
        self.nBytesOffset = 0
        self.nMaxOffset = 0





        # build
        lWord.sort()
        oProgBar = ProgressBar(0, len(lWord))
        for aEntry in lWord:
            self.insert(aEntry)
            oProgBar.increment(1)
        oProgBar.done()







>
>
>
>







161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178

        # calculated later
        self.nBytesNodeAddress = 1
        self.nBytesArc = 0
        self.nBytesOffset = 0
        self.nMaxOffset = 0

        # binary dictionary
        self.byDic = b""
        self.lByDic = []

        # build
        lWord.sort()
        oProgBar = ProgressBar(0, len(lWord))
        for aEntry in lWord:
            self.insert(aEntry)
            oProgBar.increment(1)
        oProgBar.done()
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
    # BINARY CONVERSION
    def _calculateBinary (self):
        print(" > Write DAWG as an indexable binary dictionary")
        self.nBytesArc = ( (self.nArcVal.bit_length() + 2) // 8 ) + 1   # We add 2 bits. See DawgNode.convToBytes()
        self.nBytesOffset = 0
        self._calcNumBytesNodeAddress()
        self._calcNodesAddress()
        self.byDic = b""
        self.byDic = self.oRoot.convToBytes(self.nBytesArc, self.nBytesNodeAddress)
        for oNode in self.lMinimizedNodes:
            self.byDic += oNode.convToBytes(self.nBytesArc, self.nBytesNodeAddress)
        print("   Arc values (chars, affixes and tags): {}  ->  {} bytes".format( self.nArcVal, len("\t".join(self.lArcVal).encode("utf-8")) ))
        print("   Arc size: {} bytes, Address size: {} bytes   ->   {} * {} = {} bytes".format( self.nBytesArc, self.nBytesNodeAddress, \
                                                                                                self.nBytesArc+self.nBytesNodeAddress, self.nArc, \
                                                                                                (self.nBytesArc+self.nBytesNodeAddress)*self.nArc ))







<







364
365
366
367
368
369
370

371
372
373
374
375
376
377
    # BINARY CONVERSION
    def _calculateBinary (self):
        print(" > Write DAWG as an indexable binary dictionary")
        self.nBytesArc = ( (self.nArcVal.bit_length() + 2) // 8 ) + 1   # We add 2 bits. See DawgNode.convToBytes()
        self.nBytesOffset = 0
        self._calcNumBytesNodeAddress()
        self._calcNodesAddress()

        self.byDic = self.oRoot.convToBytes(self.nBytesArc, self.nBytesNodeAddress)
        for oNode in self.lMinimizedNodes:
            self.byDic += oNode.convToBytes(self.nBytesArc, self.nBytesNodeAddress)
        print("   Arc values (chars, affixes and tags): {}  ->  {} bytes".format( self.nArcVal, len("\t".join(self.lArcVal).encode("utf-8")) ))
        print("   Arc size: {} bytes, Address size: {} bytes   ->   {} * {} = {} bytes".format( self.nBytesArc, self.nBytesNodeAddress, \
                                                                                                self.nBytesArc+self.nBytesNodeAddress, self.nArc, \
                                                                                                (self.nBytesArc+self.nBytesNodeAddress)*self.nArc ))

Modified graphspell/ibdawg.py from [bd93f6353b] to [13d2327263].

99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
        if self.sWord.isupper():
            lRes = list(OrderedDict.fromkeys(map(lambda sSugg: sSugg.upper(), lRes))) # use dict, when Python 3.6+
        elif self.sWord[0:1].isupper():
            # don’t use <.istitle>
            lRes = list(OrderedDict.fromkeys(map(lambda sSugg: sSugg[0:1].upper()+sSugg[1:], lRes))) # use dict, when Python 3.6+
        return lRes[:self.nSuggLimit]

    def reset (self):
        "empty the stored data: <aSugg> and <dSugg> are cleared, not replaced"
        for sAttr in ("aSugg", "dSugg"):
            getattr(self, sAttr).clear()


class IBDAWG:
    """INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""

    def __init__ (self, source):
        if isinstance(source, str):
            by = pkgutil.get_data(__package__, "_dictionaries/" + source)







<
<
<
<
<







99
100
101
102
103
104
105





106
107
108
109
110
111
112
        if self.sWord.isupper():
            lRes = list(OrderedDict.fromkeys(map(lambda sSugg: sSugg.upper(), lRes))) # use dict, when Python 3.6+
        elif self.sWord[0:1].isupper():
            # don’t use <.istitle>
            lRes = list(OrderedDict.fromkeys(map(lambda sSugg: sSugg[0:1].upper()+sSugg[1:], lRes))) # use dict, when Python 3.6+
        return lRes[:self.nSuggLimit]







class IBDAWG:
    """INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""

    def __init__ (self, source):
        if isinstance(source, str):
            by = pkgutil.get_data(__package__, "_dictionaries/" + source)

Modified graphspell/lexgraph_fr.py from [bcc7c5a33b] to [fab156e863].

438
439
440
441
442
443
444

445
446
447
448
449
450
451
452
453
454
455


_zPartDemForm = re.compile("([\\w]+)-(là|ci)$")
_zInterroVerb = re.compile("([\\w]+)(-(?:t-(?:ie?l|elle|on)|je|tu|ie?ls?|elles?|on|[nv]ous))$")
_zImperatifVerb = re.compile("([\\w]+)(-(?:l(?:es?|a)-(?:moi|toi|lui|[nv]ous|leur)|y|en|[mts]['’ʼ‘‛´`′‵՚ꞌꞋ](?:y|en)|les?|la|[mt]oi|leur|lui))$")

def setLabelsOnToken (dToken):

    # Token: .sType, .sValue, .nStart, .nEnd, .lMorph
    try:
        if dToken["sType"] == "PUNC" or dToken["sType"] == "SIGN":
            dToken["aLabels"] = [_dValues.get(dToken["sValue"], "signe de ponctuation divers")]
        elif dToken["sType"] == 'SYMBOL':
            dToken["aLabels"] = ["symbole"]
        elif dToken["sType"] == 'EMOJI':
            dToken["aLabels"] = ["émoji"]
        elif dToken["sType"] == 'NUM':
            dToken["aLabels"] = ["nombre"]
        elif dToken["sType"] == 'LINK':







>



|







438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456


_zPartDemForm = re.compile("([\\w]+)-(là|ci)$")
_zInterroVerb = re.compile("([\\w]+)(-(?:t-(?:ie?l|elle|on)|je|tu|ie?ls?|elles?|on|[nv]ous))$")
_zImperatifVerb = re.compile("([\\w]+)(-(?:l(?:es?|a)-(?:moi|toi|lui|[nv]ous|leur)|y|en|[mts]['’ʼ‘‛´`′‵՚ꞌꞋ](?:y|en)|les?|la|[mt]oi|leur|lui))$")

def setLabelsOnToken (dToken):
    "create an attribute “aLabels” on <dToken> as a list of readable meanings"
    # Token: .sType, .sValue, .nStart, .nEnd, .lMorph
    try:
        if dToken["sType"] == "PUNC" or dToken["sType"] == "SIGN":
            dToken["aLabels"] = [ _dValues.get(dToken["sValue"], "signe de ponctuation divers") ]
        elif dToken["sType"] == 'SYMBOL':
            dToken["aLabels"] = ["symbole"]
        elif dToken["sType"] == 'EMOJI':
            dToken["aLabels"] = ["émoji"]
        elif dToken["sType"] == 'NUM':
            dToken["aLabels"] = ["nombre"]
        elif dToken["sType"] == 'LINK':
505
506
507
508
509
510
511
512
513

514
            dToken["aLabels"] = ["token de nature inconnue"]
    except:
        return


# Other functions

def filterSugg (aSugg):
    "return the list of suggestions from <aSugg> that do not end with “è” or “È”"
    # A real list is returned rather than a lazy <filter> object:
    # the result can be measured, indexed and iterated more than once.
    return [ sSugg  for sSugg in aSugg  if not sSugg.endswith(("è", "È")) ]







|

>
|
506
507
508
509
510
511
512
513
514
515
516
            dToken["aLabels"] = ["token de nature inconnue"]
    except:
        return


# Other functions

def filterSugg (aSuggs):
    "return the list of suggestions from <aSuggs> that do not end with “è” or “È”"
    lKept = []
    for sSugg in aSuggs:
        if sSugg.endswith(("è", "È")):
            continue
        lKept.append(sSugg)
    # a list is built on purpose: filter() would return a lazy filter object
    return lKept

Modified graphspell/str_transform.py from [d580e06cf9] to [4b10b3e705].

111
112
113
114
115
116
117

118
119
120
121
122
123
124
            )
            if i and j and s1[i] == s2[j-1] and s1[i-1] == s2[j]:
                d[i, j] = min(d[i, j], d[i-2, j-2] + nCost)     # Transposition
    return d[nLen1-1, nLen2-1]


def distanceJaroWinkler (sWord1, sWord2, fBoost = .666):

    # https://github.com/thsig/jaro-winkler-JS
    #if (sWord1 == sWord2): return 1.0
    nLen1 = len(sWord1)
    nLen2 = len(sWord2)
    nMax = max(nLen1, nLen2)
    aFlags1 = [ None for _ in range(nMax) ]
    aFlags2 = [ None for _ in range(nMax) ]







>







111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
            )
            if i and j and s1[i] == s2[j-1] and s1[i-1] == s2[j]:
                d[i, j] = min(d[i, j], d[i-2, j-2] + nCost)     # Transposition
    return d[nLen1-1, nLen2-1]


def distanceJaroWinkler (sWord1, sWord2, fBoost = .666):
    "distance of Jaro-Winkler between <sWord1> and <sWord2>, returns a float"
    # https://github.com/thsig/jaro-winkler-JS
    #if (sWord1 == sWord2): return 1.0
    nLen1 = len(sWord1)
    nLen2 = len(sWord2)
    nMax = max(nLen1, nLen2)
    aFlags1 = [ None for _ in range(nMax) ]
    aFlags2 = [ None for _ in range(nMax) ]
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
                    bTrans = abs(i2-i1) >= abs(t[1] - t[0])
                    if bTrans:
                        nTrans += 1
                    elif not t[2]:
                        t[2] = True
                        nTrans += 1
                    break
                elif i1 > t[1] and i2 > t[0]:
                    del lOffset[i]
                else:
                    i += 1
            lOffset.append([i1, i2, bTrans])
        else:
            nLargestCS += nLocalCS
            nLocalCS = 0
            if i1 != i2:
                i1 = i2 = min(i1, i2)
            for i in range(nMaxOffset):
                if i1 + i >= nLen1 and i2 + i >= nLen2:
                    break
                elif i1 + i < nLen1 and s1[i1+i] == s2[i2]:
                    i1 += i - 1
                    i2 -= 1
                    break
                elif i2 + i < nLen2 and s1[i1] == s2[i2+i]:
                    i2 += i - 1
                    i1 -= 1
                    break
        i1 += 1
        i2 += 1
        if i1 >= nLen1 or i2 >= nLen2:
            nLargestCS += nLocalCS







|












|



|







218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
                    bTrans = abs(i2-i1) >= abs(t[1] - t[0])
                    if bTrans:
                        nTrans += 1
                    elif not t[2]:
                        t[2] = True
                        nTrans += 1
                    break
                if i1 > t[1] and i2 > t[0]:
                    del lOffset[i]
                else:
                    i += 1
            lOffset.append([i1, i2, bTrans])
        else:
            nLargestCS += nLocalCS
            nLocalCS = 0
            if i1 != i2:
                i1 = i2 = min(i1, i2)
            for i in range(nMaxOffset):
                if i1 + i >= nLen1 and i2 + i >= nLen2:
                    break
                if i1 + i < nLen1 and s1[i1+i] == s2[i2]:
                    i1 += i - 1
                    i2 -= 1
                    break
                if i2 + i < nLen2 and s1[i1] == s2[i2+i]:
                    i2 += i - 1
                    i1 -= 1
                    break
        i1 += 1
        i2 += 1
        if i1 >= nLen1 or i2 >= nLen2:
            nLargestCS += nLocalCS

Modified pylintrc from [032bb6abd3] to [237077429f].

14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
ignore-patterns=bottle.py,gc_rules,.*_data.py

# Python code to execute, usually for sys.path manipulation such as
# pygtk.require().
#init-hook=

# Use multiple processes to speed up Pylint.
jobs=4

# List of plugins (as comma separated values of python modules names) to load,
# usually to register additional checkers.
load-plugins=

# Pickle collected data for later comparisons.
persistent=yes







|







14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
ignore-patterns=bottle.py,gc_rules,.*_data.py

# Python code to execute, usually for sys.path manipulation such as
# pygtk.require().
#init-hook=

# Use multiple processes to speed up Pylint.
jobs=1

# List of plugins (as comma separated values of python modules names) to load,
# usually to register additional checkers.
load-plugins=

# Pickle collected data for later comparisons.
persistent=yes
116
117
118
119
120
121
122
123
124
125
126

127
128
129
130
131
132
133
134
135
136
137

138
139
140
141
142
143
144
        div-method,
        idiv-method,
        rdiv-method,
        exception-message-attribute,
        invalid-str-codec,
        sys-max-int,
        bad-python3-import,
        deprecated-string-function,
        deprecated-str-translate-call,
        deprecated-itertools-function,
        deprecated-types-field,

        next-method-defined,
        dict-items-not-iterating,
        dict-keys-not-iterating,
        dict-values-not-iterating,
        deprecated-operator-function,
        deprecated-urllib-function,
        xreadlines-attribute,
        deprecated-sys-function,
        exception-escape,
        comprehension-escape,
        bad-whitespace,

        line-too-long

# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifiers separated by commas (,) or put this option
# multiple times (only on the command line, not in the configuration file, where
# it should appear only once). See also the "--disable" option for examples.
enable=c-extension-no-member







<
<
<

>







<



>







116
117
118
119
120
121
122



123
124
125
126
127
128
129
130
131

132
133
134
135
136
137
138
139
140
141
142
        div-method,
        idiv-method,
        rdiv-method,
        exception-message-attribute,
        invalid-str-codec,
        sys-max-int,
        bad-python3-import,



        deprecated-types-field,
        missing-format-attribute,
        next-method-defined,
        dict-items-not-iterating,
        dict-keys-not-iterating,
        dict-values-not-iterating,
        deprecated-operator-function,
        deprecated-urllib-function,
        xreadlines-attribute,

        exception-escape,
        comprehension-escape,
        bad-whitespace,
        consider-using-ternary,
        line-too-long

# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifiers separated by commas (,) or put this option
# multiple times (only on the command line, not in the configuration file, where
# it should appear only once). See also the "--disable" option for examples.
enable=c-extension-no-member