Grammalecte  Check-in [346195e6bb]

Overview
Comment:[core] ibdawg: suggestion mechanism
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | core | new_feature
Files: files | file ages | folders
SHA3-256: 346195e6bb19fed1b627f7dc913865708f8ee83e6afe9e7d426281bec08badd4
User & Date: olr on 2017-06-25 17:41:57
Other Links: manifest | tags
Context
2017-06-25
17:43
[core] ibdawg: add missing file for suggestion mechanism check-in: 188369efcb user: olr tags: trunk, core
17:41
[core] ibdawg: suggestion mechanism check-in: 346195e6bb user: olr tags: trunk, core, new_feature
14:15
[fr] nr: confusion <à/a> dans <suite à> et <à moi/toi/eux/elles> check-in: 32222de6ef user: olr tags: trunk, fr
Changes

Modified cli.py from [39244ea849] to [cfdf5fb796].

16
17
18
19
20
21
22

23
24
25
26
27
28
29
_EXAMPLE = "Quoi ? Racontes ! Racontes-moi ! Bon sangg, parles ! Oui. Il y a des menteur partout. " \
           "Je suit sidéré par la brutales arrogance de cette homme-là. Quelle salopard ! Un escrocs de la pire espece. " \
           "Quant sera t’il châtiés pour ses mensonge ?             Merde ! J’en aie marre."

_HELP = """
    /help                       /h      show this text
    ?word1 [word2] ...                  words analysis

    /lopt                       /lo     list options
    /+ option1 [option2] ...            activate grammar checking options
    /- option1 [option2] ...            deactivate grammar checking options
    /lrules [pattern]           /lr     list rules
    /--rule1 [rule2] ...                deactivate grammar checking rule
    /++rule1 [rule2] ...                reactivate grammar checking rule
    /quit                       /q      exit







>







16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
_EXAMPLE = "Quoi ? Racontes ! Racontes-moi ! Bon sangg, parles ! Oui. Il y a des menteur partout. " \
           "Je suit sidéré par la brutales arrogance de cette homme-là. Quelle salopard ! Un escrocs de la pire espece. " \
           "Quant sera t’il châtiés pour ses mensonge ?             Merde ! J’en aie marre."

_HELP = """
    /help                       /h      show this text
    ?word1 [word2] ...                  words analysis
    !word                               suggestion
    /lopt                       /lo     list options
    /+ option1 [option2] ...            activate grammar checking options
    /- option1 [option2] ...            deactivate grammar checking options
    /lrules [pattern]           /lr     list rules
    /--rule1 [rule2] ...                deactivate grammar checking rule
    /++rule1 [rule2] ...                reactivate grammar checking rule
    /quit                       /q      exit
196
197
198
199
200
201
202
203
204
205






206
207
208
209
210
211
212
        # pseudo-console
        sInputText = "\n~==========~ Enter your text [/h /q] ~==========~\n"
        sText = _getText(sInputText)
        while True:
            if sText.startswith("?"):
                for sWord in sText[1:].strip().split():
                    if sWord:
                        echo("* {}".format(sWord))
                        for sMorph in oDict.getMorph(sWord):
                            echo("  {:<32} {}".format(sMorph, oLexGraphe.formatTags(sMorph)))






            elif sText.startswith("/+ "):
                gce.setOptions({ opt:True  for opt in sText[3:].strip().split()  if opt in gce.getOptions() })
                echo("done")
            elif sText.startswith("/- "):
                gce.setOptions({ opt:False  for opt in sText[3:].strip().split()  if opt in gce.getOptions() })
                echo("done")
            elif sText.startswith("/-- "):







|


>
>
>
>
>
>







197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
        # pseudo-console
        sInputText = "\n~==========~ Enter your text [/h /q] ~==========~\n"
        sText = _getText(sInputText)
        while True:
            if sText.startswith("?"):
                for sWord in sText[1:].strip().split():
                    if sWord:
                        echo("* " + sWord)
                        for sMorph in oDict.getMorph(sWord):
                            echo("  {:<32} {}".format(sMorph, oLexGraphe.formatTags(sMorph)))
            elif sText.startswith("!"):
                for sWord in sText[1:].strip().split():
                    if sWord:
                        echo("* suggestions for: " + sWord)
                        for sSugg in oDict.suggest(sWord):
                            echo("  > " + sSugg)
            elif sText.startswith("/+ "):
                gce.setOptions({ opt:True  for opt in sText[3:].strip().split()  if opt in gce.getOptions() })
                echo("done")
            elif sText.startswith("/- "):
                gce.setOptions({ opt:False  for opt in sText[3:].strip().split()  if opt in gce.getOptions() })
                echo("done")
            elif sText.startswith("/-- "):

Modified gc_core/py/ibdawg.py from [095d971150] to [077d799ad3].

1
2
3
4
5
6
7
8

9
10
11
12
13
14
15
#!python3
# -*- coding: UTF-8 -*-

import os
import traceback
import pkgutil

from . import str_transform as st

from .echo import echo


class IBDAWG:
    """INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""

    def __init__ (self, sDicName):








>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#!python3
# -*- coding: UTF-8 -*-

import os
import traceback
import pkgutil

from . import str_transform as st
from . import char_player as cp
from .echo import echo


class IBDAWG:
    """INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""

    def __init__ (self, sDicName):
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185

186
187
188
189
190
191
192
193
194
195
196
            if c not in self.dChar:
                return False
            iAddr = self._lookupArcNode(self.dChar[c], iAddr)
            if iAddr == None:
                return False
        return int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask

    def getSugg (self, sWord, iAddr=0, sNewWord=""):
        "not finished"
        # RECURSIVE FUNCTION
        if not sWord:
            if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
                return [sNewWord]
            return []
        lSugg = []
        lArc = self._getSimilarArcs(sWord[0:1], iAddr)
        if lArc:
            for t in lArc:
                lSugg.extend(self._lookupAndSuggest(sWord[1:], t[1], sNewWord+t[0]))
        else:
            pass
        return lSugg

    def _getSimilarArcs (self, cChar, iAddr):
        lArc = []
        for c in st.dSimilarChars.get(cChar, cChar):

            jAddr = self._lookupArcNode(self.dChar[c], iAddr)
            if jAddr:
                lArc.append((c, iAddr))
        return lArc

    def getMorph (self, sWord):
        "retrieves morphologies list, different casing allowed"
        l = self.morph(sWord)
        if sWord[0:1].isupper():
            l.extend(self.morph(sWord.lower()))
            if sWord.isupper() and len(sWord) > 1:







|







|
<
<
|
<
<



|
|
>
|
|
|
<







161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176


177


178
179
180
181
182
183
184
185
186

187
188
189
190
191
192
193
            if c not in self.dChar:
                return False
            iAddr = self._lookupArcNode(self.dChar[c], iAddr)
            if iAddr == None:
                return False
        return int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask

    def suggest (self, sWord, iAddr=0, sNewWord=""):
        "not finished"
        # RECURSIVE FUNCTION
        if not sWord:
            if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
                return [sNewWord]
            return []
        lSugg = []
        for cChar, jAddr in self._getSimilarArcs(sWord[0:1], iAddr):


            lSugg.extend(self.suggest(sWord[1:], jAddr, sNewWord+cChar))


        return lSugg

    def _getSimilarArcs (self, cChar, iAddr):
        "generator: yield similar char of <cChar> and address of the following node"
        for c in cp.dSimilarChar.get(cChar, [cChar]):
            if c in self.dChar:
                jAddr = self._lookupArcNode(self.dChar[c], iAddr)
                if jAddr:
                    yield (c, jAddr)


    def getMorph (self, sWord):
        "retrieves morphologies list, different casing allowed"
        l = self.morph(sWord)
        if sWord[0:1].isupper():
            l.extend(self.morph(sWord.lower()))
            if sWord.isupper() and len(sWord) > 1:

Modified gc_core/py/str_transform.py from [7df400eceb] to [23bc31f0e4].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!python3
# -*- coding: UTF-8 -*-


dSimilarChars = {
    "a": "aàâáä",
    "à": "aàâáä",
    "â": "aàâáä",
    "á": "aàâáä",
    "ä": "aàâáä",
    "c": "cç",
    "ç": "cç",
    "e": "eéêèë",
    "é": "eéêèë",
    "ê": "eéêèë",
    "è": "eéêèë",
    "ë": "eéêèë",
    "i": "iîïíì",
    "î": "iîïíì",
    "ï": "iîïíì",
    "í": "iîïíì",
    "ì": "iîïíì",
    "o": "oôóòö",
    "ô": "oôóòö",
    "ó": "oôóòö",
    "ò": "oôóòö",
    "ö": "oôóòö",
    "u": "uûùüú",
    "û": "uûùüú",
    "ù": "uûùüú",
    "ü": "uûùüú",
    "ú": "uûùüú",
}

## No stemming

def noStemming (sFlex, sStem):
    return sStem

def rebuildWord (sFlex, cmd1, cmd2):

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







1
































2
3
4
5
6
7
8
#!python3

































## No stemming

def noStemming (sFlex, sStem):
    return sStem

def rebuildWord (sFlex, cmd1, cmd2):