Grammalecte  Check-in [cee9fdd1aa]

Overview
Comment:[core] ibdawg: suggestion mechanism update
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | core
Files: files | file ages | folders
SHA3-256: cee9fdd1aae4f0a139ab15185b4bc438031c0d9f8c90d32e25f16369b0efc22f
User & Date: olr on 2017-06-25 23:41:22
Other Links: manifest | tags
Context
2017-06-26
06:50
[core] ibdawg: suggestion mechanism update + keyboard chars proximity check-in: 80ebc25208 user: olr tags: trunk, core
2017-06-25
23:41
[core] ibdawg: suggestion mechanism update check-in: cee9fdd1aa user: olr tags: trunk, core
17:52
[fr][bug] codes de positionnement JS manquants check-in: 4985bf4659 user: olr tags: trunk, fr
Changes

Modified cli.py from [cfdf5fb796] to [8ec24a7015].

203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
                    if sWord:
                        echo("* " + sWord)
                        for sMorph in oDict.getMorph(sWord):
                            echo("  {:<32} {}".format(sMorph, oLexGraphe.formatTags(sMorph)))
            elif sText.startswith("!"):
                for sWord in sText[1:].strip().split():
                    if sWord:
                        echo("* suggestions for: " + sWord)
                        for sSugg in oDict.suggest(sWord):
                            echo("  > " + sSugg)
            elif sText.startswith("/+ "):
                gce.setOptions({ opt:True  for opt in sText[3:].strip().split()  if opt in gce.getOptions() })
                echo("done")
            elif sText.startswith("/- "):
                gce.setOptions({ opt:False  for opt in sText[3:].strip().split()  if opt in gce.getOptions() })
                echo("done")
            elif sText.startswith("/-- "):







<
|
<







203
204
205
206
207
208
209

210

211
212
213
214
215
216
217
                    if sWord:
                        echo("* " + sWord)
                        for sMorph in oDict.getMorph(sWord):
                            echo("  {:<32} {}".format(sMorph, oLexGraphe.formatTags(sMorph)))
            elif sText.startswith("!"):
                for sWord in sText[1:].strip().split():
                    if sWord:

                        echo(" | ".join(oDict.suggest(sWord)))

            elif sText.startswith("/+ "):
                gce.setOptions({ opt:True  for opt in sText[3:].strip().split()  if opt in gce.getOptions() })
                echo("done")
            elif sText.startswith("/- "):
                gce.setOptions({ opt:False  for opt in sText[3:].strip().split()  if opt in gce.getOptions() })
                echo("done")
            elif sText.startswith("/-- "):

Modified gc_core/py/char_player.py from [8b0dcb8c7c] to [faa9abdccc].

1
2
3
4

5








6
7

8

9



10
11

12


13
14






15
16
17
18
19
20
21











22

23
24
25
26
27
28
29
30






31
32
33
34
35
36
37








38
39
40
41
42
43
44
45
46
47
48






49
50
51
52
53

























































































































































54
# list of similar chars
# useful for suggestion mechanism

dSimilarChar = {

	"1": "l",









    "a": "aàâáä",

    "à": "aàâáä",

    "â": "aàâáä",



    "á": "aàâáä",
    "ä": "aàâáä",




    "c": "cçsśŝ",
    "ç": "cçsśŝ",







    "e": "eéêèë",
    "é": "eéêèë",
    "ê": "eéêèë",
    "è": "eéêèë",
    "ë": "eéêèë",












    "g": "j",

    "j": "g",

    "i": "iîïyíìÿ",
    "î": "iîïyíìÿ",
    "ï": "iîïyíìÿ",
    "í": "iîïyíìÿ",
    "ì": "iîïyíìÿ",







    "n": "nñńǹ",

    "o": "oôóòö",
    "ô": "oôóòö",
    "ó": "oôóòö",
    "ò": "oôóòö",
    "ö": "oôóòö",









    "s": "sśŝcç",
    "ś": "sśŝcç",
    "ŝ": "sśŝcç",

    "u": "uûùüú",
    "û": "uûùüú",
    "ù": "uûùüú",
    "ü": "uûùüú",
    "ú": "uûùüú",







    "y": "yÿiîŷýỳ",
    "ÿ": "yÿiîŷýỳ",
    "ŷ": "yÿiîŷýỳ",
    "ý": "yÿiîŷýỳ",
    "ỳ": "yÿiîŷýỳ",

























































































































































}



|
>
|
>
>
>
>
>
>
>
>
|
|
>
|
>
|
>
>
>
|
|
>

>
>
|
|
>
>
>
>
>
>

|
|
|
|
|

>
>
>
>
>
>
>
>
>
>
>
|
>
|
|
|
|
|
|
|

>
>
>
>
>
>
|

|
|
|
|
|
>
>
>
>
>
>
>
>











>
>
>
>
>
>





>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
# list of similar chars
# useful for suggestion mechanism


# Method: Remove Useless Chars

_dUselessChar = {
    'a': '',  'e': '',  'i': '',  'o': '',  'u': '',  'y': '',
    'à': '',  'é': '',  'î': '',  'ô': '',  'û': '',  'ÿ': '',
    'â': '',  'è': '',  'ï': '',  'ö': '',  'ù': '',  'ŷ': '',
    'ä': '',  'ê': '',  'í': '',  'ó': '',  'ü': '',  'ý': '',
    'á': '',  'ë': '',  'ì': '',  'ò': '',  'ú': '',  'ỳ': '',
    'ā': '',  'ē': '',  'ī': '',  'ō': '',  'ū': '',  'ȳ': '',
    'h': '',  'œ': '',  'æ': ''
 }

_CHARMAP = str.maketrans(_dUselessChar)

aUselessChar = frozenset(_dUselessChar.keys())

def clearWord (sWord):
    "remove vovels and h"
    return sWord.translate(_CHARMAP)


# Similar chars

d1to1 = {
    "1": "li",
    "2": "e",
    "3": "e",
    "4": "aà",
    "5": "ge",
    "6": "bd",
    "7": "lt",
    "8": "b",
    "9": "gbd",

    "a": "aàâáäæ",
    "à": "aàâáäæ",
    "â": "aàâáäæ",
    "á": "aàâáäæ",
    "ä": "aàâáäæ",

    "æ": "éa",

    "c": "cçskqśŝ",
    "ç": "cçskqśŝ",

    "e": "eéèêëœ",
    "é": "eéèêëœ",
    "ê": "eéèêëœ",
    "è": "eéèêëœ",
    "ë": "eéèêëœ",

    "f": "fv",

    "g": "gjq",
    
    "i": "iîïylíìÿ",
    "î": "iîïylíìÿ",
    "ï": "iîïylíìÿ",
    "í": "iîïylíìÿ",
    "ì": "iîïylíìÿ",

    "j": "jg",

    "k": "kcq",

    "l": "li",

    "n": "nñr",

    "o": "oôóòöœ",
    "ô": "oôóòöœ",
    "ó": "oôóòöœ",
    "ò": "oôóòöœ",
    "ö": "oôóòöœ",

    "œ": "œoôeéèêë",

    "p": "pb",

    "q": "ckg",

    "r": "rn",

    "s": "sśŝcç",
    "ś": "sśŝcç",
    "ŝ": "sśŝcç",

    "u": "uûùüú",
    "û": "uûùüú",
    "ù": "uûùüú",
    "ü": "uûùüú",
    "ú": "uûùüú",

    "v": "vwf",

    "w": "wv",

    "x": "xck",

    "y": "yÿiîŷýỳ",
    "ÿ": "yÿiîŷýỳ",
    "ŷ": "yÿiîŷýỳ",
    "ý": "yÿiîŷýỳ",
    "ỳ": "yÿiîŷýỳ",

    "z": "zs",
}

d1toX = {
    "æ": ("ae",),
    "b": ("bb",),
    "c": ("cc", "ss", "qu", "ch"),
    "ç": ("ss", "cc", "qh", "ch"),
    "d": ("dd",),
    "f": ("ff", "ph"),
    "g": ("gu", "ge", "gg", "gh"),
    "i": ("ii",),
    "j": ("jj", "dj"),
    "k": ("qu", "ck", "ch", "cu", "kk", "kh"),
    "l": ("ll",),
    "m": ("mm", "mn"),
    "n": ("nn", "nm", "mn"),
    "o": ("au", "eau", "aut"),
    "œ": ("oe", "eu"),
    "p": ("pp", "ph"),
    "q": ("qu", "ch", "cq", "ck", "kk"),
    "r": ("rr",),
    "s": ("ss", "sh"),
    "t": ("tt", "th"),
    "x": ("cc", "ct", "xx"),
    "z": ("ss", "zh")
}

d2toX = {
    "an": ("en",),
    "en": ("an",),
    "ai": ("ei", "é", "è", "ê", "ë"),
    "ei": ("ai", "ait", "ais", "é", "è", "ê", "ë"),
    "ch": ("sh", "c", "ss"),
    "ct": ("x", "cc"),
    "oa": ("oi",),
    "oi": ("oa", "oie"),
    "qu": ("q", "cq", "ck", "c", "k"),
}


# End of word

dFinal1 = {
    "a": ("as", "at", "ant"),

    "e": ("et", "er", "ets", "ée", "ez", "ai", "ais", "ait", "ent"),
    "é": ("et", "er", "ets", "ée", "ez", "ai", "ais", "ait"),
    "è": ("et", "er", "ets", "ée", "ez", "ai", "ais", "ait"),
    "ê": ("et", "er", "ets", "ée", "ez", "ai", "ais", "ait"),
    "ë": ("et", "er", "ets", "ée", "ez", "ai", "ais", "ait"),

    "i": ("is", "it", "ie", "in"),

    "n": ("nt", "nd", "ns"),

    "o": ("aut", "ot", "os"),
    "ô": ("aut", "ot", "os"),
    "ö": ("aut", "ot", "os"),

    "u": ("ut", "us"),
}

dFinal2 = {
    "an": ("ant", "ent"),
    "en": ("ent", "ant"),
    "ei": ("ait", "ais"),
    "on": ("ons", "ont"),
    "oi": ("ois", "oit", "oix"),
}


# Préfixes

aPfx = ("anti", "contre", "mé", "im", "in", "ir", "par", "pré", "re", "ré", "sans", "sous", "sur")


# Keyboards

dBépo = {
    # on présume que le bépoète est moins susceptible de faire des erreurs de frappe que l’azertyste.
    # ligne 2
    "b": "éa",
    "é": "bpu",
    "p": "éoi",
    "o": "pèe",
    "è": "o",
    "v": "dt",
    "d": "vls",
    "l": "djr",
    "j": "lzn",
    "z": "jmw",
    # ligne 3
    "a": "ubà",
    "u": "aiéy",
    "i": "uepx",
    "e": "io",
    "c": "t",
    "t": "csvq",
    "s": "trdg",
    "r": "snlh",
    "n": "rmjf",
    "m": "nzç",
    # ligne 4
    "à": "yêa",
    "y": "àxu",
    "x": "ywi",
    "w": "z",
    "k": "c",
    "q": "gt",
    "g": "qhs",
    "h": "gfr",
    "f": "hçn",
    "ç": "fm",
}

dAzerty = {
    # ligne 1
    "é": "az",
    "è": "yu",
    "ç": "àio",
    "à": "op",
    # ligne 2
    "a": "zéqs",
    "z": "aesqd",
    "e": "zrdsf",
    "r": "etfdg",
    "t": "rygfh",
    "y": "tuhgj",
    "u": "yijhk",
    "i": "uokjl",
    "o": "iplkm",
    "p": "oml",
    # ligne 3
    "q": "sawz",
    "s": "qdzwxe",
    "d": "sfexcr",
    "f": "dgrcvt",
    "g": "fhtvby",
    "h": "gjybnu",
    "j": "hkuni",
    "k": "jlio",
    "l": "kmop",
    "m": "lùp",
    "ù": "m",
    # ligne 4
    "w": "xqs",
    "x": "wcsd",
    "c": "xvdf",
    "v": "cbfg",
    "b": "vngh",
    "n": "bhj",
}

Modified gc_core/py/ibdawg.py from [077d799ad3] to [18fa7e7c19].

1
2
3
4
5
6

7
8
9
10
11




12
13
14
15
16
17
18
#!python3
# -*- coding: UTF-8 -*-

import os
import traceback
import pkgutil


from . import str_transform as st
from . import char_player as cp
from .echo import echo






class IBDAWG:
    """INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""

    def __init__ (self, sDicName):
        self.by = pkgutil.get_data(__package__, "_dictionaries/" + sDicName)
        if not self.by:






>





>
>
>
>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#!python3
# -*- coding: UTF-8 -*-

import os
import traceback
import pkgutil
from itertools import chain

from . import str_transform as st
from . import char_player as cp
from .echo import echo


def show (nDeep, sText):
    print(nDeep * "  " + sText)


class IBDAWG:
    """INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""

    def __init__ (self, sDicName):
        self.by = pkgutil.get_data(__package__, "_dictionaries/" + sDicName)
        if not self.by:
47
48
49
50
51
52
53

54
55
56

57
58
59
60
61
62
63
64
65
66
67
68
69

70
71
72
73
74

75
76
77
78

79
80
81
82
83
84
85
        if self.cStemming == "S":
            self.funcStemming = st.changeWordWithSuffixCode
        elif self.cStemming == "A":
            self.funcStemming = st.changeWordWithAffixCode
        else:
            self.funcStemming = st.noStemming
        self.nTag = self.nArcVal - self.nChar - self.nAff

        self.dChar = {}
        for i in range(1, self.nChar):
            self.dChar[self.lArcVal[i]] = i

            
        self._arcMask = (2 ** ((self.nBytesArc * 8) - 3)) - 1
        self._finalNodeMask = 1 << ((self.nBytesArc * 8) - 1)
        self._lastArcMask = 1 << ((self.nBytesArc * 8) - 2)
        self._addrBitMask = 1 << ((self.nBytesArc * 8) - 3)  # version 2

        self.nBytesOffset = 1 # version 3

        # Configuring DAWG functions according to nVersion
        if self.nVersion == 1:
            self.morph = self._morph1
            self.stem = self._stem1
            self._lookupArcNode = self._lookupArcNode1

            self._writeNodes = self._writeNodes1
        elif self.nVersion == 2:
            self.morph = self._morph2
            self.stem = self._stem2
            self._lookupArcNode = self._lookupArcNode2

            self._writeNodes = self._writeNodes2
        elif self.nVersion == 3:
            self.morph = self._morph3
            self.stem = self._stem3

            self._lookupArcNode = self._lookupArcNode3
            self._writeNodes = self._writeNodes3
        else:
            raise ValueError("  # Error: unknown code: {}".format(self.nVersion))

        self.bOptNumSigle = False
        self.bOptNumAtLast = False







>



>













>





>




>







52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
        if self.cStemming == "S":
            self.funcStemming = st.changeWordWithSuffixCode
        elif self.cStemming == "A":
            self.funcStemming = st.changeWordWithAffixCode
        else:
            self.funcStemming = st.noStemming
        self.nTag = self.nArcVal - self.nChar - self.nAff
        # <dChar> to get the value of an arc, <dVal> to get the char of an arc with its value
        self.dChar = {}
        for i in range(1, self.nChar):
            self.dChar[self.lArcVal[i]] = i
        self.dVal = { v: k  for k, v in self.dChar.items() }
            
        self._arcMask = (2 ** ((self.nBytesArc * 8) - 3)) - 1
        self._finalNodeMask = 1 << ((self.nBytesArc * 8) - 1)
        self._lastArcMask = 1 << ((self.nBytesArc * 8) - 2)
        self._addrBitMask = 1 << ((self.nBytesArc * 8) - 3)  # version 2

        self.nBytesOffset = 1 # version 3

        # Configuring DAWG functions according to nVersion
        if self.nVersion == 1:
            self.morph = self._morph1
            self.stem = self._stem1
            self._lookupArcNode = self._lookupArcNode1
            self._getArcs = self._getArcs1
            self._writeNodes = self._writeNodes1
        elif self.nVersion == 2:
            self.morph = self._morph2
            self.stem = self._stem2
            self._lookupArcNode = self._lookupArcNode2
            self._getArcs = self._getArcs2
            self._writeNodes = self._writeNodes2
        elif self.nVersion == 3:
            self.morph = self._morph3
            self.stem = self._stem3
            self._getArcs = self._getArcs3
            self._lookupArcNode = self._lookupArcNode3
            self._writeNodes = self._writeNodes3
        else:
            raise ValueError("  # Error: unknown code: {}".format(self.nVersion))

        self.bOptNumSigle = False
        self.bOptNumAtLast = False
161
162
163
164
165
166
167
168











169

170
171
172

173
174

175

176

177














178
179
180
181
182
























183
184
185
186
187
188
189
            if c not in self.dChar:
                return False
            iAddr = self._lookupArcNode(self.dChar[c], iAddr)
            if iAddr == None:
                return False
        return int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask

    def suggest (self, sWord, iAddr=0, sNewWord=""):











        "not finished"

        # RECURSIVE FUNCTION
        if not sWord:
            if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:

                return [sNewWord]
            return []

        lSugg = []

        for cChar, jAddr in self._getSimilarArcs(sWord[0:1], iAddr):

            lSugg.extend(self.suggest(sWord[1:], jAddr, sNewWord+cChar))














        return lSugg

    def _getSimilarArcs (self, cChar, iAddr):
        "generator: yield similar char of <cChar> and address of the following node"
        for c in cp.dSimilarChar.get(cChar, [cChar]):
























            if c in self.dChar:
                jAddr = self._lookupArcNode(self.dChar[c], iAddr)
                if jAddr:
                    yield (c, jAddr)

    def getMorph (self, sWord):
        "retrieves morphologies list, different casing allowed"







|
>
>
>
>
>
>
>
>
>
>
>
|
>



>


>

>
|
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>




|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
            if c not in self.dChar:
                return False
            iAddr = self._lookupArcNode(self.dChar[c], iAddr)
            if iAddr == None:
                return False
        return int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask

    def suggest (self, sWord):
        "returns a set of similar words"
        # first, we check for similar words
        return set(self._suggestWithCrushedUselessChars(cp.clearWord(sWord)))
        lSugg = self._suggest(sWord)
        if not lSugg:
            lSugg.extend(self._suggest(sWord[1:]))
            lSugg.extend(self._suggest(sWord[:-1]))
            lSugg.extend(self._suggest(sWord[1:-1]))
            if not lSugg:
                lSugg.extend(self._suggestWithCrushedUselessChars(cp.clearWord(sWord)))
        return set(lSugg)

    def _suggest (self, sWord, cPrevious='', nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False):
        # RECURSIVE FUNCTION
        if not sWord:
            if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
                show(nDeep, "!!! " + sNewWord + " !!!")
                return [sNewWord]
            return []
        #show(nDeep, "<" + sWord + ">  ===>  " + sNewWord)
        lSugg = []
        cCurrent = sWord[0:1]
        for cChar, jAddr in self._getSimilarArcs(cCurrent, iAddr):
            #show(nDeep, cChar)
            lSugg.extend(self._suggest(sWord[1:], cCurrent, nDeep+1, jAddr, sNewWord+cChar))
        if not bAvoidLoop: # avoid infinite loop
            #show(nDeep, ":no loop:")
            if cPrevious == cCurrent:
                # same char, we remove 1 char without adding 1 to <sNewWord>
                lSugg.extend(self._suggest(sWord[1:], cCurrent, nDeep+1, iAddr, sNewWord))
            for sRepl in cp.d1toX.get(cCurrent, ()):
                #show(nDeep, sRepl)
                lSugg.extend(self._suggest(sRepl + sWord[1:], cCurrent, nDeep+1, iAddr, sNewWord, True))
            if len(sWord) == 1:
                #show(nDeep, ":end of word:")
                # end of word
                for sRepl in cp.dFinal1.get(sWord, ()):
                    #show(nDeep, sRepl)
                    lSugg.extend(self._suggest(sRepl, cCurrent, nDeep+1, iAddr, sNewWord, True))
        return lSugg

    def _getSimilarArcs (self, cChar, iAddr):
        "generator: yield similar char of <cChar> and address of the following node"
        for c in cp.d1to1.get(cChar, [cChar]):
            if c in self.dChar:
                jAddr = self._lookupArcNode(self.dChar[c], iAddr)
                if jAddr:
                    yield (c, jAddr)

    def _suggestWithCrushedUselessChars (self, sWord, cPrevious='', nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False):
        if not sWord:
            if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
                show(nDeep, "!!! " + sNewWord + " !!!")
                return [sNewWord]
            return []
        lSugg = []
        cCurrent = sWord[0:1]
        for cChar, jAddr in self._getSimilarArcsAndCrushedChars(cCurrent, iAddr):
            show(nDeep, cChar)
            lSugg.extend(self._suggestWithCrushedUselessChars(sWord[1:], cCurrent, nDeep+1, jAddr, sNewWord+cChar))
        return lSugg

    def _getSimilarArcsAndCrushedChars (self, cChar, iAddr):
        "generator: yield similar char of <cChar> and address of the following node"
        for nVal, jAddr in self._getArcs(iAddr):
            if self.dVal.get(nVal, "") in cp.aUselessChar:
                yield (self.dVal[nVal], jAddr)
        for c in cp.d1to1.get(cChar, [cChar]):
            if c in self.dChar:
                jAddr = self._lookupArcNode(self.dChar[c], iAddr)
                if jAddr:
                    yield (c, jAddr)

    def getMorph (self, sWord):
        "retrieves morphologies list, different casing allowed"
263
264
265
266
267
268
269










270
271
272
273
274
275
276
                return int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
            else:
                # value not found
                if (nRawArc & self._lastArcMask):
                    return None
                iAddr = iEndArcAddr+self.nBytesNodeAddress











    def _writeNodes1 (self, spfDest):
        "for debugging only"
        print(" > Write binary nodes")
        with codecs.open(spfDest, 'w', 'utf-8', newline="\n") as hDst:
            iAddr = 0
            hDst.write("i{:_>10} -- #{:_>10}\n".format("0", iAddr))
            while iAddr < len(self.byDic):







>
>
>
>
>
>
>
>
>
>







327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
                return int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
            else:
                # value not found
                if (nRawArc & self._lastArcMask):
                    return None
                iAddr = iEndArcAddr+self.nBytesNodeAddress

    def _getArcs1 (self, iAddr):
        "generator: return all arcs at <iAddr> as tuples of (nVal, iAddr)"
        while True:
            iEndArcAddr = iAddr+self.nBytesArc
            nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
            yield (nRawArc & self._arcMask, int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big'))
            if (nRawArc & self._lastArcMask):
                break
            iAddr = iEndArcAddr+self.nBytesNodeAddress

    def _writeNodes1 (self, spfDest):
        "for debugging only"
        print(" > Write binary nodes")
        with codecs.open(spfDest, 'w', 'utf-8', newline="\n") as hDst:
            iAddr = 0
            hDst.write("i{:_>10} -- #{:_>10}\n".format("0", iAddr))
            while iAddr < len(self.byDic):