Grammalecte  Diff

Differences From Artifact [095d971150]:

To Artifact [077d799ad3]:


1
2
3
4
5
6
7
8

9
10
11
12
13
14
15
#!python3
# -*- coding: UTF-8 -*-

import os
import traceback
import pkgutil

from . import str_transform as st

from .echo import echo


class IBDAWG:
    """INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""

    def __init__ (self, sDicName):








>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#!python3
# -*- coding: UTF-8 -*-

import os
import traceback
import pkgutil

from . import str_transform as st
from . import char_player as cp
from .echo import echo


class IBDAWG:
    """INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""

    def __init__ (self, sDicName):
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185

186
187
188
189
190
191
192
193
194
195
196
            if c not in self.dChar:
                return False
            iAddr = self._lookupArcNode(self.dChar[c], iAddr)
            if iAddr == None:
                return False
        return int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask

    def getSugg (self, sWord, iAddr=0, sNewWord=""):
        "not finished"
        # RECURSIVE FUNCTION
        if not sWord:
            if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
                return [sNewWord]
            return []
        lSugg = []
        lArc = self._getSimilarArcs(sWord[0:1], iAddr)
        if lArc:
            for t in lArc:
                lSugg.extend(self._lookupAndSuggest(sWord[1:], t[1], sNewWord+t[0]))
        else:
            pass
        return lSugg

    def _getSimilarArcs (self, cChar, iAddr):
        """Return a list of (char, address) tuples for the arcs leaving node <iAddr>
        whose character is <cChar> or one of the characters listed for it in
        st.dSimilarChars.

        <address> is the value returned by self._lookupArcNode() for that character,
        i.e. the address to continue the walk from.
        """
        lArc = []
        # When <cChar> has no entry, the default is <cChar> itself: iterating it yields that single character.
        for c in st.dSimilarChars.get(cChar, cChar):
            if c not in self.dChar:
                # A similar character unknown to this dictionary cannot label any arc: skip it instead of raising KeyError.
                continue
            jAddr = self._lookupArcNode(self.dChar[c], iAddr)
            if jAddr:
                # Store the address that was just looked up (not the current node), so the caller can move forward.
                lArc.append((c, jAddr))
        return lArc

    def getMorph (self, sWord):
        "retrieves morphologies list, different casing allowed"
        l = self.morph(sWord)
        if sWord[0:1].isupper():
            l.extend(self.morph(sWord.lower()))
            if sWord.isupper() and len(sWord) > 1:







|







|
<
<
|
<
<



|
|
>
|
|
|
<







161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176


177


178
179
180
181
182
183
184
185
186

187
188
189
190
191
192
193
            if c not in self.dChar:
                return False
            iAddr = self._lookupArcNode(self.dChar[c], iAddr)
            if iAddr == None:
                return False
        return int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask

    def suggest (self, sWord, iAddr=0, sNewWord=""):
        "not finished"
        # RECURSIVE FUNCTION
        if not sWord:
            if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
                return [sNewWord]
            return []
        lSugg = []
        for cChar, jAddr in self._getSimilarArcs(sWord[0:1], iAddr):


            lSugg.extend(self.suggest(sWord[1:], jAddr, sNewWord+cChar))


        return lSugg

    def _getSimilarArcs (self, cChar, iAddr):
        """generator: for <cChar> and each character listed for it in cp.dSimilarChar,
        yield (character, address) where address is what self._lookupArcNode()
        returns for that character from node <iAddr>"""
        lCandidates = cp.dSimilarChar.get(cChar, [cChar])
        for cCandidate in lCandidates:
            if cCandidate not in self.dChar:
                # character absent from self.dChar: nothing to look up
                continue
            iNext = self._lookupArcNode(self.dChar[cCandidate], iAddr)
            if not iNext:
                # falsy lookup result: no usable arc for this character
                continue
            yield (cCandidate, iNext)


    def getMorph (self, sWord):
        "retrieves morphologies list, different casing allowed"
        l = self.morph(sWord)
        if sWord[0:1].isupper():
            l.extend(self.morph(sWord.lower()))
            if sWord.isupper() and len(sWord) > 1: