1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
|
+
|
#!python3
# -*- coding: UTF-8 -*-
import os
import traceback
import pkgutil
from . import str_transform as st
from . import char_player as cp
from .echo import echo
class IBDAWG:
"""INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""
def __init__ (self, sDicName):
|
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
|
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
|
-
+
-
+
-
-
-
+
-
-
-
-
-
-
-
+
+
+
+
+
+
-
|
if c not in self.dChar:
return False
iAddr = self._lookupArcNode(self.dChar[c], iAddr)
if iAddr == None:
return False
return int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask
def suggest (self, sWord, iAddr=0, sNewWord=""):
    """Return the dictionary words reachable from <sWord> through similar characters.

    Not finished (experimental).

    RECURSIVE FUNCTION: each call consumes the first character of <sWord>,
    follows every arc whose character is considered similar to it, and
    recurses on the remainder of the word from the node that arc leads to.

    sWord     -- remaining part of the word still to be matched
    iAddr     -- address in self.byDic of the node currently visited
    sNewWord  -- characters accumulated along the path followed so far

    Returns a list of strings; the list is empty when no path ends on a
    final node.
    """
    if not sWord:
        # Whole word consumed: the path is a suggestion only if the
        # final-node bit is set on the node we stopped on.
        nRawArc = int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big')
        if nRawArc & self._finalNodeMask:
            return [sNewWord]
        return []
    lSugg = []
    sTail = sWord[1:]
    # sWord[0:1] (a slice, not an index) is kept from the original code.
    for cChar, jAddr in self._getSimilarArcs(sWord[0:1], iAddr):
        lSugg.extend(self.suggest(sTail, jAddr, sNewWord+cChar))
    return lSugg
def _getSimilarArcs (self, cChar, iAddr):
    """Generator: yield (character, address) for each arc similar to <cChar>.

    cChar -- the character to match (a one-character string)
    iAddr -- address of the node whose outgoing arcs are examined

    Candidate characters are read from cp.dSimilarChar; when <cChar> has no
    entry there, <cChar> itself is the only candidate.
    NOTE(review): cp.dSimilarChar is presumably a dict mapping a character
    to an iterable of look-alike characters -- confirm in char_player.

    Each yielded address is the value returned by self._lookupArcNode for
    the candidate, i.e. the node to continue from, not <iAddr> itself.
    """
    for c in cp.dSimilarChar.get(cChar, [cChar]):
        # Characters absent from the dictionary's character table have no
        # arc code; skip them instead of raising KeyError.
        if c not in self.dChar:
            continue
        jAddr = self._lookupArcNode(self.dChar[c], iAddr)
        # _lookupArcNode yields a falsy value when there is no such arc.
        if jAddr:
            yield (c, jAddr)
def getMorph (self, sWord):
"retrieves morphologies list, different casing allowed"
l = self.morph(sWord)
if sWord[0:1].isupper():
l.extend(self.morph(sWord.lower()))
if sWord.isupper() and len(sWord) > 1:
|