276
277
278
279
280
281
282
283
284
285
286
287
288
289
|
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
|
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
print(" > Write informations")
with open(sPathFile, 'w', encoding='utf-8', newline="\n") as hDst:
hDst.write(self.getArcStats())
hDst.write("\n * Values:\n")
for i, s in enumerate(self.lArcVal):
hDst.write(" {:>6}. {}\n".format(i, s))
hDst.close()
def select (self, sPattern=""):
"generator: returns all entries which morphology fits <sPattern>"
zPattern = None
if sPattern:
try:
zPattern = re.compile(sPattern)
except:
print("# Error in regex pattern")
traceback.print_exc()
yield from self._select(zPattern, self.oRoot, "")
def _select (self, zPattern, oNode, sWord):
# recursive generator
for nVal, oNextNode in oNode.arcs.items():
if nVal <= self.nChar:
# simple character
yield from self._select(zPattern, oNextNode, sWord + self.lArcVal[nVal])
else:
sEntry = sWord + "\t" + self.funcStemming(sWord, self.lArcVal[nVal])
for nMorphVal, _ in oNextNode.arcs.items():
if not zPattern or zPattern.search(self.lArcVal[nMorphVal]):
yield sEntry + "\t" + self.lArcVal[nMorphVal]
# BINARY CONVERSION
def createBinary (self, sPathFile, nMethod, bDebug=False):
print(" > Write DAWG as an indexable binary dictionary [method: %d]" % nMethod)
if nMethod == 1:
self.nBytesArc = ( (self.nArcVal.bit_length() + 2) // 8 ) + 1 # We add 2 bits. See DawgNode.convToBytes1()
self._calcNumBytesNodeAddress()
|