10
11
12
13
14
15
16
17
18
19
20
21
22
23
|
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
+
+
|
import sys
import os
import collections
import json
import time
import re
import traceback
from . import str_transform as st
from .progressbar import ProgressBar
def readFile (spf):
|
59
60
61
62
63
64
65
66
67
68
69
70
71
72
|
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
|
+
|
lTag = []; dTag = {}; nTag = 0; dTagOccur = {}
nErr = 0
try:
zFilter = re.compile(sSelectFilterRegex) if sSelectFilterRegex else None
except:
print(" # Error. Wrong filter regex. Filter ignored.")
traceback.print_exc()
zFilter = None
# read lexicon
if type(src) is str:
iterable = readFile(src)
else:
iterable = src
|
95
96
97
98
99
100
101
102
103
104
105
106
107
108
|
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
|
+
|
dTagOccur[sTag] = dTagOccur.get(sTag, 0) + 1
aEntry.add((sFlex, dAff[sAff], dTag[sTag]))
if not aEntry:
raise ValueError("# Error. Empty lexicon")
# Preparing DAWG
print(" > Preparing list of words")
print(" Filter: " + (sSelectFilterRegex or "[None]"))
lVal = lChar + lAff + lTag
lWord = [ [dChar[c] for c in sFlex] + [iAff+nChar] + [iTag+nChar+nAff] for sFlex, iAff, iTag in aEntry ]
aEntry = None
# Dictionary of arc value occurrences, used to sort the arcs of each node
dValOccur = dict( [ (dChar[c], dCharOccur[c]) for c in dChar ] \
+ [ (dAff[aff]+nChar, dAffOccur[aff]) for aff in dAff ] \
|