Grammalecte  Diff

Differences From Artifact [64364f5bf4]:

To Artifact [eb988983d4]:


10
11
12
13
14
15
16


17
18
19
20
21
22
23


import sys
import os
import collections
import json
import time



from . import str_transform as st
from .progressbar import ProgressBar



def readFile (spf):







>
>







10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25


import sys
import os
import collections
import json
import time
import re
import traceback

from . import str_transform as st
from .progressbar import ProgressBar



def readFile (spf):
59
60
61
62
63
64
65

66
67
68
69
70
71
72
        lTag  = [];   dTag  = {}; nTag  = 0; dTagOccur = {}
        nErr = 0

        try:
            zFilter = re.compile(sSelectFilterRegex)  if sSelectFilterRegex  else None
        except:
            print(" # Error. Wrong filter regex. Filter ignored.")

            zFilter = None

        # read lexicon
        if type(src) is str:
            iterable = readFile(src)
        else:
            iterable = src







>







61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
        lTag  = [];   dTag  = {}; nTag  = 0; dTagOccur = {}
        nErr = 0

        try:
            zFilter = re.compile(sSelectFilterRegex)  if sSelectFilterRegex  else None
        except:
            print(" # Error. Wrong filter regex. Filter ignored.")
            traceback.print_exc()
            zFilter = None

        # read lexicon
        if type(src) is str:
            iterable = readFile(src)
        else:
            iterable = src
95
96
97
98
99
100
101

102
103
104
105
106
107
108
                dTagOccur[sTag] = dTagOccur.get(sTag, 0) + 1
                aEntry.add((sFlex, dAff[sAff], dTag[sTag]))
        if not aEntry:
            raise ValueError("# Error. Empty lexicon")
        
        # Preparing DAWG
        print(" > Preparing list of words")

        lVal = lChar + lAff + lTag
        lWord = [ [dChar[c] for c in sFlex] + [iAff+nChar] + [iTag+nChar+nAff]  for sFlex, iAff, iTag in aEntry ]
        aEntry = None
        
        # Dictionary of arc values occurrency, to sort arcs of each node
        dValOccur = dict( [ (dChar[c], dCharOccur[c])  for c in dChar ] \
                        + [ (dAff[aff]+nChar, dAffOccur[aff]) for aff in dAff ] \







>







98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
                dTagOccur[sTag] = dTagOccur.get(sTag, 0) + 1
                aEntry.add((sFlex, dAff[sAff], dTag[sTag]))
        if not aEntry:
            raise ValueError("# Error. Empty lexicon")
        
        # Preparing DAWG
        print(" > Preparing list of words")
        print(" Filter: " + (sSelectFilterRegex or "[None]"))
        lVal = lChar + lAff + lTag
        lWord = [ [dChar[c] for c in sFlex] + [iAff+nChar] + [iTag+nChar+nAff]  for sFlex, iAff, iTag in aEntry ]
        aEntry = None
        
        # Dictionary of arc values occurrency, to sort arcs of each node
        dValOccur = dict( [ (dChar[c], dCharOccur[c])  for c in dChar ] \
                        + [ (dAff[aff]+nChar, dAffOccur[aff]) for aff in dAff ] \