Grammalecte  Check-in [ea16ae6a5b]

Overview
Comment:[core] ibdawg: use regex in select()
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | core
Files: files | file ages | folders
SHA3-256: ea16ae6a5becb9cfc9677289f1083c5b3663023d5a50740bd4ba47d0f4138649
User & Date: olr on 2017-07-01 03:28:12
Other Links: manifest | tags
Context
2017-07-01
07:21
[fr] faux positif: sur un ton différent. check-in: dcf17dd46d user: olr tags: trunk, fr
03:28
[core] ibdawg: use regex in select() check-in: ea16ae6a5b user: olr tags: trunk, core
2017-06-30
19:38
[core] ibdawg: select entries from dictionary (can uncompress the full dictionary) check-in: 4a53af847f user: olr tags: trunk, core, new_feature
Changes

Modified gc_core/py/ibdawg.py from [1de2d3f67f] to [5262800a60].

1
2
3
4
5
6

7
8
9
10
11
12
13
#!python3
# -*- coding: UTF-8 -*-

import os
import traceback
import pkgutil

from itertools import chain

from . import str_transform as st
from . import char_player as cp
from .echo import echo








>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
#!python3
# -*- coding: UTF-8 -*-

import os
import traceback
import pkgutil
import re
from itertools import chain

from . import str_transform as st
from . import char_player as cp
from .echo import echo


279
280
281
282
283
284
285
286
287




288

289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
                n += 1
        if not sWord:
            return
        if iPos >= 0:
            print("\n   "+ " " * iPos + "|")
            self.drawPath(sWord[1:], iNextNodeAddr)

    def select (self, sFilter=""):
        "generator: yields every entry whose morphology contains <sFilter> (all entries if <sFilter> is empty)"
        # echo the requested filter, then walk the graph from address 0 with an empty word
        print("Filter: " + sFilter)
        iStartAddr = 0
        sStartWord = ""
        for sEntry in self._select1(sFilter, iStartAddr, sStartWord):
            yield sEntry


    # def morph (self, sWord):
    #     is defined in __init__

    # VERSION 1
    def _select1 (self, sFilter, iAddr, sWord):
        # recursive generator: follows every arc leaving <iAddr>;
        # <sWord> holds the characters collected on the way to this node
        for nArcVal, iNextAddr in self._getArcs1(iAddr):
            if nArcVal < self.nChar:
                # simple character: lengthen the word and go one node deeper
                sLonger = sWord + self.lArcVal[nArcVal]
                yield from self._select1(sFilter, iNextAddr, sLonger)
                continue
            # any other arc value is passed to funcStemming together with the word;
            # the result forms the second tab-separated field of the entry
            sPrefix = sWord + "\t" + self.funcStemming(sWord, self.lArcVal[nArcVal]) + "\t"
            for nMorphVal, _ in self._getArcs1(iNextAddr):
                sMorph = self.lArcVal[nMorphVal]
                if sFilter and sFilter not in sMorph:
                    # a filter is set and this morphology does not contain it
                    continue
                yield sPrefix + sMorph

    def _morph1 (self, sWord):
        "returns morphologies of sWord"
        iAddr = 0
        for c in sWord:
            if c not in self.dChar:







|
|
>
>
>
>
|
>
|
<





|




|



|







280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295

296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
                n += 1
        if not sWord:
            return
        if iPos >= 0:
            print("\n   "+ " " * iPos + "|")
            self.drawPath(sWord[1:], iNextNodeAddr)

    def select (self, sPattern=""):
        """generator: returns all entries which morphology fits <sPattern>

        <sPattern> is a regular expression, compiled once here and then searched
        in each morphology string by _select1().
        If the pattern cannot be compiled, the error is printed and every entry
        is returned unfiltered.
        """
        zPattern = None
        try:
            zPattern = re.compile(sPattern)
        except Exception:
            # Best effort: report the bad pattern and fall back to "no filter".
            # Only Exception is caught so that KeyboardInterrupt and SystemExit
            # are not swallowed the way a bare "except:" would swallow them.
            print("# Error in regex pattern")
            traceback.print_exc()
        yield from self._select1(zPattern, 0, "")


    # def morph (self, sWord):
    #     is defined in __init__

    # VERSION 1
    def _select1 (self, zPattern, iAddr, sWord):
        # recursive generator: follows every arc leaving <iAddr>;
        # <sWord> holds the characters collected on the way to this node
        for nArcVal, iNextAddr in self._getArcs1(iAddr):
            if nArcVal < self.nChar:
                # simple character: lengthen the word and go one node deeper
                sLonger = sWord + self.lArcVal[nArcVal]
                yield from self._select1(zPattern, iNextAddr, sLonger)
                continue
            # any other arc value is passed to funcStemming together with the word;
            # the result forms the second tab-separated field of the entry
            sPrefix = sWord + "\t" + self.funcStemming(sWord, self.lArcVal[nArcVal]) + "\t"
            for nMorphVal, _ in self._getArcs1(iNextAddr):
                sMorph = self.lArcVal[nMorphVal]
                if zPattern and not zPattern.search(sMorph):
                    # a pattern is set and it is not found in this morphology
                    continue
                yield sPrefix + sMorph

    def _morph1 (self, sWord):
        "returns morphologies of sWord"
        iAddr = 0
        for c in sWord:
            if c not in self.dChar: