Overview
Comment: | [graphspell][py] dawg: add select() function |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | graphspell |
Files: | files | file ages | folders |
SHA3-256: |
057eab4afbd9289173a65ac8ad28144f |
User & Date: | olr on 2018-02-09 15:24:45 |
Original Comment: | [graphspell] dawg: add select() function |
Other Links: | manifest | tags |
Context
2018-02-09
| ||
15:26 | [graphspell][js] ibdawg: fix bug in select() check-in: 22ff60980a user: olr tags: trunk, graphspell | |
15:24 | [graphspell][py] dawg: add select() function check-in: 057eab4afb user: olr tags: trunk, graphspell | |
15:23 | [graphspell][py] ibdawg: fix bug in select() check-in: 8142bf4c10 user: olr tags: trunk, graphspell | |
Changes
Modified graphspell/dawg.py from [a0b57fd216] to [1156fb2ccd].
︙ | ︙ | |||
276 277 278 279 280 281 282 283 284 285 286 287 288 289 | print(" > Write informations") with open(sPathFile, 'w', encoding='utf-8', newline="\n") as hDst: hDst.write(self.getArcStats()) hDst.write("\n * Values:\n") for i, s in enumerate(self.lArcVal): hDst.write(" {:>6}. {}\n".format(i, s)) hDst.close() # BINARY CONVERSION def createBinary (self, sPathFile, nMethod, bDebug=False): print(" > Write DAWG as an indexable binary dictionary [method: %d]" % nMethod) if nMethod == 1: self.nBytesArc = ( (self.nArcVal.bit_length() + 2) // 8 ) + 1 # We add 2 bits. See DawgNode.convToBytes1() self._calcNumBytesNodeAddress() | > > > > > > > > > > > > > > > > > > > > > > > > | 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 | print(" > Write informations") with open(sPathFile, 'w', encoding='utf-8', newline="\n") as hDst: hDst.write(self.getArcStats()) hDst.write("\n * Values:\n") for i, s in enumerate(self.lArcVal): hDst.write(" {:>6}. {}\n".format(i, s)) hDst.close() def select (self, sPattern=""): "generator: returns all entries which morphology fits <sPattern>" zPattern = None if sPattern: try: zPattern = re.compile(sPattern) except: print("# Error in regex pattern") traceback.print_exc() yield from self._select(zPattern, self.oRoot, "") def _select (self, zPattern, oNode, sWord): # recursive generator for nVal, oNextNode in oNode.arcs.items(): if nVal <= self.nChar: # simple character yield from self._select(zPattern, oNextNode, sWord + self.lArcVal[nVal]) else: sEntry = sWord + "\t" + self.funcStemming(sWord, self.lArcVal[nVal]) for nMorphVal, _ in oNextNode.arcs.items(): if not zPattern or zPattern.search(self.lArcVal[nMorphVal]): yield sEntry + "\t" + self.lArcVal[nMorphVal] # BINARY CONVERSION def createBinary (self, sPathFile, nMethod, bDebug=False): print(" > Write DAWG as an indexable binary dictionary [method: %d]" % nMethod) if nMethod == 1: self.nBytesArc = ( (self.nArcVal.bit_length() + 2) // 8 ) + 1 # We add 2 bits. See DawgNode.convToBytes1() self._calcNumBytesNodeAddress() |
︙ | ︙ |