Index: cli.py ================================================================== --- cli.py +++ cli.py @@ -18,10 +18,11 @@ "Quant sera t’il châtiés pour ses mensonge ? Merde ! J’en aie marre." _HELP = """ /help /h show this text ?word1 [word2] ... words analysis + !word suggestion /lopt /lo list options /+ option1 [option2] ... activate grammar checking options /- option1 [option2] ... deactivate grammar checking options /lrules [pattern] /lr list rules /--rule1 [rule2] ... deactivate grammar checking rule @@ -198,13 +199,19 @@ sText = _getText(sInputText) while True: if sText.startswith("?"): for sWord in sText[1:].strip().split(): if sWord: - echo("* {}".format(sWord)) + echo("* " + sWord) for sMorph in oDict.getMorph(sWord): echo(" {:<32} {}".format(sMorph, oLexGraphe.formatTags(sMorph))) + elif sText.startswith("!"): + for sWord in sText[1:].strip().split(): + if sWord: + echo("* suggestions for: " + sWord) + for sSugg in oDict.suggest(sWord): + echo(" > " + sSugg) elif sText.startswith("/+ "): gce.setOptions({ opt:True for opt in sText[3:].strip().split() if opt in gce.getOptions() }) echo("done") elif sText.startswith("/- "): gce.setOptions({ opt:False for opt in sText[3:].strip().split() if opt in gce.getOptions() }) Index: gc_core/py/ibdawg.py ================================================================== --- gc_core/py/ibdawg.py +++ gc_core/py/ibdawg.py @@ -4,10 +4,11 @@ import os import traceback import pkgutil from . import str_transform as st +from . import char_player as cp from .echo import echo class IBDAWG: """INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH""" @@ -162,33 +163,29 @@ iAddr = self._lookupArcNode(self.dChar[c], iAddr) if iAddr == None: return False return int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask - def getSugg (self, sWord, iAddr=0, sNewWord=""): + def suggest (self, sWord, iAddr=0, sNewWord=""): "not finished" # RECURSIVE FUNCTION if not sWord: if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask: return [sNewWord] return [] lSugg = [] - lArc = self._getSimilarArcs(sWord[0:1], iAddr) - if lArc: - for t in lArc: - lSugg.extend(self._lookupAndSuggest(sWord[1:], t[1], sNewWord+t[0])) - else: - pass + for cChar, jAddr in self._getSimilarArcs(sWord[0:1], iAddr): + lSugg.extend(self.suggest(sWord[1:], jAddr, sNewWord+cChar)) return lSugg def _getSimilarArcs (self, cChar, iAddr): - lArc = [] - for c in st.dSimilarChars.get(cChar, cChar): - jAddr = self._lookupArcNode(self.dChar[c], iAddr) - if jAddr: - lArc.append((c, iAddr)) - return lArc + "generator: yield similar char of and address of the following node" + for c in cp.dSimilarChar.get(cChar, [cChar]): + if c in self.dChar: + jAddr = self._lookupArcNode(self.dChar[c], iAddr) + if jAddr: + yield (c, jAddr) def getMorph (self, sWord): "retrieves morphologies list, different casing allowed" l = self.morph(sWord) if sWord[0:1].isupper(): Index: gc_core/py/str_transform.py ================================================================== --- gc_core/py/str_transform.py +++ gc_core/py/str_transform.py @@ -1,38 +1,6 @@ #!python3 -# -*- coding: UTF-8 -*- - - -dSimilarChars = { - "a": "aàâáä", - "à": "aàâáä", - "â": "aàâáä", - "á": "aàâáä", - "ä": "aàâáä", - "c": "cç", - "ç": "cç", - "e": "eéêèë", - "é": "eéêèë", - "ê": "eéêèë", - "è": "eéêèë", - "ë": "eéêèë", - "i": "iîïíì", - "î": "iîïíì", - "ï": "iîïíì", - "í": "iîïíì", - "ì": "iîïíì", - "o": "oôóòö", - "ô": "oôóòö", - "ó": "oôóòö", - "ò": "oôóòö", - "ö": "oôóòö", - "u": "uûùüú", - "û": "uûùüú", - "ù": "uûùüú", - "ü": "uûùüú", - "ú": "uûùüú", -} ## No stemming def noStemming (sFlex, sStem): return sStem