DELETED gc_core/py/char_player.py Index: gc_core/py/char_player.py ================================================================== --- gc_core/py/char_player.py +++ gc_core/py/char_player.py @@ -1,324 +0,0 @@ -# list of similar chars -# useful for suggestion mechanism - -import re - - -_xTransChars = str.maketrans({ - 'à': 'a', 'é': 'e', 'î': 'i', 'ô': 'o', 'û': 'u', 'ÿ': 'i', "y": "i", - 'â': 'a', 'è': 'e', 'ï': 'i', 'ö': 'o', 'ù': 'u', 'ŷ': 'i', - 'ä': 'a', 'ê': 'e', 'í': 'i', 'ó': 'o', 'ü': 'u', 'ý': 'i', - 'á': 'a', 'ë': 'e', 'ì': 'i', 'ò': 'o', 'ú': 'u', 'ỳ': 'i', - 'ā': 'a', 'ē': 'e', 'ī': 'i', 'ō': 'o', 'ū': 'u', 'ȳ': 'i', - 'ñ': 'n', 'k': 'q', 'w': 'v', - 'œ': 'oe', 'æ': 'ae', -}) - -def simplifyWord (sWord): - "word simplication before calculating distance between words" - sWord = sWord.lower().translate(_xTransChars) - sNewWord = "" - for i, c in enumerate(sWord, 1): - if c != sWord[i:i+1]: - sNewWord += c - return sNewWord.replace("eau", "o").replace("au", "o").replace("ai", "e").replace("ei", "e").replace("ph", "f") - - -aVowel = set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ") -aConsonant = set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ") -aDouble = set("bcdfjklmnprstzBCDFJKLMNPRSTZ") # letters that may be used twice successively - - -# Similar chars - -d1to1 = { - "1": "liîLIÎ", - "2": "zZ", - "3": "eéèêEÉÈÊ", - "4": "aàâAÀÂ", - "5": "sgSG", - "6": "bdgBDG", - "7": "ltLT", - "8": "bB", - "9": "gbdGBD", - "0": "oôOÔ", - - "a": "aàâáäæ", - "A": "AÀÂÁÄÆ", - "à": "aàâáäæ", - "À": "AÀÂÁÄÆ", - "â": "aàâáäæ", - "Â": "AÀÂÁÄÆ", - "á": "aàâáäæ", - "Á": "AÀÂÁÄÆ", - "ä": "aàâáäæ", - "Ä": "AÀÂÁÄÆ", - - "æ": "æéa", - "Æ": "ÆÉA", - - "c": "cçskqśŝ", - "C": "CÇSKQŚŜ", - "ç": "cçskqśŝ", - "Ç": "CÇSKQŚŜ", - - "e": "eéèêëœ", - "E": "EÉÈÊËŒ", - "é": "eéèêëœ", - "É": "EÉÈÊËŒ", - "ê": "eéèêëœ", - "Ê": "EÉÈÊËŒ", - "è": "eéèêëœ", - "È": "EÉÈÊËŒ", - "ë": "eéèêëœ", - "Ë": "EÉÈÊËŒ", - - "g": "gj", - "G": "GJ", - - "i": "iîïyíìÿ", - "I": "IÎÏYÍÌŸ", - "î": "iîïyíìÿ", - "Î": "IÎÏYÍÌŸ", - "ï": "iîïyíìÿ", - "Ï": "IÎÏYÍÌŸ", - "í": "iîïyíìÿ", - "Í": "IÎÏYÍÌŸ", - "ì": "iîïyíìÿ", - "Ì": "IÎÏYÍÌŸ", - - "j": "jg", - "J": "JG", - - "k": "kcq", - "K": "KCQ", - - "n": "nñ", - "N": "NÑ", - - "o": "oôóòöœ", - "O": "OÔÓÒÖŒ", - "ô": "oôóòöœ", - "Ô": "OÔÓÒÖŒ", - "ó": "oôóòöœ", - "Ó": "OÔÓÒÖŒ", - "ò": "oôóòöœ", - "Ò": "OÔÓÒÖŒ", - "ö": "oôóòöœ", - "Ö": "OÔÓÒÖŒ", - - "œ": "œoôeéèêë", - "Œ": "ŒOÔEÉÈÊË", - - "q": "qck", - "Q": "QCK", - - "s": "sśŝcç", - "S": "SŚŜCÇ", - "ś": "sśŝcç", - "Ś": "SŚŜCÇ", - "ŝ": "sśŝcç", - "Ŝ": "SŚŜCÇ", - - "u": "uûùüú", - "U": "UÛÙÜÚ", - "û": "uûùüú", - "Û": "UÛÙÜÚ", - "ù": "uûùüú", - "Ù": "UÛÙÜÚ", - "ü": "uûùüú", - "Ü": "UÛÙÜÚ", - "ú": "uûùüú", - "Ú": "UÛÙÜÚ", - - "v": "vw", - "V": "VW", - - "w": "wv", - "W": "WV", - - "x": "xck", - "X": "XCK", - - "y": "yÿiîŷýỳ", - "Y": "YŸIÎŶÝỲ", - "ÿ": "yÿiîŷýỳ", - "Ÿ": "YŸIÎŶÝỲ", - "ŷ": "yÿiîŷýỳ", - "Ŷ": "YŸIÎŶÝỲ", - "ý": "yÿiîŷýỳ", - "Ý": "YŸIÎŶÝỲ", - "ỳ": "yÿiîŷýỳ", - "Ỳ": "YŸIÎŶÝỲ", - - "z": "zs", - "Z": "ZS", -} - -d1toX = { - "æ": ("ae",), - "Æ": ("AE",), - "b": ("bb",), - "B": ("BB",), - "c": ("cc", "ss", "qu", "ch"), - "C": ("CC", "SS", "QU", "CH"), - "d": ("dd",), - "D": ("DD",), - "é": ("ai", "ei"), - "É": ("AI", "EI"), - "f": ("ff", "ph"), - "F": ("FF", "PH"), - "g": ("gu", "ge", "gg", "gh"), - "G": ("GU", "GE", "GG", "GH"), - "j": ("jj", "dj"), - "J": ("JJ", "DJ"), - "k": ("qu", "ck", "ch", "cu", "kk", "kh"), - "K": ("QU", "CK", "CH", "CU", "KK", "KH"), - "l": ("ll",), - "L": ("LL",), - "m": ("mm", "mn"), - 
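# [Editor's illustration; not part of the original file] These tables feed the
# suggestion engine in ibdawg.py: d1to1 maps a char to the single chars that may
# replace it, d1toX (this table) maps a char to multi-char replacements. For example:
#   d1to1["o"]  is  "oôóòöœ"      (any of them may stand in for "o" during the DAWG walk)
#   d1toX["f"]  is  ("ff", "ph")  (so a typed "f" is also tried as "ff" or "ph")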
"M": ("MM", "MN"), - "n": ("nn", "nm", "mn"), - "N": ("NN", "NM", "MN"), - "o": ("au", "eau"), - "O": ("AU", "EAU"), - "œ": ("oe", "eu"), - "Œ": ("OE", "EU"), - "p": ("pp", "ph"), - "P": ("PP", "PH"), - "q": ("qu", "ch", "cq", "ck", "kk"), - "Q": ("QU", "CH", "CQ", "CK", "KK"), - "r": ("rr",), - "R": ("RR",), - "s": ("ss", "sh"), - "S": ("SS", "SH"), - "t": ("tt", "th"), - "T": ("TT", "TH"), - "x": ("cc", "ct", "xx"), - "X": ("CC", "CT", "XX"), - "z": ("ss", "zh"), - "Z": ("SS", "ZH"), -} - - -def get1toXReplacement (cPrev, cCur, cNext): - if cCur in aConsonant and (cPrev in aConsonant or cNext in aConsonant): - return () - return d1toX.get(cCur, ()) - - -d2toX = { - "am": ("an", "en", "em"), - "AM": ("AN", "EN", "EM"), - "an": ("am", "en", "em"), - "AN": ("AM", "EN", "EM"), - "au": ("eau", "o", "ô"), - "AU": ("EAU", "O", "Ô"), - "em": ("an", "am", "en"), - "EM": ("AN", "AM", "EN"), - "en": ("an", "am", "em"), - "EN": ("AN", "AM", "EM"), - "ai": ("ei", "é", "è", "ê", "ë"), - "AI": ("EI", "É", "È", "Ê", "Ë"), - "ei": ("ai", "é", "è", "ê", "ë"), - "EI": ("AI", "É", "È", "Ê", "Ë"), - "ch": ("sh", "c", "ss"), - "CH": ("SH", "C", "SS"), - "ct": ("x", "cc"), - "CT": ("X", "CC"), - "oa": ("oi",), - "OA": ("OI",), - "oi": ("oa", "oie"), - "OI": ("OA", "OIE"), - "ph": ("f",), - "PH": ("F",), - "qu": ("q", "cq", "ck", "c", "k"), - "QU": ("Q", "CQ", "CK", "C", "K"), - "ss": ("c", "ç"), - "SS": ("C", "Ç"), - "un": ("ein",), - "UN": ("EIN",), -} - - -# End of word - -dFinal1 = { - "a": ("as", "at", "ant", "ah"), - "A": ("AS", "AT", "ANT", "AH"), - "c": ("ch",), - "C": ("CH",), - "e": ("et", "er", "ets", "ée", "ez", "ai", "ais", "ait", "ent", "eh"), - "E": ("ET", "ER", "ETS", "ÉE", "EZ", "AI", "AIS", "AIT", "ENT", "EH"), - "é": ("et", "er", "ets", "ée", "ez", "ai", "ais", "ait"), - "É": ("ET", "ER", "ETS", "ÉE", "EZ", "AI", "AIS", "AIT"), - "è": ("et", "er", "ets", "ée", "ez", "ai", "ais", "ait"), - "È": ("ET", "ER", "ETS", "ÉE", "EZ", "AI", "AIS", "AIT"), - "ê": ("et", "er", "ets", "ée", "ez", "ai", "ais", "ait"), - "Ê": ("ET", "ER", "ETS", "ÉE", "EZ", "AI", "AIS", "AIT"), - "ë": ("et", "er", "ets", "ée", "ez", "ai", "ais", "ait"), - "Ë": ("ET", "ER", "ETS", "ÉE", "EZ", "AI", "AIS", "AIT"), - "g": ("gh",), - "G": ("GH",), - "i": ("is", "it", "ie", "in"), - "I": ("IS", "IT", "IE", "IN"), - "n": ("nt", "nd", "ns", "nh"), - "N": ("NT", "ND", "NS", "NH"), - "o": ("aut", "ot", "os"), - "O": ("AUT", "OT", "OS"), - "ô": ("aut", "ot", "os"), - "Ô": ("AUT", "OT", "OS"), - "ö": ("aut", "ot", "os"), - "Ö": ("AUT", "OT", "OS"), - "p": ("ph",), - "P": ("PH",), - "s": ("sh",), - "S": ("SH",), - "t": ("th",), - "T": ("TH",), - "u": ("ut", "us", "uh"), - "U": ("UT", "US", "UH"), -} - -dFinal2 = { - "ai": ("aient", "ais", "et"), - "AI": ("AIENT", "AIS", "ET"), - "an": ("ant", "ent"), - "AN": ("ANT", "ENT"), - "en": ("ent", "ant"), - "EN": ("ENT", "ANT"), - "ei": ("ait", "ais"), - "EI": ("AIT", "AIS"), - "on": ("ons", "ont"), - "ON": ("ONS", "ONT"), - "oi": ("ois", "oit", "oix"), - "OI": ("OIS", "OIT", "OIX"), -} - - -# Préfixes et suffixes - -aPfx1 = frozenset([ - "anti", "archi", "contre", "hyper", "mé", "méta", "im", "in", "ir", "par", "proto", - "pseudo", "pré", "re", "ré", "sans", "sous", "supra", "sur", "ultra" -]) -aPfx2 = frozenset([ - "belgo", "franco", "génito", "gynéco", "médico", "russo" -]) - - -_zMotAvecPronom = re.compile("^(?i)(\\w+)(-(?:t-|)(?:ils?|elles?|on|je|tu|nous|vous))$") - -def cut (sWord): - "returns a tuple of strings (prefix, trimed_word, suffix)" - m = _zMotAvecPronom.search(sWord) - if m: - 
return ("", m.group(1), m.group(2)) - return ("", sWord, "") - - -# Other functions - -def filterSugg (aSugg): - "exclude suggestions" - return filter(lambda sSugg: not sSugg.endswith(("è", "È")), aSugg) DELETED gc_core/py/dawg.py Index: gc_core/py/dawg.py ================================================================== --- gc_core/py/dawg.py +++ gc_core/py/dawg.py @@ -1,775 +0,0 @@ -#!python3 - -# FSA DICTIONARY BUILDER -# -# by Olivier R. -# License: MPL 2 -# -# This tool encodes lexicon into an indexable binary dictionary -# Input files MUST be encoded in UTF-8. - - -import sys -import os -import collections - -from . import str_transform as st -from .progressbar import ProgressBar - - - -def readFile (spf): - print(" < Read lexicon: " + spf) - if os.path.isfile(spf): - with open(spf, "r", encoding="utf-8") as hSrc: - for sLine in hSrc: - sLine = sLine.strip() - if sLine and not sLine.startswith("#"): - yield sLine - else: - raise OSError("# Error. File not found or not loadable: " + spf) - - -def getElemsFromFile (spf): - "returns tuple of (flexion, stem, tags) from lexicon file" - nErr = 0 - if not spf.endswith(".clex"): - for sLine in readFile(spf): - try: - sFlex, sStem, sTag = sLine.split("\t") - yield (sFlex, sStem, sTag) - except: - nErr += 1 - else: - sTag = "_" # neutral tag - sTag2 = "" - for sLine in readFile(spf): - if sLine.startswith("[") and sLine.endswith("]"): - # tag line - if "-->" in sLine: - try: - sTag, sSfxCode, sTag2 = sLine[1:-1].split(" --> ") - except: - nErr += 1 - continue - sTag = sTag.strip() - sSfxCode = sSfxCode.strip() - sTag2 = sTag2.strip() - else: - sTag = sLine[1:-1] - sTag2 = "" - else: - # entry line - if "\t" in sLine: - if sLine.count("\t") > 1: - nErr += 1 - continue - sFlex, sStem = sLine.split("\t") - else: - sFlex = sStem = sLine - #print(sFlex, sStem, sTag) - yield (sFlex, sStem, sTag) - if sTag2: - sFlex2 = st.changeWordWithSuffixCode(sFlex, sSfxCode) - #print(sFlex2, sStem, sTag2) - yield (sFlex2, sStem, sTag2) - if nErr: - print(" # Lines ignored: {:>10}".format(nErr)) - - - -class DAWG: - """DIRECT ACYCLIC WORD GRAPH""" - # This code is inspired from Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115) - # We store suffix/affix codes and tags within the graph after the “real” word. - # A word is a list of numbers [ c1, c2, c3 . . . cN, iAffix, iTags] - # Each arc is an index in self.lArcVal, where are stored characters, suffix/affix codes for stemming and tags. - # Important: As usual, the last node (after ‘iTags’) is tagged final, AND the node after ‘cN’ is ALSO tagged final. - - def __init__ (self, spfSrc, sLangName, cStemming): - print("===== Direct Acyclic Word Graph - Minimal Acyclic Finite State Automaton =====") - cStemming = cStemming.upper() - if cStemming == "A": - funcStemmingGen = st.defineAffixCode - elif cStemming == "S": - funcStemmingGen = st.defineSuffixCode - elif cStemming == "N": - funcStemmingGen = st.noStemming - else: - raise ValueError("# Error. 
Unknown stemming code: {}".format(cStemming)) - - lEntry = [] - lChar = ['']; dChar = {}; nChar = 1; dCharOccur = {} - lAff = []; dAff = {}; nAff = 0; dAffOccur = {} - lTag = []; dTag = {}; nTag = 0; dTagOccur = {} - nErr = 0 - - # read lexicon - for sFlex, sStem, sTag in getElemsFromFile(spfSrc): - addWordToCharDict(sFlex) - # chars - for c in sFlex: - if c not in dChar: - dChar[c] = nChar - lChar.append(c) - nChar += 1 - dCharOccur[c] = dCharOccur.get(c, 0) + 1 - # affixes to find stem from flexion - aff = funcStemmingGen(sFlex, sStem) - if aff not in dAff: - dAff[aff] = nAff - lAff.append(aff) - nAff += 1 - dAffOccur[aff] = dAffOccur.get(aff, 0) + 1 - # tags - if sTag not in dTag: - dTag[sTag] = nTag - lTag.append(sTag) - nTag += 1 - dTagOccur[sTag] = dTagOccur.get(sTag, 0) + 1 - lEntry.append((sFlex, dAff[aff], dTag[sTag])) - if not lEntry: - raise ValueError("# Error. Empty lexicon") - - # Preparing DAWG - print(" > Preparing list of words") - lVal = lChar + lAff + lTag - lWord = [ [dChar[c] for c in sFlex] + [iAff+nChar] + [iTag+nChar+nAff] for sFlex, iAff, iTag in lEntry ] - lEntry = None - - # Dictionary of arc value occurrences, used to sort the arcs of each node - dValOccur = dict( [ (dChar[c], dCharOccur[c]) for c in dChar ] \ - + [ (dAff[aff]+nChar, dAffOccur[aff]) for aff in dAff ] \ - + [ (dTag[tag]+nChar+nAff, dTagOccur[tag]) for tag in dTag ] ) - #with open(spfSrc[:-8]+".valuesfreq.txt", 'w', encoding='utf-8') as hFreqDst: # DEBUG - # for iKey, nOcc in sorted(dValOccur.items(), key=lambda t: t[1], reverse=True): - # hFreqDst.write("{}: {}\n".format(lVal[iKey], nOcc)) - # hFreqDst.close() - - self.sFile = spfSrc - self.sLang = sLangName - self.nEntry = len(lWord) - self.aPreviousEntry = [] - DawgNode.resetNextId() - self.oRoot = DawgNode() - self.lUncheckedNodes = [] # list of nodes that have not been checked for duplication. - self.lMinimizedNodes = {} # dictionary (used as a set) of unique nodes that have been checked for duplication. - self.lSortedNodes = [] # version 2 and 3 - self.nNode = 0 - self.nArc = 0 - self.dChar = dChar - self.nChar = len(dChar) - self.nAff = nAff - self.lArcVal = lVal - self.nArcVal = len(lVal) - self.nTag = self.nArcVal - self.nChar - nAff - self.cStemming = cStemming - if cStemming == "A": - self.funcStemming = st.changeWordWithAffixCode - elif cStemming == "S": - self.funcStemming = st.changeWordWithSuffixCode - else: - self.funcStemming = st.noStemming - - # build - lWord.sort() - oProgBar = ProgressBar(0, len(lWord)) - for aEntry in lWord: - self.insert(aEntry) - oProgBar.increment(1) - oProgBar.done() - self.finish() - self.countNodes() - self.countArcs() - self.sortNodes() - self.sortNodeArcs(dValOccur) - #self.sortNodeArcs2 (self.oRoot, "") - self.displayInfo() - - # BUILD DAWG - def insert (self, aEntry): - if aEntry < self.aPreviousEntry: - sys.exit("# Error: Words must be inserted in alphabetical order.") - - # find common prefix between word and previous word - nCommonPrefix = 0 - for i in range(min(len(aEntry), len(self.aPreviousEntry))): - if aEntry[i] != self.aPreviousEntry[i]: - break - nCommonPrefix += 1 - - # Check lUncheckedNodes for redundant nodes, proceeding from the last - # one down to the common prefix size, then truncate the list at that point.
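# [Editor's illustration; not part of the original file] With sorted input, only the
# tail not shared with the previous word can still change. E.g. after inserting "tape",
# inserting "tapis" yields nCommonPrefix == 3 ("tap"), so _minimize(3) folds the nodes
# created for the "e" tail into lMinimizedNodes before the "is" tail is appended.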
- self._minimize(nCommonPrefix) - - # add the suffix, starting from the correct node mid-way through the graph - if len(self.lUncheckedNodes) == 0: - oNode = self.oRoot - else: - oNode = self.lUncheckedNodes[-1][2] - - iChar = nCommonPrefix - for c in aEntry[nCommonPrefix:]: - oNextNode = DawgNode() - oNode.arcs[c] = oNextNode - self.lUncheckedNodes.append((oNode, c, oNextNode)) - if iChar == (len(aEntry) - 2): - oNode.final = True - iChar += 1 - oNode = oNextNode - oNode.final = True - self.aPreviousEntry = aEntry - - def finish (self): - "minimize unchecked nodes" - self._minimize(0) - - def _minimize (self, downTo): - # proceed from the leaf up to a certain point - for i in range( len(self.lUncheckedNodes)-1, downTo-1, -1 ): - oNode, char, oChildNode = self.lUncheckedNodes[i] - if oChildNode in self.lMinimizedNodes: - # replace the child with the previously encountered one - oNode.arcs[char] = self.lMinimizedNodes[oChildNode] - else: - # add the state to the minimized nodes. - self.lMinimizedNodes[oChildNode] = oChildNode - self.lUncheckedNodes.pop() - - def countNodes (self): - self.nNode = len(self.lMinimizedNodes) - - def countArcs (self): - self.nArc = 0 - for oNode in self.lMinimizedNodes: - self.nArc += len(oNode.arcs) - - def sortNodeArcs (self, dValOccur): - print(" > Sort node arcs") - self.oRoot.sortArcs(dValOccur) - for oNode in self.lMinimizedNodes: - oNode.sortArcs(dValOccur) - - def sortNodeArcs2 (self, oNode, cPrevious=""): - # recursive function - dCharOccur = getCharOrderAfterChar(cPrevious) - if dCharOccur: - oNode.sortArcs2(dCharOccur, self.lArcVal) - for nArcVal, oNextNode in oNode.arcs.items(): - self.sortNodeArcs2(oNextNode, self.lArcVal[nArcVal]) - - def sortNodes (self): - print(" > Sort nodes") - for oNode in self.oRoot.arcs.values(): - self._parseNodes(oNode) - - def _parseNodes (self, oNode): - # Warning: recursive method - if oNode.pos > 0: - return - oNode.setPos() - self.lSortedNodes.append(oNode) - for oNextNode in oNode.arcs.values(): - self._parseNodes(oNextNode) - - def lookup (self, sWord): - oNode = self.oRoot - for c in sWord: - if self.dChar.get(c, '') not in oNode.arcs: - return False - oNode = oNode.arcs[self.dChar[c]] - return oNode.final - - def morph (self, sWord): - oNode = self.oRoot - for c in sWord: - if self.dChar.get(c, '') not in oNode.arcs: - return '' - oNode = oNode.arcs[self.dChar[c]] - if oNode.final: - s = "* " - for arc in oNode.arcs: - if arc >= self.nChar: - s += " [" + self.funcStemming(sWord, self.lArcVal[arc]) - oNode2 = oNode.arcs[arc] - for arc2 in oNode2.arcs: - s += " / " + self.lArcVal[arc2] - s += "]" - return s - return '' - - def displayInfo (self): - print(" * {:<12} {:>16,}".format("Entries:", self.nEntry)) - print(" * {:<12} {:>16,}".format("Characters:", self.nChar)) - print(" * {:<12} {:>16,}".format("Affixes:", self.nAff)) - print(" * {:<12} {:>16,}".format("Tags:", self.nTag)) - print(" * {:<12} {:>16,}".format("Arc values:", self.nArcVal)) - print(" * {:<12} {:>16,}".format("Nodes:", self.nNode)) - print(" * {:<12} {:>16,}".format("Arcs:", self.nArc)) - print(" * {:<12} {:>16}".format("Stemming:", self.cStemming + "FX")) - - def getArcStats (self): - d = {} - for oNode in self.lMinimizedNodes: - n = len(oNode.arcs) - d[n] = d.get(n, 0) + 1 - s = " * Nodes:\n" - for n in d: - s = s + " {:>9} nodes have {:>3} arcs\n".format(d[n], n) - return s - - def writeInfo (self, sPathFile): - print(" > Write informations") - with open(sPathFile, 'w', encoding='utf-8', newline="\n") as hDst: - hDst.write(self.getArcStats()) 
- hDst.write("\n * Values:\n") - for i, s in enumerate(self.lArcVal): - hDst.write(" {:>6}. {}\n".format(i, s)) - hDst.close() - - # BINARY CONVERSION - def createBinary (self, sPathFile, nMethod, bDebug=False): - print(" > Write DAWG as an indexable binary dictionary [method: %d]" % nMethod) - if nMethod == 1: - self.nBytesArc = ( (self.nArcVal.bit_length() + 2) // 8 ) + 1 # We add 2 bits. See DawgNode.convToBytes1() - self._calcNumBytesNodeAddress() - self._calcNodesAddress1() - elif nMethod == 2: - self.nBytesArc = ( (self.nArcVal.bit_length() + 3) // 8 ) + 1 # We add 3 bits. See DawgNode.convToBytes2() - self._calcNumBytesNodeAddress() - self._calcNodesAddress2() - elif nMethod == 3: - self.nBytesArc = ( (self.nArcVal.bit_length() + 3) // 8 ) + 1 # We add 3 bits. See DawgNode.convToBytes3() - self.nBytesOffset = 1 - self.nMaxOffset = (2 ** (self.nBytesOffset * 8)) - 1 - self._calcNumBytesNodeAddress() - self._calcNodesAddress3() - else: - print(" # Error: unknown compression method") - print(" Arc values (chars, affixes and tags): {} -> {} bytes".format( self.nArcVal, len("\t".join(self.lArcVal).encode("utf-8")) )) - print(" Arc size: {} bytes, Address size: {} bytes -> {} * {} = {} bytes".format( self.nBytesArc, self.nBytesNodeAddress, \ - self.nBytesArc+self.nBytesNodeAddress, self.nArc, \ - (self.nBytesArc+self.nBytesNodeAddress)*self.nArc )) - self._writeBinary(sPathFile, nMethod) - if bDebug: - self._writeNodes(sPathFile, nMethod) - - def _calcNumBytesNodeAddress (self): - "how many bytes needed to store all nodes/arcs in the binary dictionary" - self.nBytesNodeAddress = 1 - while ((self.nBytesArc + self.nBytesNodeAddress) * self.nArc) > (2 ** (self.nBytesNodeAddress * 8)): - self.nBytesNodeAddress += 1 - - def _calcNodesAddress1 (self): - nBytesNode = self.nBytesArc + self.nBytesNodeAddress - iAddr = len(self.oRoot.arcs) * nBytesNode - for oNode in self.lMinimizedNodes: - oNode.addr = iAddr - iAddr += max(len(oNode.arcs), 1) * nBytesNode - - def _calcNodesAddress2 (self): - nBytesNode = self.nBytesArc + self.nBytesNodeAddress - iAddr = len(self.oRoot.arcs) * nBytesNode - for oNode in self.lSortedNodes: - oNode.addr = iAddr - iAddr += max(len(oNode.arcs), 1) * nBytesNode - for oNextNode in oNode.arcs.values(): - if (oNode.pos + 1) == oNextNode.pos: - iAddr -= self.nBytesNodeAddress - #break - - def _calcNodesAddress3 (self): - nBytesNode = self.nBytesArc + self.nBytesNodeAddress - # theorical nodes size if only addresses and no offset - self.oRoot.size = len(self.oRoot.arcs) * nBytesNode - for oNode in self.lSortedNodes: - oNode.size = max(len(oNode.arcs), 1) * nBytesNode - # rewind and calculate dropdown from the end, several times - nDiff = self.nBytesNodeAddress - self.nBytesOffset - bEnd = False - while not bEnd: - bEnd = True - # recalculate addresses - iAddr = self.oRoot.size - for oNode in self.lSortedNodes: - oNode.addr = iAddr - iAddr += oNode.size - # rewind and calculate dropdown from the end, several times - for i in range(self.nNode-1, -1, -1): - nSize = max(len(self.lSortedNodes[i].arcs), 1) * nBytesNode - for oNextNode in self.lSortedNodes[i].arcs.values(): - if 1 < (oNextNode.addr - self.lSortedNodes[i].addr) < self.nMaxOffset: - nSize -= nDiff - if self.lSortedNodes[i].size != nSize: - self.lSortedNodes[i].size = nSize - bEnd = False - - def _writeBinary (self, sPathFile, nMethod): - """ - Format of the binary indexable dictionary: - Each section is separated with 4 bytes of \0 - - - Section Header: - /pyfsa/[version] - * version is an ASCII string - - - Section 
Informations: - /[tag_lang] - /[number of chars] - /[number of bytes for each arc] - /[number of bytes for each address node] - /[number of entries] - /[number of nodes] - /[number of arcs] - /[number of affixes] - * each field is a ASCII string - /[stemming code] - * "S" means stems are generated by /suffix_code/, "A" means they are generated by /affix_code/ - See defineSuffixCode() and defineAffixCode() for details. - "N" means no stemming - - - Section Values: - * a list of strings encoded in binary from utf-8, each value separated with a tabulation - - - Section Word Graph (nodes / arcs) - * A list of nodes which are a list of arcs with an address of the next node. - See DawgNode.convToBytes() for details. - """ - if not sPathFile.endswith(".bdic"): - sPathFile += "."+str(nMethod)+".bdic" - with open(sPathFile, 'wb') as hDst: - # header - hDst.write("/pyfsa/{}/".format(nMethod).encode("utf-8")) - hDst.write(b"\0\0\0\0") - # infos - hDst.write("{}/{}/{}/{}/{}/{}/{}/{}/{}".format(self.sLang, self.nChar, self.nBytesArc, self.nBytesNodeAddress, \ - self.nEntry, self.nNode, self.nArc, self.nAff, self.cStemming).encode("utf-8")) - hDst.write(b"\0\0\0\0") - # lArcVal - hDst.write("\t".join(self.lArcVal).encode("utf-8")) - hDst.write(b"\0\0\0\0") - # DAWG: nodes / arcs - if nMethod == 1: - hDst.write(self.oRoot.convToBytes1(self.nBytesArc, self.nBytesNodeAddress)) - for oNode in self.lMinimizedNodes: - hDst.write(oNode.convToBytes1(self.nBytesArc, self.nBytesNodeAddress)) - elif nMethod == 2: - hDst.write(self.oRoot.convToBytes2(self.nBytesArc, self.nBytesNodeAddress)) - for oNode in self.lSortedNodes: - hDst.write(oNode.convToBytes2(self.nBytesArc, self.nBytesNodeAddress)) - elif nMethod == 3: - hDst.write(self.oRoot.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset)) - for oNode in self.lSortedNodes: - hDst.write(oNode.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset)) - hDst.close() - - def _writeNodes (self, sPathFile, nMethod): - "for debugging only" - print(" > Write nodes") - with open(sPathFile+".nodes."+str(nMethod)+".txt", 'w', encoding='utf-8', newline="\n") as hDst: - if nMethod == 1: - hDst.write(self.oRoot.getTxtRepr1(self.nBytesArc, self.nBytesNodeAddress, self.lArcVal)+"\n") - #hDst.write( ''.join( [ "%02X " % z for z in self.oRoot.convToBytes1(self.nBytesArc, self.nBytesNodeAddress) ] ).strip() ) - for oNode in self.lMinimizedNodes: - hDst.write(oNode.getTxtRepr1(self.nBytesArc, self.nBytesNodeAddress, self.lArcVal)+"\n") - if nMethod == 2: - hDst.write(self.oRoot.getTxtRepr2(self.nBytesArc, self.nBytesNodeAddress, self.lArcVal)+"\n") - for oNode in self.lSortedNodes: - hDst.write(oNode.getTxtRepr2(self.nBytesArc, self.nBytesNodeAddress, self.lArcVal)+"\n") - if nMethod == 3: - hDst.write(self.oRoot.getTxtRepr3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset, self.lArcVal)+"\n") - #hDst.write( ''.join( [ "%02X " % z for z in self.oRoot.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset) ] ).strip() ) - for oNode in self.lSortedNodes: - hDst.write(oNode.getTxtRepr3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset, self.lArcVal)+"\n") - hDst.close() - - def writeResults (self, sPathFile): - bFileExits = os.path.isfile("_lexicons.res.txt") - with open("_lexicons.res.txt", "a", encoding='utf-8', newline="\n") as hDst: - sFormat1 = "{:<12} {:>12} {:>5} {:>8} {:>8} {:>6} {:>8} {:>9} {:>9} {:>15} {:>12} {:>12}\n" - sFormat2 = "{:<12} {:>12,} {:>5,} {:>8,} {:>8} {:>6,} {:>8,} {:>9,} {:>9,} {:>15,} 
{:>12,} {:>12,}\n" - if not bFileExits: - hDst.write(sFormat1.format("Lexicon", "Entries", "Chars", "Affixes", "Stemming", "Tags", "Values", "Nodes", "Arcs", "Lexicon (Kb)", "Dict (Kb)", "LT Dict (Kb)")) - hDst.write(sFormat2.format(self.sLang, self.nEntry, self.nChar, self.nAff, self.cStemming + "FX", self.nTag, self.nArcVal, \ - self.nNode, self.nArc, os.path.getsize(self.sFile), os.path.getsize(sPathFile), \ - os.path.getsize("cfsa/dict/{}.dict".format(self.sLang)) if os.path.isfile("cfsa/dict/{}.dict".format(self.sLang)) else 0)) - hDst.close() - - - -class DawgNode: - NextId = 0 - NextPos = 1 # (version 2) - - def __init__ (self): - self.i = DawgNode.NextId - DawgNode.NextId += 1 - self.final = False - self.arcs = {} # key: arc value; value: a node - self.addr = 0 # address in the binary dictionary - self.pos = 0 # position in the binary dictionary (version 2) - self.size = 0 # size of node in bytes (version 3) - - @classmethod - def resetNextId (cls): - cls.NextId = 0 - - def setPos (self): # version 2 - self.pos = DawgNode.NextPos - DawgNode.NextPos += 1 - - def __str__ (self): - # Caution! this function is used for hashing and comparison! - l = [] - if self.final: - l.append("1") - else: - l.append("0") - for (key, node) in self.arcs.items(): - l.append(str(key)) - l.append(str(node.i)) - return "_".join(l) - - def __hash__ (self): - # Used as a key in a python dictionary. - return self.__str__().__hash__() - - def __eq__ (self, other): - # Used as a key in a python dictionary. - # Nodes are equivalent if they have identical arcs, and each identical arc leads to identical states. - return self.__str__() == other.__str__() - - def sortArcs (self, dValOccur): - self.arcs = collections.OrderedDict(sorted(self.arcs.items(), key=lambda t: dValOccur.get(t[0], 0), reverse=True)) - - def sortArcs2 (self, dValOccur, lArcVal): - self.arcs = collections.OrderedDict(sorted(self.arcs.items(), key=lambda t: dValOccur.get(lArcVal[t[0]], 0), reverse=True)) - - # VERSION 1 ===================================================================================================== - def convToBytes1 (self, nBytesArc, nBytesNodeAddress): - """ - Node scheme: - - Arc length is defined by nBytesArc - - Address length is defined by nBytesNodeAddress - - | Arc | Address of next node | - | | | - /---------------\ /---------------\ /---------------\ /---------------\ /---------------\ /---------------\ - | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | - \---------------/ \---------------/ \---------------/ \---------------/ \---------------/ \---------------/ - [...] 
- /---------------\ /---------------\ /---------------\ /---------------\ /---------------\ /---------------\ - | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | - \---------------/ \---------------/ \---------------/ \---------------/ \---------------/ \---------------/ - ^ ^ - | | - | | - | \___ if 1, last arc of this node - \_____ if 1, this node is final (only on the first arc) - """ - nArc = len(self.arcs) - nFinalNodeMask = 1 << ((nBytesArc*8)-1) - nFinalArcMask = 1 << ((nBytesArc*8)-2) - if len(self.arcs) == 0: - val = nFinalNodeMask | nFinalArcMask - by = val.to_bytes(nBytesArc, byteorder='big') - by += (0).to_bytes(nBytesNodeAddress, byteorder='big') - return by - by = b"" - for i, arc in enumerate(self.arcs, 1): - val = arc - if i == 1 and self.final: - val = val | nFinalNodeMask - if i == nArc: - val = val | nFinalArcMask - by += val.to_bytes(nBytesArc, byteorder='big') - by += self.arcs[arc].addr.to_bytes(nBytesNodeAddress, byteorder='big') - return by - - def getTxtRepr1 (self, nBytesArc, nBytesNodeAddress, lVal): - nArc = len(self.arcs) - nFinalNodeMask = 1 << ((nBytesArc*8)-1) - nFinalArcMask = 1 << ((nBytesArc*8)-2) - s = "i{:_>10} -- #{:_>10}\n".format(self.i, self.addr) - if len(self.arcs) == 0: - s += " {:<20} {:0>16} i{:_>10} #{:_>10}\n".format("", bin(nFinalNodeMask | nFinalArcMask)[2:], "0", "0") - return s - for i, arc in enumerate(self.arcs, 1): - val = arc - if i == 1 and self.final: - val = val | nFinalNodeMask - if i == nArc: - val = val | nFinalArcMask - s += " {:<20} {:0>16} i{:_>10} #{:_>10}\n".format(lVal[arc], bin(val)[2:], self.arcs[arc].i, self.arcs[arc].addr) - return s - - # VERSION 2 ===================================================================================================== - def convToBytes2 (self, nBytesArc, nBytesNodeAddress): - """ - Node scheme: - - Arc length is defined by nBytesArc - - Address length is defined by nBytesNodeAddress - - | Arc | Address of next node | - | | | - /---------------\ /---------------\ /---------------\ /---------------\ /---------------\ /---------------\ - | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | - \---------------/ \---------------/ \---------------/ \---------------/ \---------------/ \---------------/ - [...] 
- /---------------\ /---------------\ /---------------\ /---------------\ /---------------\ /---------------\ - | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | - \---------------/ \---------------/ \---------------/ \---------------/ \---------------/ \---------------/ - ^ ^ ^ - | | | - | | \_ if 1, caution, no address: next node is the following node - | \___ if 1, last arc of this node - \_____ if 1, this node is final (only on the first arc) - """ - nArc = len(self.arcs) - nFinalNodeMask = 1 << ((nBytesArc*8)-1) - nFinalArcMask = 1 << ((nBytesArc*8)-2) - nNextNodeMask = 1 << ((nBytesArc*8)-3) - if len(self.arcs) == 0: - val = nFinalNodeMask | nFinalArcMask - by = val.to_bytes(nBytesArc, byteorder='big') - by += (0).to_bytes(nBytesNodeAddress, byteorder='big') - return by - by = b"" - for i, arc in enumerate(self.arcs, 1): - val = arc - if i == 1 and self.final: - val = val | nFinalNodeMask - if i == nArc: - val = val | nFinalArcMask - if (self.pos + 1) == self.arcs[arc].pos and self.i != 0: - val = val | nNextNodeMask - by += val.to_bytes(nBytesArc, byteorder='big') - else: - by += val.to_bytes(nBytesArc, byteorder='big') - by += self.arcs[arc].addr.to_bytes(nBytesNodeAddress, byteorder='big') - return by - - def getTxtRepr2 (self, nBytesArc, nBytesNodeAddress, lVal): - nArc = len(self.arcs) - nFinalNodeMask = 1 << ((nBytesArc*8)-1) - nFinalArcMask = 1 << ((nBytesArc*8)-2) - nNextNodeMask = 1 << ((nBytesArc*8)-3) - s = "i{:_>10} -- #{:_>10}\n".format(self.i, self.addr) - if nArc == 0: - s += " {:<20} {:0>16} i{:_>10} #{:_>10}\n".format("", bin(nFinalNodeMask | nFinalArcMask)[2:], "0", "0") - return s - for i, arc in enumerate(self.arcs, 1): - val = arc - if i == 1 and self.final: - val = val | nFinalNodeMask - if i == nArc: - val = val | nFinalArcMask - if (self.pos + 1) == self.arcs[arc].pos and self.i != 0: - val = val | nNextNodeMask - s += " {:<20} {:0>16}\n".format(lVal[arc], bin(val)[2:], "") - else: - s += " {:<20} {:0>16} i{:_>10} #{:_>10}\n".format(lVal[arc], bin(val)[2:], self.arcs[arc].i, self.arcs[arc].addr) - return s - - # VERSION 3 ===================================================================================================== - def convToBytes3 (self, nBytesArc, nBytesNodeAddress, nBytesOffset): - """ - Node scheme: - - Arc length is defined by nBytesArc - - Address length is defined by nBytesNodeAddress - - Offset length is defined by nBytesOffset - - | Arc | Address of next node or offset to next node | - | | | - /---------------\ /---------------\ /---------------\ /---------------\ /---------------\ /---------------\ - |1|0|0| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | - \---------------/ \---------------/ \---------------/ \---------------/ \---------------/ \---------------/ - [...] 
- /---------------\ /---------------\ /---------------\ - |0|0|1| | | | | | | | | | | | | | | | | | | | | | | | Offsets are shorter than addresses - \---------------/ \---------------/ \---------------/ - /---------------\ /---------------\ /---------------\ /---------------\ /---------------\ /---------------\ - |0|1|0| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | - \---------------/ \---------------/ \---------------/ \---------------/ \---------------/ \---------------/ - - ^ ^ ^ - | | | - | | \_ if 1, offset instead of address of next node - | \___ if 1, last arc of this node - \_____ if 1, this node is final (only on the first arc) - """ - nArc = len(self.arcs) - nFinalNodeMask = 1 << ((nBytesArc*8)-1) - nFinalArcMask = 1 << ((nBytesArc*8)-2) - nNextNodeMask = 1 << ((nBytesArc*8)-3) - nMaxOffset = (2 ** (nBytesOffset * 8)) - 1 - if nArc == 0: - val = nFinalNodeMask | nFinalArcMask - by = val.to_bytes(nBytesArc, byteorder='big') - by += (0).to_bytes(nBytesNodeAddress, byteorder='big') - return by - by = b"" - for i, arc in enumerate(self.arcs, 1): - val = arc - if i == 1 and self.final: - val = val | nFinalNodeMask - if i == nArc: - val = val | nFinalArcMask - if 1 < (self.arcs[arc].addr - self.addr) < nMaxOffset and self.i != 0: - val = val | nNextNodeMask - by += val.to_bytes(nBytesArc, byteorder='big') - by += (self.arcs[arc].addr-self.addr).to_bytes(nBytesOffset, byteorder='big') - else: - by += val.to_bytes(nBytesArc, byteorder='big') - by += self.arcs[arc].addr.to_bytes(nBytesNodeAddress, byteorder='big') - return by - - def getTxtRepr3 (self, nBytesArc, nBytesNodeAddress, nBytesOffset, lVal): - nArc = len(self.arcs) - nFinalNodeMask = 1 << ((nBytesArc*8)-1) - nFinalArcMask = 1 << ((nBytesArc*8)-2) - nNextNodeMask = 1 << ((nBytesArc*8)-3) - nMaxOffset = (2 ** (nBytesOffset * 8)) - 1 - s = "i{:_>10} -- #{:_>10} ({})\n".format(self.i, self.addr, self.size) - if nArc == 0: - s += " {:<20} {:0>16} i{:_>10} #{:_>10}\n".format("", bin(nFinalNodeMask | nFinalArcMask)[2:], "0", "0") - return s - for i, arc in enumerate(self.arcs, 1): - val = arc - if i == 1 and self.final: - val = val | nFinalNodeMask - if i == nArc: - val = val | nFinalArcMask - if 1 < (self.arcs[arc].addr - self.addr) < nMaxOffset and self.i != 0: - val = val | nNextNodeMask - s += " {:<20} {:0>16} i{:_>10} +{:_>10}\n".format(lVal[arc], bin(val)[2:], self.arcs[arc].i, self.arcs[arc].addr - self.addr) - else: - s += " {:<20} {:0>16} i{:_>10} #{:_>10}\n".format(lVal[arc], bin(val)[2:], self.arcs[arc].i, self.arcs[arc].addr) - return s - - - -# Another attempt to sort node arcs - -_dCharOrder = { - # key: previous char, value: dictionary of chars {c: nValue} - "": {} -} - - -def addWordToCharDict (sWord): - cPrevious = "" - for cChar in sWord: - if cPrevious not in _dCharOrder: - _dCharOrder[cPrevious] = {} - _dCharOrder[cPrevious][cChar] = _dCharOrder[cPrevious].get(cChar, 0) + 1 - cPrevious = cChar - - -def getCharOrderAfterChar (cChar): - return _dCharOrder.get(cChar, None) - - -def displayCharOrder (): - for key, value in _dCharOrder.items(): - print("[" + key + "]: ", ", ".join([ c+":"+str(n) for c, n in sorted(value.items(), key=lambda t: t[1], reverse=True) ])) DELETED gc_core/py/echo.py Index: gc_core/py/echo.py ================================================================== --- gc_core/py/echo.py +++ gc_core/py/echo.py @@ -1,29 +0,0 @@ -#!python3 - -# The most boring yet indispensable function: print! 
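# [Editor's sketch; not part of the original files] Typical driver code for the DAWG
# builder above (file names and the import path are illustrative; the lexicon is a
# UTF-8 file with "flexion<TAB>stem<TAB>tags" lines):
#
#   from dawg import DAWG
#   oDAWG = DAWG("french.lex", "fr", "S")   # "S": stems encoded as suffix codes
#   oDAWG.writeInfo("french.info.txt")      # arc statistics and the value list
#   oDAWG.createBinary("french.bdic", 2)    # method 2 can omit the address of a directly following node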
- - -import sys - - -_CHARMAP = str.maketrans({ 'œ': 'ö', 'Œ': 'Ö', 'ʳ': "r", 'ᵉ': "e", '…': "_", \ - '“': '"', '”': '"', '„': '"', '‘': "'", '’': "'", \ - 'ā': 'â', 'Ā': 'Â', 'ē': 'ê', 'Ē': 'Ê', 'ī': 'î', 'Ī': 'Î', \ - 'ō': 'ô', 'Ō': 'Ô', 'ū': 'û', 'Ū': 'Û', 'Ÿ': 'Y', \ - 'ś': 's', 'ŝ': 's', \ - '—': '-', '–': '-' - }) - - -def echo (obj, sep=' ', end='\n', file=sys.stdout, flush=False): - """ Print for Windows to avoid Python crashes. - Encoding depends on Windows locale. No useful standard. - Always returns True (useful for debugging).""" - if sys.platform != "win32": - print(obj, sep=sep, end=end, file=file, flush=flush) - return True - try: - print(str(obj).translate(_CHARMAP), sep=sep, end=end, file=file, flush=flush) - except: - print(str(obj).encode('ascii', 'replace').decode('ascii', 'replace'), sep=sep, end=end, file=file, flush=flush) - return True DELETED gc_core/py/ibdawg.py Index: gc_core/py/ibdawg.py ================================================================== --- gc_core/py/ibdawg.py +++ gc_core/py/ibdawg.py @@ -1,720 +0,0 @@ -#!python3 - -import os -import traceback -import pkgutil -import re -from functools import wraps -import time - -#import logging -#logging.basicConfig(filename="suggestions.log", level=logging.DEBUG) - -from . import str_transform as st -from . import char_player as cp -from .echo import echo - - -def timethis (func): - "decorator for the execution time" - @wraps(func) - def wrapper (*args, **kwargs): - fStart = time.time() - result = func(*args, **kwargs) - fEnd = time.time() - print(func.__name__, fEnd - fStart) - return result - return wrapper - - -class SuggResult: - """Structure for storing, classifying and filtering suggestions""" - - def __init__ (self, sWord, nDistLimit=-1): - self.sWord = sWord - self.sSimplifiedWord = cp.simplifyWord(sWord) - self.nDistLimit = nDistLimit if nDistLimit >= 0 else (len(sWord) // 3) + 1 - self.nMinDist = 1000 - self.aSugg = set() - self.dSugg = { 0: [], 1: [], 2: [] } - - def addSugg (self, sSugg, nDeep=0): - "add a suggestion" - #logging.info((nDeep * " ") + "__" + sSugg + "__") - if sSugg not in self.aSugg: - nDist = st.distanceDamerauLevenshtein(self.sSimplifiedWord, cp.simplifyWord(sSugg)) - if nDist <= self.nDistLimit: - if nDist not in self.dSugg: - self.dSugg[nDist] = [] - self.dSugg[nDist].append(sSugg) - self.aSugg.add(sSugg) - if nDist < self.nMinDist: - self.nMinDist = nDist - self.nDistLimit = min(self.nDistLimit, self.nMinDist+2) - - def getSuggestions (self, nSuggLimit=10, nDistLimit=-1): - "return a list of suggestions" - lRes = [] - if self.dSugg[0]: - # we sort the better results with the original word - self.dSugg[0].sort(key=lambda sSugg: st.distanceDamerauLevenshtein(self.sWord, sSugg)) - for lSugg in self.dSugg.values(): - lRes.extend(lSugg) - if len(lRes) > nSuggLimit: - break - lRes = list(cp.filterSugg(lRes)) - if self.sWord.istitle(): - lRes = list(map(lambda sSugg: sSugg.title(), lRes)) - elif self.sWord.isupper(): - lRes = list(map(lambda sSugg: sSugg.upper(), lRes)) - return lRes[:nSuggLimit] - - def reset (self): - self.aSugg.clear() - self.dSugg.clear() - - -class IBDAWG: - """INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH""" - - def __init__ (self, sDicName): - self.by = pkgutil.get_data(__package__, "_dictionaries/" + sDicName) - if not self.by: - raise OSError("# Error. File not found or not loadable: "+sDicName) - - if self.by[0:7] != b"/pyfsa/": - raise TypeError("# Error. Not a pyfsa binary dictionary. 
Header: {}".format(self.by[0:9])) - if not(self.by[7:8] == b"1" or self.by[7:8] == b"2" or self.by[7:8] == b"3"): - raise ValueError("# Error. Unknown dictionary version: {}".format(self.by[7:8])) - try: - header, info, values, bdic = self.by.split(b"\0\0\0\0", 3) - except Exception: - raise Exception - - self.sName = sDicName - self.nVersion = int(self.by[7:8].decode("utf-8")) - self.sHeader = header.decode("utf-8") - self.lArcVal = values.decode("utf-8").split("\t") - self.nArcVal = len(self.lArcVal) - self.byDic = bdic - - l = info.decode("utf-8").split("/") - self.sLang = l[0] - self.nChar = int(l[1]) - self.nBytesArc = int(l[2]) - self.nBytesNodeAddress = int(l[3]) - self.nEntries = int(l[4]) - self.nNode = int(l[5]) - self.nArc = int(l[6]) - self.nAff = int(l[7]) - self.cStemming = l[8] - if self.cStemming == "S": - self.funcStemming = st.changeWordWithSuffixCode - elif self.cStemming == "A": - self.funcStemming = st.changeWordWithAffixCode - else: - self.funcStemming = st.noStemming - self.nTag = self.nArcVal - self.nChar - self.nAff - # to get the value of an arc, to get the char of an arc with its value - self.dChar = {} - for i in range(1, self.nChar): - self.dChar[self.lArcVal[i]] = i - self.dCharVal = { v: k for k, v in self.dChar.items() } - - self._arcMask = (2 ** ((self.nBytesArc * 8) - 3)) - 1 - self._finalNodeMask = 1 << ((self.nBytesArc * 8) - 1) - self._lastArcMask = 1 << ((self.nBytesArc * 8) - 2) - self._addrBitMask = 1 << ((self.nBytesArc * 8) - 3) # version 2 - - self.nBytesOffset = 1 # version 3 - - # Configuring DAWG functions according to nVersion - if self.nVersion == 1: - self.morph = self._morph1 - self.stem = self._stem1 - self._lookupArcNode = self._lookupArcNode1 - self._getArcs = self._getArcs1 - self._writeNodes = self._writeNodes1 - elif self.nVersion == 2: - self.morph = self._morph2 - self.stem = self._stem2 - self._lookupArcNode = self._lookupArcNode2 - self._getArcs = self._getArcs2 - self._writeNodes = self._writeNodes2 - elif self.nVersion == 3: - self.morph = self._morph3 - self.stem = self._stem3 - self._lookupArcNode = self._lookupArcNode3 - self._getArcs = self._getArcs3 - self._writeNodes = self._writeNodes3 - else: - raise ValueError(" # Error: unknown code: {}".format(self.nVersion)) - - self.bOptNumSigle = False - self.bOptNumAtLast = False - - def getInfo (self): - return " Language: {0.sLang:>10} Version: {0.nVersion:>2} Stemming: {0.cStemming}FX\n" \ - " Arcs values: {0.nArcVal:>10,} = {0.nChar:>5,} characters, {0.nAff:>6,} affixes, {0.nTag:>6,} tags\n" \ - " Dictionary: {0.nEntries:>12,} entries, {0.nNode:>11,} nodes, {0.nArc:>11,} arcs\n" \ - " Address size: {0.nBytesNodeAddress:>1} bytes, Arc size: {0.nBytesArc:>1} bytes\n".format(self) - - def writeAsJSObject (self, spfDest, bInJSModule=False, bBinaryDictAsHexString=False): - "write IBDAWG as a JavaScript object in a JavaScript module" - import json - with open(spfDest, "w", encoding="utf-8", newline="\n") as hDst: - if bInJSModule: - hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ') - hDst.write(json.dumps({ - "sName": self.sName, - "nVersion": self.nVersion, - "sHeader": self.sHeader, - "lArcVal": self.lArcVal, - "nArcVal": self.nArcVal, - # JavaScript is a pile of shit, so Mozilla’s JS parser don’t like file bigger than 4 Mb! - # So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension. 
- # https://github.com/mozilla/addons-linter/issues/1361 - "byDic": self.byDic.hex() if bBinaryDictAsHexString else [ e for e in self.byDic ], - "sLang": self.sLang, - "nChar": self.nChar, - "nBytesArc": self.nBytesArc, - "nBytesNodeAddress": self.nBytesNodeAddress, - "nEntries": self.nEntries, - "nNode": self.nNode, - "nArc": self.nArc, - "nAff": self.nAff, - "cStemming": self.cStemming, - "nTag": self.nTag, - "dChar": self.dChar, - "_arcMask": self._arcMask, - "_finalNodeMask": self._finalNodeMask, - "_lastArcMask": self._lastArcMask, - "_addrBitMask": self._addrBitMask, - "nBytesOffset": self.nBytesOffset - }, ensure_ascii=False)) - if bInJSModule: - hDst.write(";\n\nexports.dictionary = dictionary;\n") - - def isValidToken (self, sToken): - "checks if <sToken> is valid (if there are hyphens in <sToken>, it is split and each part is checked)" - if self.isValid(sToken): - return True - if "-" in sToken: - if sToken.count("-") > 4: - return True - return all(self.isValid(sWord) for sWord in sToken.split("-")) - return False - - def isValid (self, sWord): - "checks if <sWord> is valid (different casing tested if the first letter is a capital)" - if not sWord: - return None - if "’" in sWord: # ugly hack - sWord = sWord.replace("’", "'") - if self.lookup(sWord): - return True - if sWord[0:1].isupper(): - if len(sWord) > 1: - if sWord.istitle(): - return self.lookup(sWord.lower()) - if sWord.isupper(): - if self.bOptNumSigle: - return True - return self.lookup(sWord.lower()) or self.lookup(sWord.capitalize()) - return self.lookup(sWord[:1].lower() + sWord[1:]) - else: - return self.lookup(sWord.lower()) - return False - - def lookup (self, sWord): - "returns True if <sWord> is in the dictionary (strict verification)" - iAddr = 0 - for c in sWord: - if c not in self.dChar: - return False - iAddr = self._lookupArcNode(self.dChar[c], iAddr) - if iAddr is None: - return False - return bool(int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask) - - def getMorph (self, sWord): - "retrieves the list of morphologies of <sWord>, different casing allowed" - l = self.morph(sWord) - if sWord[0:1].isupper(): - l.extend(self.morph(sWord.lower())) - if sWord.isupper() and len(sWord) > 1: - l.extend(self.morph(sWord.capitalize())) - return l - - #@timethis - def suggest (self, sWord, nSuggLimit=10): - "returns a list of suggestions for <sWord>" - sPfx, sWord, sSfx = cp.cut(sWord) - nMaxSwitch = max(len(sWord) // 3, 1) - nMaxDel = len(sWord) // 5 - nMaxHardRepl = max((len(sWord) - 5) // 4, 1) - oSuggResult = SuggResult(sWord) - self._suggest(oSuggResult, sWord, nMaxSwitch=nMaxSwitch, nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl) - if sWord.istitle(): - self._suggest(oSuggResult, sWord.lower(), nMaxSwitch=nMaxSwitch, nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl) - elif sWord.islower(): - self._suggest(oSuggResult, sWord.title(), nMaxSwitch=nMaxSwitch, nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl) - aSugg = oSuggResult.getSuggestions(nSuggLimit) - if sSfx or sPfx: - # we add back what we removed - return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg)) - return aSugg - - def _suggest (self, oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False): - # recursive function - #logging.info((nDeep * " ") + sNewWord + ":" + sRemain) - if not sRemain: - if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask: - oSuggResult.addSugg(sNewWord, nDeep) - for sTail in self._getTails(iAddr): - oSuggResult.addSugg(sNewWord+sTail, nDeep) - return - cCurrent = sRemain[0:1] -
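# [Editor's summary; not part of the original file] The recursion below tries the
# classic edit operations, each under its own budget:
#   * substitution: follow arcs whose char is similar to cCurrent (cp.d1to1), or any
#     arc at all while nMaxHardRepl lasts;
#   * deduplication: drop cCurrent when the next char is identical;
#   * transposition: swap the next two chars while nMaxSwitch lasts;
#   * deletion: drop cCurrent while nMaxDel lasts;
#   * phonetic rewrites: cp.get1toXReplacement and cp.d2toX, then cp.dFinal1/cp.dFinal2
#     when the end of the word is reached.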
for cChar, jAddr in self._getCharArcs(iAddr): - if cChar in cp.d1to1.get(cCurrent, cCurrent): - self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, jAddr, sNewWord+cChar) - elif not bAvoidLoop and nMaxHardRepl: - self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl-1, nDeep+1, jAddr, sNewWord+cChar, True) - if not bAvoidLoop: # avoid infinite loop - if len(sRemain) > 1: - if cCurrent == sRemain[1:2]: - # same char, we remove 1 char without adding 1 to <sNewWord> - self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord) - else: - # switching chars - if nMaxSwitch: - self._suggest(oSuggResult, sRemain[1:2]+sRemain[0:1]+sRemain[2:], nMaxSwitch-1, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, True) - # delete char - if nMaxDel: - self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel-1, nMaxHardRepl, nDeep+1, iAddr, sNewWord, True) - # Phonetic replacements - for sRepl in cp.get1toXReplacement(sNewWord[-1:], cCurrent, sRemain[1:2]): - self._suggest(oSuggResult, sRepl + sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, True) - for sRepl in cp.d2toX.get(sRemain[0:2], ()): - self._suggest(oSuggResult, sRepl + sRemain[2:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, True) - # end of word - if len(sRemain) == 2: - for sRepl in cp.dFinal2.get(sRemain, ()): - self._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, True) - elif len(sRemain) == 1: - self._suggest(oSuggResult, "", nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, True) # remove last char and go on - for sRepl in cp.dFinal1.get(sRemain, ()): - self._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, True) - - #@timethis - def suggest2 (self, sWord, nMaxSugg=10): - "returns a list of suggestions for <sWord>" - sPfx, sWord, sSfx = cp.cut(sWord) - oSuggResult = SuggResult(sWord) - self._suggest2(oSuggResult) - aSugg = oSuggResult.getSuggestions() - if sSfx or sPfx: - # we add back what we removed - return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg)) - return aSugg - - def _suggest2 (self, oSuggResult, nDeep=0, iAddr=0, sNewWord=""): - # recursive function - #logging.info((nDeep * " ") + sNewWord) - if nDeep >= oSuggResult.nDistLimit: - sCleanNewWord = cp.simplifyWord(sNewWord) - if st.distanceSift4(oSuggResult.sSimplifiedWord[:len(sCleanNewWord)], sCleanNewWord) > oSuggResult.nDistLimit: - return - if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask: - oSuggResult.addSugg(sNewWord, nDeep) - for cChar, jAddr in self._getCharArcsWithPriority(iAddr, oSuggResult.sWord[nDeep:nDeep+1]): - self._suggest2(oSuggResult, nDeep+1, jAddr, sNewWord+cChar) - return - - def _getCharArcs (self, iAddr): - "generator: yield all chars and addresses from the node at address <iAddr>" - for nVal, jAddr in self._getArcs(iAddr): - if nVal < self.nChar: - yield (self.dCharVal[nVal], jAddr) - - def _getSimilarCharArcs (self, cChar, iAddr): - "generator: yield chars similar to <cChar> and the addresses of the following nodes" - for c in cp.d1to1.get(cChar, [cChar]): - if c in self.dChar: - jAddr = self._lookupArcNode(self.dChar[c], iAddr) - if jAddr: - yield (c, jAddr) - - def _getCharArcsWithPriority (self, iAddr, cChar): - if not cChar: - yield from self._getCharArcs(iAddr) - return - lTuple = list(self._getCharArcs(iAddr)) - lTuple.sort(key=lambda t: 0 if t[0] in cp.d1to1.get(cChar, cChar) else 1) - yield from lTuple - - def _getTails (self, iAddr,
sTail="", n=2): - "return a list of suffixes ending at a distance of from " - aTails = set() - for nVal, jAddr in self._getArcs(iAddr): - if nVal < self.nChar: - if int.from_bytes(self.byDic[jAddr:jAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask: - aTails.add(sTail + self.dCharVal[nVal]) - if n and not aTails: - aTails.update(self._getTails(jAddr, sTail+self.dCharVal[nVal], n-1)) - return aTails - - def drawPath (self, sWord, iAddr=0): - "show the path taken by in the graph" - c1 = sWord[0:1] if sWord else " " - iPos = -1 - n = 0 - print(c1 + ": ", end="") - for c2, jAddr in self._getCharArcs(iAddr): - print(c2, end="") - if c2 == sWord[0:1]: - iNextNodeAddr = jAddr - iPos = n - n += 1 - if not sWord: - return - if iPos >= 0: - print("\n "+ " " * iPos + "|") - self.drawPath(sWord[1:], iNextNodeAddr) - - def select (self, sPattern=""): - "generator: returns all entries which morphology fits " - zPattern = None - try: - zPattern = re.compile(sPattern) - except: - print("# Error in regex pattern") - traceback.print_exc() - yield from self._select1(zPattern, 0, "") - - # def morph (self, sWord): - # is defined in __init__ - - # VERSION 1 - def _select1 (self, zPattern, iAddr, sWord): - # recursive generator - for nVal, jAddr in self._getArcs1(iAddr): - if nVal < self.nChar: - # simple character - yield from self._select1(zPattern, jAddr, sWord + self.lArcVal[nVal]) - else: - sEntry = sWord + "\t" + self.funcStemming(sWord, self.lArcVal[nVal]) - for nMorphVal, _ in self._getArcs1(jAddr): - if not zPattern or zPattern.search(self.lArcVal[nMorphVal]): - yield sEntry + "\t" + self.lArcVal[nMorphVal] - - def _morph1 (self, sWord): - "returns morphologies of " - iAddr = 0 - for c in sWord: - if c not in self.dChar: - return [] - iAddr = self._lookupArcNode(self.dChar[c], iAddr) - if iAddr == None: - return [] - if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask): - l = [] - nRawArc = 0 - while not (nRawArc & self._lastArcMask): - iEndArcAddr = iAddr + self.nBytesArc - nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') - nArc = nRawArc & self._arcMask - if nArc >= self.nChar: - # This value is not a char, this is a stemming code - sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc]) - # Now , we go to the next node and retrieve all following arcs values, all of them are tags - iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') - nRawArc2 = 0 - while not (nRawArc2 & self._lastArcMask): - iEndArcAddr2 = iAddr2 + self.nBytesArc - nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big') - l.append(sStem + " " + self.lArcVal[nRawArc2 & self._arcMask]) - iAddr2 = iEndArcAddr2+self.nBytesNodeAddress - iAddr = iEndArcAddr+self.nBytesNodeAddress - return l - return [] - - def _stem1 (self, sWord): - "returns stems list of " - iAddr = 0 - for c in sWord: - if c not in self.dChar: - return [] - iAddr = self._lookupArcNode(self.dChar[c], iAddr) - if iAddr == None: - return [] - if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask): - l = [] - nRawArc = 0 - while not (nRawArc & self._lastArcMask): - iEndArcAddr = iAddr + self.nBytesArc - nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') - nArc = nRawArc & self._arcMask - if nArc >= self.nChar: - # This value is not a char, this is a stemming code - l.append(self.funcStemming(sWord, self.lArcVal[nArc])) - iAddr = 
iEndArcAddr+self.nBytesNodeAddress - return l - return [] - - def _lookupArcNode1 (self, nVal, iAddr): - "looks if is an arc at the node at , if yes, returns address of next node else None" - while True: - iEndArcAddr = iAddr+self.nBytesArc - nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') - if nVal == (nRawArc & self._arcMask): - # the value we are looking for - # we return the address of the next node - return int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') - else: - # value not found - if (nRawArc & self._lastArcMask): - return None - iAddr = iEndArcAddr+self.nBytesNodeAddress - - def _getArcs1 (self, iAddr): - "generator: return all arcs at as tuples of (nVal, iAddr)" - while True: - iEndArcAddr = iAddr+self.nBytesArc - nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') - yield (nRawArc & self._arcMask, int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')) - if (nRawArc & self._lastArcMask): - break - iAddr = iEndArcAddr+self.nBytesNodeAddress - - def _writeNodes1 (self, spfDest): - "for debugging only" - print(" > Write binary nodes") - with codecs.open(spfDest, 'w', 'utf-8', newline="\n") as hDst: - iAddr = 0 - hDst.write("i{:_>10} -- #{:_>10}\n".format("0", iAddr)) - while iAddr < len(self.byDic): - iEndArcAddr = iAddr+self.nBytesArc - nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') - nArc = nRawArc & self._arcMask - hDst.write(" {:<20} {:0>16} i{:>10} #{:_>10}\n".format(self.lArcVal[nArc], bin(nRawArc)[2:], "?", \ - int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], \ - byteorder='big'))) - iAddr = iEndArcAddr+self.nBytesNodeAddress - if (nRawArc & self._lastArcMask) and iAddr < len(self.byDic): - hDst.write("\ni{:_>10} -- #{:_>10}\n".format("?", iAddr)) - hDst.close() - - # VERSION 2 - def _morph2 (self, sWord): - "returns morphologies of " - iAddr = 0 - for c in sWord: - if c not in self.dChar: - return [] - iAddr = self._lookupArcNode(self.dChar[c], iAddr) - if iAddr == None: - return [] - if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask): - l = [] - nRawArc = 0 - while not (nRawArc & self._lastArcMask): - iEndArcAddr = iAddr + self.nBytesArc - nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') - nArc = nRawArc & self._arcMask - if nArc >= self.nChar: - # This value is not a char, this is a stemming code - sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc]) - # Now , we go to the next node and retrieve all following arcs values, all of them are tags - if not (nRawArc & self._addrBitMask): - iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') - else: - # we go to the end of the node - iAddr2 = iEndArcAddr - while not (nRawArc & self._lastArcMask): - nRawArc = int.from_bytes(self.byDic[iAddr2:iAddr2+self.nBytesArc], byteorder='big') - iAddr2 += self.nBytesArc + self.nBytesNodeAddress - nRawArc2 = 0 - while not (nRawArc2 & self._lastArcMask): - iEndArcAddr2 = iAddr2 + self.nBytesArc - nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big') - l.append(sStem + " " + self.lArcVal[nRawArc2 & self._arcMask]) - iAddr2 = iEndArcAddr2+self.nBytesNodeAddress if not (nRawArc2 & self._addrBitMask) else iEndArcAddr2 - iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr - return l - return [] - - def _stem2 (self, sWord): - 
"returns stems list of " - iAddr = 0 - for c in sWord: - if c not in self.dChar: - return [] - iAddr = self._lookupArcNode(self.dChar[c], iAddr) - if iAddr == None: - return [] - if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask): - l = [] - nRawArc = 0 - while not (nRawArc & self._lastArcMask): - iEndArcAddr = iAddr + self.nBytesArc - nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') - nArc = nRawArc & self._arcMask - if nArc >= self.nChar: - # This value is not a char, this is a stemming code - l.append(self.funcStemming(sWord, self.lArcVal[nArc])) - # Now , we go to the next node - if not (nRawArc & self._addrBitMask): - iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') - else: - # we go to the end of the node - iAddr2 = iEndArcAddr - while not (nRawArc & self._lastArcMask): - nRawArc = int.from_bytes(self.byDic[iAddr2:iAddr2+self.nBytesArc], byteorder='big') - iAddr2 += self.nBytesArc + self.nBytesNodeAddress - iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr - return l - return [] - - def _lookupArcNode2 (self, nVal, iAddr): - "looks if is an arc at the node at , if yes, returns address of next node else None" - while True: - iEndArcAddr = iAddr+self.nBytesArc - nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') - if nVal == (nRawArc & self._arcMask): - # the value we are looking for - if not (nRawArc & self._addrBitMask): - # we return the address of the next node - return int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') - else: - # we go to the end of the node - iAddr = iEndArcAddr - while not (nRawArc & self._lastArcMask): - nRawArc = int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') - iAddr += self.nBytesArc + self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else self.nBytesArc - return iAddr - else: - # value not found - if (nRawArc & self._lastArcMask): - return None - iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr - - def _writeNodes2 (self, spfDest): - "for debugging only" - print(" > Write binary nodes") - with codecs.open(spfDest, 'w', 'utf-8', newline="\n") as hDst: - iAddr = 0 - hDst.write("i{:_>10} -- #{:_>10}\n".format("0", iAddr)) - while iAddr < len(self.byDic): - iEndArcAddr = iAddr+self.nBytesArc - nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') - nArc = nRawArc & self._arcMask - if not (nRawArc & self._addrBitMask): - iNextNodeAddr = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') - hDst.write(" {:<20} {:0>16} i{:>10} #{:_>10}\n".format(self.lArcVal[nArc], bin(nRawArc)[2:], "?", iNextNodeAddr)) - iAddr = iEndArcAddr+self.nBytesNodeAddress - else: - hDst.write(" {:<20} {:0>16}\n".format(self.lArcVal[nArc], bin(nRawArc)[2:])) - iAddr = iEndArcAddr - if (nRawArc & self._lastArcMask): - hDst.write("\ni{:_>10} -- #{:_>10}\n".format("?", iAddr)) - hDst.close() - - # VERSION 3 - def _morph3 (self, sWord): - "returns morphologies of " - iAddr = 0 - for c in sWord: - if c not in self.dChar: - return [] - iAddr = self._lookupArcNode(self.dChar[c], iAddr) - if iAddr == None: - return [] - if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask): - l = [] - nRawArc = 0 - iAddrNode = iAddr - while not (nRawArc & self._lastArcMask): - iEndArcAddr = iAddr + 
self.nBytesArc - nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') - nArc = nRawArc & self._arcMask - if nArc >= self.nChar: - # This value is not a char, this is a stemming code - sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc]) - # Now , we go to the next node and retrieve all following arcs values, all of them are tags - if not (nRawArc & self._addrBitMask): - iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') - else: - iAddr2 = iAddrNode + int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesOffset], byteorder='big') - nRawArc2 = 0 - while not (nRawArc2 & self._lastArcMask): - iEndArcAddr2 = iAddr2 + self.nBytesArc - nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big') - l.append(sStem + " " + self.lArcVal[nRawArc2 & self._arcMask]) - iAddr2 = iEndArcAddr2+self.nBytesNodeAddress if not (nRawArc2 & self._addrBitMask) else iEndArcAddr2+self.nBytesOffset - iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr+self.nBytesOffset - return l - return [] - - def _stem3 (self, sWord): - "returns stems list of " - iAddr = 0 - for c in sWord: - if c not in self.dChar: - return [] - iAddr = self._lookupArcNode(self.dChar[c], iAddr) - if iAddr == None: - return [] - if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask): - l = [] - nRawArc = 0 - iAddrNode = iAddr - while not (nRawArc & self._lastArcMask): - iEndArcAddr = iAddr + self.nBytesArc - nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') - nArc = nRawArc & self._arcMask - if nArc >= self.nChar: - # This value is not a char, this is a stemming code - l.append(self.funcStemming(sWord, self.lArcVal[nArc])) - iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr+self.nBytesOffset - return l - return [] - - def _lookupArcNode3 (self, nVal, iAddr): - "looks if is an arc at the node at , if yes, returns address of next node else None" - iAddrNode = iAddr - while True: - iEndArcAddr = iAddr+self.nBytesArc - nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') - if nVal == (nRawArc & self._arcMask): - # the value we are looking for - if not (nRawArc & self._addrBitMask): - return int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') - else: - return iAddrNode + int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesOffset], byteorder='big') - else: - # value not found - if (nRawArc & self._lastArcMask): - return None - iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr+self.nBytesOffset - - def _writeNodes3 (self, spfDest): - "for debugging only" - print(" > Write binary nodes") - with codecs.open(spfDest, 'w', 'utf-8', newline="\n") as hDst: - iAddr = 0 - hDst.write("i{:_>10} -- #{:_>10}\n".format("0", iAddr)) - while iAddr < len(self.byDic): - iEndArcAddr = iAddr+self.nBytesArc - nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') - nArc = nRawArc & self._arcMask - if not (nRawArc & self._addrBitMask): - iNextNodeAddr = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') - hDst.write(" {:<20} {:0>16} i{:>10} #{:_>10}\n".format(self.lArcVal[nArc], bin(nRawArc)[2:], "?", iNextNodeAddr)) - iAddr = iEndArcAddr+self.nBytesNodeAddress - else: - iNextNodeAddr = 
int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesOffset], byteorder='big') - hDst.write(" {:<20} {:0>16} i{:>10} +{:_>10}\n".format(self.lArcVal[nArc], bin(nRawArc)[2:], "?", iNextNodeAddr)) - iAddr = iEndArcAddr+self.nBytesOffset - if (nRawArc & self._lastArcMask): - hDst.write("\ni{:_>10} -- #{:_>10}\n".format("?", iAddr)) - hDst.close() DELETED gc_core/py/keyboard_chars_proximity.py Index: gc_core/py/keyboard_chars_proximity.py ================================================================== --- gc_core/py/keyboard_chars_proximity.py +++ gc_core/py/keyboard_chars_proximity.py @@ -1,220 +0,0 @@ -# Keyboard chars proximity - - -def getKeyboardMap (sKeyboard): - return _dKeyboardMap.get(sKeyboard.lower(), {}) - - -def getKeyboardList (): - return _dKeyboardMap.keys() - - -_dKeyboardMap = { - # keyboards by alphabetical order - # bépo, colemak and dvorak users are assumed to do less typing errors. - "azerty": { - # fr - # line 1 - "é": "az", - "è": "yu", - "ç": "àio", - "à": "op", - # line 2 - "a": "zéq", - "z": "aesq", - "e": "zrds", - "r": "etfd", - "t": "rygf", - "y": "tuhg", - "u": "yijh", - "i": "uokj", - "o": "iplk", - "p": "oml", - # line 3 - "q": "sawz", - "s": "qdzwxe", - "d": "sfexcr", - "f": "dgrcvt", - "g": "fhtvby", - "h": "gjybnu", - "j": "hkuni", - "k": "jlio", - "l": "kmop", - "m": "lùp", - "ù": "m", - # line 4 - "w": "xqs", - "x": "wcsd", - "c": "xvdf", - "v": "cbfg", - "b": "vngh", - "n": "bhj", - }, - "bépo": { - # fr - # line 2 - "b": "éa", - "é": "bpu", - "p": "éoi", - "o": "pèe", - "è": "o", - "v": "dt", - "d": "vls", - "l": "djr", - "j": "lzn", - "z": "jmw", - # line 3 - "a": "ubà", - "u": "aiéy", - "i": "uepx", - "e": "io", - "c": "t", - "t": "csvq", - "s": "trdg", - "r": "snlh", - "n": "rmjf", - "m": "nzç", - # line 4 - "à": "yêa", - "y": "àxu", - "x": "ywi", - "w": "z", - "k": "c", - "q": "gt", - "g": "qhs", - "h": "gfr", - "f": "hçn", - "ç": "fm", - }, - "colemak": { - # en, us, intl - # line 2 - "q": "wa", - "w": "qfr", - "f": "wps", - "p": "fgt", - "g": "pjd", - "j": "glh", - "l": "jun", - "u": "lye", - "y": "ui", - # line 3 - "a": "rqz", - "r": "aswx", - "s": "rtfc", - "t": "sdpv", - "d": "thgb", - "h": "dnjk", - "n": "helm", - "e": "niu", - "i": "eoy", - "o": "i", - # line 4 - "z": "xa", - "x": "zcr", - "c": "xvs", - "v": "cbt", - "b": "vkd", - "k": "bmh", - "m": "kn", - }, - "dvorak": { - # en, us, intl - # line 2 - "p": "yu", - "y": "pfi", - "f": "ygd", - "g": "fch", - "c": "grt", - "r": "cln", - "l": "rs", - # line 3 - "a": "o", - "o": "aeq", - "e": "ouj", - "u": "eipk", - "i": "udyx", - "d": "ihfb", - "h": "dtgm", - "t": "hncw", - "n": "tsrv", - "s": "nlz", - # line 4 - "q": "jo", - "j": "qke", - "k": "jxu", - "x": "kbi", - "b": "xmd", - "m": "bwh", - "w": "mvt", - "v": "wzn", - "z": "vs", - }, - "qwerty": { - # en, us, intl - # line 2 - "q": "wa", - "w": "qeas", - "e": "wrds", - "r": "etfd", - "t": "rygf", - "y": "tuhg", - "u": "yijh", - "i": "uokj", - "o": "iplk", - "p": "ol", - # line 3 - "a": "sqzw", - "s": "adwzxe", - "d": "sfexcr", - "f": "dgrcvt", - "g": "fhtvby", - "h": "gjybnu", - "j": "hkunmi", - "k": "jlimo", - "l": "kop", - # line 4 - "z": "xas", - "x": "zcsd", - "c": "xvdf", - "v": "cbfg", - "b": "vngh", - "n": "bmhj", - "m": "njk", - }, - "qwertz": { - # ge, au - # line 2 - "q": "wa", - "w": "qeas", - "e": "wrds", - "r": "etfd", - "t": "rzgf", - "z": "tuhg", - "u": "zijh", - "i": "uokj", - "o": "iplk", - "p": "oüöl", - "ü": "päö", - # line 3 - "a": "sqyw", - "s": "adwyxe", - "d": "sfexcr", - "f": "dgrcvt", - "g": "fhtvbz", - 
"h": "gjzbnu", - "j": "hkunmi", - "k": "jlimo", - "l": "köop", - "ö": "läpü", - "ä": "öü", - # line 4 - "y": "xas", - "x": "ycsd", - "c": "xvdf", - "v": "cbfg", - "b": "vngh", - "n": "bmhj", - "m": "njk", - } -} DELETED gc_core/py/progressbar.py Index: gc_core/py/progressbar.py ================================================================== --- gc_core/py/progressbar.py +++ gc_core/py/progressbar.py @@ -1,35 +0,0 @@ -# Textual progressbar -# by Olivier R. -# License: MPL 2 - -import time - -class ProgressBar: - "Textual progressbar" - - def __init__ (self, nMin=0, nMax=100, nWidth=78): - "initiate with minimum nMin to maximum nMax" - self.nMin = nMin - self.nMax = nMax - self.nSpan = nMax - nMin - self.nWidth = nWidth-9 - self.nAdvance = -1 - self.nCurVal = nMin - self.startTime = time.time() - self._update() - - def _update (self): - fDone = ((self.nCurVal - self.nMin) / self.nSpan) - nAdvance = int(fDone * self.nWidth) - if (nAdvance > self.nAdvance): - self.nAdvance = nAdvance - print("\r[ {}{} {}% ] ".format('>'*nAdvance, ' '*(self.nWidth-nAdvance), round(fDone*100)), end="") - - def increment (self, n=1): - "increment value by n (1 by default)" - self.nCurVal += n - self._update() - - def done (self): - "to call when it’s finished" - print("\r[ task done in {:.1f} s ] ".format(time.time() - self.startTime)) DELETED gc_core/py/spellchecker.py Index: gc_core/py/spellchecker.py ================================================================== --- gc_core/py/spellchecker.py +++ gc_core/py/spellchecker.py @@ -1,134 +0,0 @@ -# Spellchecker -# Wrapper for the IBDAWG class. -# Useful to check several dictionaries at once. - -from . import ibdawg - - -dDictionaries = { - "fr": "French.bdic", - "en": "English.bdic" -} - - -class Spellchecker (): - - def __init__ (self, sLangCode): - self.sLangCode = sLangCode - self.oMainDic = None - if sLangCode in dDictionaries: - self.oMainDic = ibdawg.IBDAWG(dDictionaries[sLangCode]) - self.lOtherDic = [] - return bool(self.oMainDic) - - - def setMainDictionary (self, sDicName): - try: - self.oMainDic = ibdawg.IBDAWG(sDicName) - return True - except: - print("Error: <" + sDicName + "> not set as main dictionary.") - return False - - def addDictionary (self, sDicName): - try: - self.lOtherDic.append(ibdawg.IBDAWG(sDicName)) - return True - except: - print("Error: <" + sDicName + "> not added to the list.") - return False - - # Return codes: - # 0: invalid - # 1: correct in main dictionary - # 2+: correct in foreign dictionaries - - - # check in the main dictionary only - - def isValidToken (self, sToken): - "(in main dictionary) checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)" - if self.oMainDic.isValidToken(sToken): - return 1 - return 0 - - def isValid (self, sWord): - "(in main dictionary) checks if sWord is valid (different casing tested if the first letter is a capital)" - if self.oMainDic.isValid(sWord): - return 1 - return 0 - - def lookup (self, sWord): - "(in main dictionary) checks if sWord is in dictionary as is (strict verification)" - if self.oMainDic.lookup(sWord): - return 1 - return 0 - - - # check in all dictionaries - - def isValidTokenAll (self, sToken): - "(in all dictionaries) checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)" - if self.oMainDic.isValidToken(sToken): - return 1 - for i, oDic in enumerate(self.lOtherDic, 2): - if oDic.isValidToken(sToken): - return i - return 0 - - def isValidAll (self, sWord): - "(in all dictionaries) 
checks if sWord is valid (different casing tested if the first letter is a capital)" - if self.oMainDic.isValid(sWord): - return 1 - for i, oDic in enumerate(self.lOtherDic, 2): - if oDic.isValid(sWord): - return i - return 0 - - def lookupAll (self, sWord): - "(in all dictionaries) checks if sWord is in dictionary as is (strict verification)" - if self.oMainDic.lookup(sWord): - return 1 - for i, oDic in enumerate(self.lOtherDic, 2): - if oDic.lookup(sWord): - return i - return 0 - - - # check in dictionaries up to level n - - def isValidTokenLevel (self, sToken, nLevel): - "(in dictionaries up to level n) checks if sToken is valid (if there are hyphens in sToken, sToken is split, each part is checked)" - if self.oMainDic.isValidToken(sToken): - return 1 - if nLevel >= 2: - for i, oDic in enumerate(self.lOtherDic, 2): - if oDic.isValidToken(sToken): - return i - if i == nLevel: - break - return 0 - - def isValidLevel (self, sWord, nLevel): - "(in dictionaries up to level n) checks if sWord is valid (different casing tested if the first letter is a capital)" - if self.oMainDic.isValid(sWord): - return 1 - if nLevel >= 2: - for i, oDic in enumerate(self.lOtherDic, 2): - if oDic.isValid(sWord): - return i - if i == nLevel: - break - return 0 - - def lookupLevel (self, sWord, nLevel): - "(in dictionaries up to level n) checks if sWord is in dictionary as is (strict verification)" - if self.oMainDic.lookup(sWord): - return 1 - if nLevel >= 2: - for i, oDic in enumerate(self.lOtherDic, 2): - if oDic.lookup(sWord): - return i - if i == nLevel: - break - return 0 DELETED gc_core/py/str_transform.py Index: gc_core/py/str_transform.py ================================================================== --- gc_core/py/str_transform.py +++ gc_core/py/str_transform.py @@ -1,203 +0,0 @@ -#!python3 - - -#### DISTANCE CALCULATIONS - -def longestCommonSubstring (s1, s2): - # http://en.wikipedia.org/wiki/Longest_common_substring_problem - # http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Longest_common_substring - M = [ [0]*(1+len(s2)) for i in range(1+len(s1)) ] - longest, x_longest = 0, 0 - for x in range(1, 1+len(s1)): - for y in range(1, 1+len(s2)): - if s1[x-1] == s2[y-1]: - M[x][y] = M[x-1][y-1] + 1 - if M[x][y] > longest: - longest = M[x][y] - x_longest = x - else: - M[x][y] = 0 - return s1[x_longest-longest : x_longest] - - -def distanceDamerauLevenshtein (s1, s2): - "Damerau-Levenshtein distance between <s1> and <s2>" - # https://fr.wikipedia.org/wiki/Distance_de_Damerau-Levenshtein - d = {} - nLen1 = len(s1) - nLen2 = len(s2) - for i in range(-1, nLen1+1): - d[i, -1] = i + 1 - for j in range(-1, nLen2+1): - d[-1, j] = j + 1 - for i in range(nLen1): - for j in range(nLen2): - nCost = 0 if s1[i] == s2[j] else 1 - d[i, j] = min( - d[i-1, j] + 1, # Deletion - d[i, j-1] + 1, # Insertion - d[i-1, j-1] + nCost, # Substitution - ) - if i and j and s1[i] == s2[j-1] and s1[i-1] == s2[j]: - d[i, j] = min(d[i, j], d[i-2, j-2] + nCost) # Transposition - return d[nLen1-1, nLen2-1] - - -def distanceSift4 (s1, s2, nMaxOffset=5): - "implementation of general Sift4."
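# Hand-checked examples for the distance helpers above (illustrative only):
#     longestCommonSubstring("chanter", "champion")   # -> "cha"
#     distanceDamerauLevenshtein("plume", "pluem")    # -> 1 (one transposition)
#     distanceDamerauLevenshtein("plume", "prune")    # -> 2 (two substitutions)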
- # https://siderite.blogspot.com/2014/11/super-fast-and-accurate-string-distance.html - if not s1: - return len(s2) - if not s2: - return len(s1) - nLen1, nLen2 = len(s1), len(s2) - i1, i2 = 0, 0 # Cursors for each string - nLargestCS = 0 # Largest common substring - nLocalCS = 0 # Local common substring - nTrans = 0 # Number of transpositions ('ab' vs 'ba') - lOffset = [] # Offset pair array, for computing the transpositions - - while i1 < nLen1 and i2 < nLen2: - if s1[i1] == s2[i2]: - nLocalCS += 1 - # Check if current match is a transposition - bTrans = False - i = 0 - while i < len(lOffset): - t = lOffset[i] - if i1 <= t[0] or i2 <= t[1]: - bTrans = abs(i2-i1) >= abs(t[1] - t[0]) - if bTrans: - nTrans += 1 - elif not t[2]: - t[2] = True - nTrans += 1 - break - elif i1 > t[1] and i2 > t[0]: - del lOffset[i] - else: - i += 1 - lOffset.append([i1, i2, bTrans]) - else: - nLargestCS += nLocalCS - nLocalCS = 0 - if i1 != i2: - i1 = i2 = min(i1, i2) - for i in range(nMaxOffset): - if i1 + i >= nLen1 and i2 + i >= nLen2: - break - elif i1 + i < nLen1 and s1[i1+i] == s2[i2]: - i1 += i - 1 - i2 -= 1 - break - elif i2 + i < nLen2 and s1[i1] == s2[i2+i]: - i2 += i - 1 - i1 -= 1 - break - i1 += 1 - i2 += 1 - if i1 >= nLen1 or i2 >= nLen2: - nLargestCS += nLocalCS - nLocalCS = 0 - i1 = i2 = min(i1, i2) - nLargestCS += nLocalCS - return round(max(nLen1, nLen2) - nLargestCS + nTrans) - - -def showDistance (s1, s2): - print("Damerau-Levenshtein: " + s1 + "/" + s2 + " = " + str(distanceDamerauLevenshtein(s1, s2))) - print("Sift4: " + s1 + "/" + s2 + " = " + str(distanceSift4(s1, s2))) - - - - -#### STEMMING OPERATIONS - -## No stemming - -def noStemming (sFlex, sStem): - return sStem - -def rebuildWord (sFlex, cmd1, cmd2): - if cmd1 == "_": - return sFlex - n, c = cmd1.split(":") - s = sFlex[:int(n)] + c + sFlex[int(n):] - if cmd2 == "_": - return s - n, c = cmd2.split(":") - return s[:int(n)] + c + s[int(n):] - - -## Define affixes for stemming - -# Note: 48 is the ASCII code for "0" - - -# Suffix only -def defineSuffixCode (sFlex, sStem): - """ Returns a string defining how to get stem from flexion - "n(sfx)" - with n: a char with numeric meaning, "0" = 0, "1" = 1, ... ":" = 10, etc. (See ASCII table.) Says how many letters to strip from flexion. - sfx [optional]: string to add on flexion - Examples: - "0": strips nothing, adds nothing - "1er": strips 1 letter, adds "er" - "2": strips 2 letters, adds nothing - """ - if sFlex == sStem: - return "0" - jSfx = 0 - for i in range(min(len(sFlex), len(sStem))): - if sFlex[i] != sStem[i]: - break - jSfx += 1 - return chr(len(sFlex)-jSfx+48) + sStem[jSfx:] - - -def changeWordWithSuffixCode (sWord, sSfxCode): - if sSfxCode == "0": - return sWord - return sWord[:-(ord(sSfxCode[0])-48)] + sSfxCode[1:] if sSfxCode[0] != '0' else sWord + sSfxCode[1:] - - -# Prefix and suffix - -def defineAffixCode (sFlex, sStem): - """ Returns a string defining how to get stem from flexion. Examples: - "0" if stem = flexion - "stem" if no common substring - "n(pfx)/m(sfx)" - with n and m: chars with numeric meaning, "0" = 0, "1" = 1, ... ":" = 10, etc. (See ASCII table.) Says how many letters to strip from flexion. - pfx [optional]: string to add before the flexion - sfx [optional]: string to add after the flexion - """ - if sFlex == sStem: - return "0" - # is stem a substring of flexion?
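# Worked example (hand-checked): "connu" occurs in "inconnu" at position 2
# with 0 trailing chars, so:
#     defineAffixCode("inconnu", "connu")          # -> "2/0"
#     changeWordWithAffixCode("inconnu", "2/0")    # -> "connu"  (defined below)
#     defineSuffixCode("chevaux", "cheval")        # -> "2l" (strip "ux", add "l")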
- n = sFlex.find(sStem) - if n >= 0: - return "{}/{}".format(chr(n+48), chr(len(sFlex)-(len(sStem)+n)+48)) - # no, so we are looking for common substring - sSubs = longestCommonSubstring(sFlex, sStem) - if len(sSubs) > 1: - iPos = sStem.find(sSubs) - sPfx = sStem[:iPos] - sSfx = sStem[iPos+len(sSubs):] - n = sFlex.find(sSubs) - m = len(sFlex) - (len(sSubs)+n) - sAff = "{}/".format(chr(n+48)) if not sPfx else "{}{}/".format(chr(n+48), sPfx) - sAff += chr(m+48) if not sSfx else "{}{}".format(chr(m+48), sSfx) - return sAff - return sStem - - -def changeWordWithAffixCode (sWord, sAffCode): - if sAffCode == "0": - return sWord - if '/' not in sAffCode: - return "# error #" - sPfxCode, sSfxCode = sAffCode.split('/') - sWord = sPfxCode[1:] + sWord[(ord(sPfxCode[0])-48):] - return sWord[:-(ord(sSfxCode[0])-48)] + sSfxCode[1:] if sSfxCode[0] != '0' else sWord + sSfxCode[1:] - DELETED gc_core/py/tokenizer.py Index: gc_core/py/tokenizer.py ================================================================== --- gc_core/py/tokenizer.py +++ gc_core/py/tokenizer.py @@ -1,49 +0,0 @@ -# Very simple tokenizer - -import re - -_PATTERNS = { - "default": - ( - r'(?P/(?:bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:/[\w.()-]+)*)', - r'(?P[a-zA-Z]:\\(?:Program Files(?: [(]x86[)]|)|[\w.()]+)(?:\\[\w.()-]+)*)', - r'(?P[.,?!:;…«»“”"()/·]+)', - r'(?P[A-Z][.][A-Z][.](?:[A-Z][.])*)', - r'(?P(?:https?://|www[.]|\w+[@.]\w\w+[@.])\w[\w./?&!%=+*"\'@$#-]+)', - r'(?P[#@][\w-]+)', - r'(?P<\w+.*?>|)', - r'(?P\[/?\w+\])', - r'(?P\d\d?h\d\d\b)', - r'(?P-?\d+(?:[.,]\d+))', - r"(?P\w+(?:[’'`-]\w+)*)" - ), - "fr": - ( - r'(?P/(?:bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:/[\w.()-]+)*)', - r'(?P[a-zA-Z]:\\(?:Program Files(?: [(]x86[)]|)|[\w.()]+)(?:\\[\w.()-]+)*)', - r'(?P[.,?!:;…«»“”"()/·]+)', - r'(?P[A-Z][.][A-Z][.](?:[A-Z][.])*)', - r'(?P(?:https?://|www[.]|\w+[@.]\w\w+[@.])\w[\w./?&!%=+*"\'@$#-]+)', - r'(?P[#@][\w-]+)', - r'(?P<\w+.*?>|)', - r'(?P\[/?\w+\])', - r"(?P(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`])", - r'(?P\d+(?:er|nd|e|de|ième|ème|eme)\b)', - r'(?P\d\d?h\d\d\b)', - r'(?P-?\d+(?:[.,]\d+|))', - r"(?P\w+(?:[’'`-]\w+)*)" - ) -} - - -class Tokenizer: - - def __init__ (self, sLang): - self.sLang = sLang - if sLang not in _PATTERNS: - self.sLang = "default" - self.zToken = re.compile( "(?i)" + '|'.join(sRegex for sRegex in _PATTERNS[sLang]) ) - - def genTokens (self, sText): - for m in self.zToken.finditer(sText): - yield { "sType": m.lastgroup, "sValue": m.group(), "nStart": m.start(), "nEnd": m.end() } ADDED graphspell/char_player.py Index: graphspell/char_player.py ================================================================== --- graphspell/char_player.py +++ graphspell/char_player.py @@ -0,0 +1,324 @@ +# list of similar chars +# useful for suggestion mechanism + +import re + + +_xTransChars = str.maketrans({ + 'à': 'a', 'é': 'e', 'î': 'i', 'ô': 'o', 'û': 'u', 'ÿ': 'i', "y": "i", + 'â': 'a', 'è': 'e', 'ï': 'i', 'ö': 'o', 'ù': 'u', 'ŷ': 'i', + 'ä': 'a', 'ê': 'e', 'í': 'i', 'ó': 'o', 'ü': 'u', 'ý': 'i', + 'á': 'a', 'ë': 'e', 'ì': 'i', 'ò': 'o', 'ú': 'u', 'ỳ': 'i', + 'ā': 'a', 'ē': 'e', 'ī': 'i', 'ō': 'o', 'ū': 'u', 'ȳ': 'i', + 'ñ': 'n', 'k': 'q', 'w': 'v', + 'œ': 'oe', 'æ': 'ae', +}) + +def simplifyWord (sWord): + "word simplication before calculating distance between words" + sWord = sWord.lower().translate(_xTransChars) + sNewWord = "" + for i, c in 
enumerate(sWord, 1): + if c != sWord[i:i+1]: + sNewWord += c + return sNewWord.replace("eau", "o").replace("au", "o").replace("ai", "e").replace("ei", "e").replace("ph", "f") + + +aVowel = set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ") +aConsonant = set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ") +aDouble = set("bcdfjklmnprstzBCDFJKLMNPRSTZ") # letters that may be used twice successively + + +# Similar chars + +d1to1 = { + "1": "liîLIÎ", + "2": "zZ", + "3": "eéèêEÉÈÊ", + "4": "aàâAÀÂ", + "5": "sgSG", + "6": "bdgBDG", + "7": "ltLT", + "8": "bB", + "9": "gbdGBD", + "0": "oôOÔ", + + "a": "aàâáäæ", + "A": "AÀÂÁÄÆ", + "à": "aàâáäæ", + "À": "AÀÂÁÄÆ", + "â": "aàâáäæ", + "Â": "AÀÂÁÄÆ", + "á": "aàâáäæ", + "Á": "AÀÂÁÄÆ", + "ä": "aàâáäæ", + "Ä": "AÀÂÁÄÆ", + + "æ": "æéa", + "Æ": "ÆÉA", + + "c": "cçskqśŝ", + "C": "CÇSKQŚŜ", + "ç": "cçskqśŝ", + "Ç": "CÇSKQŚŜ", + + "e": "eéèêëœ", + "E": "EÉÈÊËŒ", + "é": "eéèêëœ", + "É": "EÉÈÊËŒ", + "ê": "eéèêëœ", + "Ê": "EÉÈÊËŒ", + "è": "eéèêëœ", + "È": "EÉÈÊËŒ", + "ë": "eéèêëœ", + "Ë": "EÉÈÊËŒ", + + "g": "gj", + "G": "GJ", + + "i": "iîïyíìÿ", + "I": "IÎÏYÍÌŸ", + "î": "iîïyíìÿ", + "Î": "IÎÏYÍÌŸ", + "ï": "iîïyíìÿ", + "Ï": "IÎÏYÍÌŸ", + "í": "iîïyíìÿ", + "Í": "IÎÏYÍÌŸ", + "ì": "iîïyíìÿ", + "Ì": "IÎÏYÍÌŸ", + + "j": "jg", + "J": "JG", + + "k": "kcq", + "K": "KCQ", + + "n": "nñ", + "N": "NÑ", + + "o": "oôóòöœ", + "O": "OÔÓÒÖŒ", + "ô": "oôóòöœ", + "Ô": "OÔÓÒÖŒ", + "ó": "oôóòöœ", + "Ó": "OÔÓÒÖŒ", + "ò": "oôóòöœ", + "Ò": "OÔÓÒÖŒ", + "ö": "oôóòöœ", + "Ö": "OÔÓÒÖŒ", + + "œ": "œoôeéèêë", + "Œ": "ŒOÔEÉÈÊË", + + "q": "qck", + "Q": "QCK", + + "s": "sśŝcç", + "S": "SŚŜCÇ", + "ś": "sśŝcç", + "Ś": "SŚŜCÇ", + "ŝ": "sśŝcç", + "Ŝ": "SŚŜCÇ", + + "u": "uûùüú", + "U": "UÛÙÜÚ", + "û": "uûùüú", + "Û": "UÛÙÜÚ", + "ù": "uûùüú", + "Ù": "UÛÙÜÚ", + "ü": "uûùüú", + "Ü": "UÛÙÜÚ", + "ú": "uûùüú", + "Ú": "UÛÙÜÚ", + + "v": "vw", + "V": "VW", + + "w": "wv", + "W": "WV", + + "x": "xck", + "X": "XCK", + + "y": "yÿiîŷýỳ", + "Y": "YŸIÎŶÝỲ", + "ÿ": "yÿiîŷýỳ", + "Ÿ": "YŸIÎŶÝỲ", + "ŷ": "yÿiîŷýỳ", + "Ŷ": "YŸIÎŶÝỲ", + "ý": "yÿiîŷýỳ", + "Ý": "YŸIÎŶÝỲ", + "ỳ": "yÿiîŷýỳ", + "Ỳ": "YŸIÎŶÝỲ", + + "z": "zs", + "Z": "ZS", +} + +d1toX = { + "æ": ("ae",), + "Æ": ("AE",), + "b": ("bb",), + "B": ("BB",), + "c": ("cc", "ss", "qu", "ch"), + "C": ("CC", "SS", "QU", "CH"), + "d": ("dd",), + "D": ("DD",), + "é": ("ai", "ei"), + "É": ("AI", "EI"), + "f": ("ff", "ph"), + "F": ("FF", "PH"), + "g": ("gu", "ge", "gg", "gh"), + "G": ("GU", "GE", "GG", "GH"), + "j": ("jj", "dj"), + "J": ("JJ", "DJ"), + "k": ("qu", "ck", "ch", "cu", "kk", "kh"), + "K": ("QU", "CK", "CH", "CU", "KK", "KH"), + "l": ("ll",), + "L": ("LL",), + "m": ("mm", "mn"), + "M": ("MM", "MN"), + "n": ("nn", "nm", "mn"), + "N": ("NN", "NM", "MN"), + "o": ("au", "eau"), + "O": ("AU", "EAU"), + "œ": ("oe", "eu"), + "Œ": ("OE", "EU"), + "p": ("pp", "ph"), + "P": ("PP", "PH"), + "q": ("qu", "ch", "cq", "ck", "kk"), + "Q": ("QU", "CH", "CQ", "CK", "KK"), + "r": ("rr",), + "R": ("RR",), + "s": ("ss", "sh"), + "S": ("SS", "SH"), + "t": ("tt", "th"), + "T": ("TT", "TH"), + "x": ("cc", "ct", "xx"), + "X": ("CC", "CT", "XX"), + "z": ("ss", "zh"), + "Z": ("SS", "ZH"), +} + + +def get1toXReplacement (cPrev, cCur, cNext): + if cCur in aConsonant and (cPrev in aConsonant or cNext in aConsonant): + return () + return d1toX.get(cCur, ()) + + +d2toX = { + "am": ("an", "en", "em"), + "AM": ("AN", "EN", "EM"), + "an": ("am", "en", "em"), + "AN": ("AM", "EN", "EM"), + "au": ("eau", "o", "ô"), + "AU": ("EAU", "O", "Ô"), + "em": ("an", "am", "en"), + 
"EM": ("AN", "AM", "EN"), + "en": ("an", "am", "em"), + "EN": ("AN", "AM", "EM"), + "ai": ("ei", "é", "è", "ê", "ë"), + "AI": ("EI", "É", "È", "Ê", "Ë"), + "ei": ("ai", "é", "è", "ê", "ë"), + "EI": ("AI", "É", "È", "Ê", "Ë"), + "ch": ("sh", "c", "ss"), + "CH": ("SH", "C", "SS"), + "ct": ("x", "cc"), + "CT": ("X", "CC"), + "oa": ("oi",), + "OA": ("OI",), + "oi": ("oa", "oie"), + "OI": ("OA", "OIE"), + "ph": ("f",), + "PH": ("F",), + "qu": ("q", "cq", "ck", "c", "k"), + "QU": ("Q", "CQ", "CK", "C", "K"), + "ss": ("c", "ç"), + "SS": ("C", "Ç"), + "un": ("ein",), + "UN": ("EIN",), +} + + +# End of word + +dFinal1 = { + "a": ("as", "at", "ant", "ah"), + "A": ("AS", "AT", "ANT", "AH"), + "c": ("ch",), + "C": ("CH",), + "e": ("et", "er", "ets", "ée", "ez", "ai", "ais", "ait", "ent", "eh"), + "E": ("ET", "ER", "ETS", "ÉE", "EZ", "AI", "AIS", "AIT", "ENT", "EH"), + "é": ("et", "er", "ets", "ée", "ez", "ai", "ais", "ait"), + "É": ("ET", "ER", "ETS", "ÉE", "EZ", "AI", "AIS", "AIT"), + "è": ("et", "er", "ets", "ée", "ez", "ai", "ais", "ait"), + "È": ("ET", "ER", "ETS", "ÉE", "EZ", "AI", "AIS", "AIT"), + "ê": ("et", "er", "ets", "ée", "ez", "ai", "ais", "ait"), + "Ê": ("ET", "ER", "ETS", "ÉE", "EZ", "AI", "AIS", "AIT"), + "ë": ("et", "er", "ets", "ée", "ez", "ai", "ais", "ait"), + "Ë": ("ET", "ER", "ETS", "ÉE", "EZ", "AI", "AIS", "AIT"), + "g": ("gh",), + "G": ("GH",), + "i": ("is", "it", "ie", "in"), + "I": ("IS", "IT", "IE", "IN"), + "n": ("nt", "nd", "ns", "nh"), + "N": ("NT", "ND", "NS", "NH"), + "o": ("aut", "ot", "os"), + "O": ("AUT", "OT", "OS"), + "ô": ("aut", "ot", "os"), + "Ô": ("AUT", "OT", "OS"), + "ö": ("aut", "ot", "os"), + "Ö": ("AUT", "OT", "OS"), + "p": ("ph",), + "P": ("PH",), + "s": ("sh",), + "S": ("SH",), + "t": ("th",), + "T": ("TH",), + "u": ("ut", "us", "uh"), + "U": ("UT", "US", "UH"), +} + +dFinal2 = { + "ai": ("aient", "ais", "et"), + "AI": ("AIENT", "AIS", "ET"), + "an": ("ant", "ent"), + "AN": ("ANT", "ENT"), + "en": ("ent", "ant"), + "EN": ("ENT", "ANT"), + "ei": ("ait", "ais"), + "EI": ("AIT", "AIS"), + "on": ("ons", "ont"), + "ON": ("ONS", "ONT"), + "oi": ("ois", "oit", "oix"), + "OI": ("OIS", "OIT", "OIX"), +} + + +# Préfixes et suffixes + +aPfx1 = frozenset([ + "anti", "archi", "contre", "hyper", "mé", "méta", "im", "in", "ir", "par", "proto", + "pseudo", "pré", "re", "ré", "sans", "sous", "supra", "sur", "ultra" +]) +aPfx2 = frozenset([ + "belgo", "franco", "génito", "gynéco", "médico", "russo" +]) + + +_zMotAvecPronom = re.compile("^(?i)(\\w+)(-(?:t-|)(?:ils?|elles?|on|je|tu|nous|vous))$") + +def cut (sWord): + "returns a tuple of strings (prefix, trimed_word, suffix)" + m = _zMotAvecPronom.search(sWord) + if m: + return ("", m.group(1), m.group(2)) + return ("", sWord, "") + + +# Other functions + +def filterSugg (aSugg): + "exclude suggestions" + return filter(lambda sSugg: not sSugg.endswith(("è", "È")), aSugg) ADDED graphspell/dawg.py Index: graphspell/dawg.py ================================================================== --- graphspell/dawg.py +++ graphspell/dawg.py @@ -0,0 +1,775 @@ +#!python3 + +# FSA DICTIONARY BUILDER +# +# by Olivier R. +# License: MPL 2 +# +# This tool encodes lexicon into an indexable binary dictionary +# Input files MUST be encoded in UTF-8. + + +import sys +import os +import collections + +from . 
import str_transform as st +from .progressbar import ProgressBar + + + +def readFile (spf): + print(" < Read lexicon: " + spf) + if os.path.isfile(spf): + with open(spf, "r", encoding="utf-8") as hSrc: + for sLine in hSrc: + sLine = sLine.strip() + if sLine and not sLine.startswith("#"): + yield sLine + else: + raise OSError("# Error. File not found or not loadable: " + spf) + + +def getElemsFromFile (spf): + "generator: yields tuples of (flexion, stem, tags) from the lexicon file" + nErr = 0 + if not spf.endswith(".clex"): + for sLine in readFile(spf): + try: + sFlex, sStem, sTag = sLine.split("\t") + yield (sFlex, sStem, sTag) + except ValueError: + nErr += 1 + else: + sTag = "_" # neutral tag + sTag2 = "" + for sLine in readFile(spf): + if sLine.startswith("[") and sLine.endswith("]"): + # tag line + if "-->" in sLine: + try: + sTag, sSfxCode, sTag2 = sLine[1:-1].split(" --> ") + except ValueError: + nErr += 1 + continue + sTag = sTag.strip() + sSfxCode = sSfxCode.strip() + sTag2 = sTag2.strip() + else: + sTag = sLine[1:-1] + sTag2 = "" + else: + # entry line + if "\t" in sLine: + if sLine.count("\t") > 1: + nErr += 1 + continue + sFlex, sStem = sLine.split("\t") + else: + sFlex = sStem = sLine + #print(sFlex, sStem, sTag) + yield (sFlex, sStem, sTag) + if sTag2: + sFlex2 = st.changeWordWithSuffixCode(sFlex, sSfxCode) + #print(sFlex2, sStem, sTag2) + yield (sFlex2, sStem, sTag2) + if nErr: + print(" # Lines ignored: {:>10}".format(nErr)) + + + +class DAWG: + """DIRECT ACYCLIC WORD GRAPH""" + # This code is inspired by Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115) + # We store suffix/affix codes and tags within the graph after the “real” word. + # A word is a list of numbers [ c1, c2, c3 . . . cN, iAffix, iTags] + # Each arc is an index in self.lArcVal, where are stored characters, suffix/affix codes for stemming and tags. + # Important: As usual, the last node (after ‘iTags’) is tagged final, AND the node after ‘cN’ is ALSO tagged final. + + def __init__ (self, spfSrc, sLangName, cStemming): + print("===== Direct Acyclic Word Graph - Minimal Acyclic Finite State Automaton =====") + cStemming = cStemming.upper() + if cStemming == "A": + funcStemmingGen = st.defineAffixCode + elif cStemming == "S": + funcStemmingGen = st.defineSuffixCode + elif cStemming == "N": + funcStemmingGen = st.noStemming + else: + raise ValueError("# Error. Unknown stemming code: {}".format(cStemming)) + + lEntry = [] + lChar = ['']; dChar = {}; nChar = 1; dCharOccur = {} + lAff = []; dAff = {}; nAff = 0; dAffOccur = {} + lTag = []; dTag = {}; nTag = 0; dTagOccur = {} + nErr = 0 + + # read lexicon + for sFlex, sStem, sTag in getElemsFromFile(spfSrc): + addWordToCharDict(sFlex) + # chars + for c in sFlex: + if c not in dChar: + dChar[c] = nChar + lChar.append(c) + nChar += 1 + dCharOccur[c] = dCharOccur.get(c, 0) + 1 + # affixes to find stem from flexion + aff = funcStemmingGen(sFlex, sStem) + if aff not in dAff: + dAff[aff] = nAff + lAff.append(aff) + nAff += 1 + dAffOccur[aff] = dAffOccur.get(aff, 0) + 1 + # tags + if sTag not in dTag: + dTag[sTag] = nTag + lTag.append(sTag) + nTag += 1 + dTagOccur[sTag] = dTagOccur.get(sTag, 0) + 1 + lEntry.append((sFlex, dAff[aff], dTag[sTag])) + if not lEntry: + raise ValueError("# Error.
Empty lexicon") + + # Preparing DAWG + print(" > Preparing list of words") + lVal = lChar + lAff + lTag + lWord = [ [dChar[c] for c in sFlex] + [iAff+nChar] + [iTag+nChar+nAff] for sFlex, iAff, iTag in lEntry ] + lEntry = None + + # Dictionary of arc values occurrency, to sort arcs of each node + dValOccur = dict( [ (dChar[c], dCharOccur[c]) for c in dChar ] \ + + [ (dAff[aff]+nChar, dAffOccur[aff]) for aff in dAff ] \ + + [ (dTag[tag]+nChar+nAff, dTagOccur[tag]) for tag in dTag ] ) + #with open(spfSrc[:-8]+".valuesfreq.txt", 'w', encoding='utf-8') as hFreqDst: # DEBUG + # for iKey, nOcc in sorted(dValOccur.items(), key=lambda t: t[1], reverse=True): + # hFreqDst.write("{}: {}\n".format(lVal[iKey], nOcc)) + # hFreqDst.close() + + self.sFile = spfSrc + self.sLang = sLangName + self.nEntry = len(lWord) + self.aPreviousEntry = [] + DawgNode.resetNextId() + self.oRoot = DawgNode() + self.lUncheckedNodes = [] # list of nodes that have not been checked for duplication. + self.lMinimizedNodes = {} # list of unique nodes that have been checked for duplication. + self.lSortedNodes = [] # version 2 and 3 + self.nNode = 0 + self.nArc = 0 + self.dChar = dChar + self.nChar = len(dChar) + self.nAff = nAff + self.lArcVal = lVal + self.nArcVal = len(lVal) + self.nTag = self.nArcVal - self.nChar - nAff + self.cStemming = cStemming + if cStemming == "A": + self.funcStemming = st.changeWordWithAffixCode + elif cStemming == "S": + self.funcStemming = st.changeWordWithSuffixCode + else: + self.funcStemming = st.noStemming + + # build + lWord.sort() + oProgBar = ProgressBar(0, len(lWord)) + for aEntry in lWord: + self.insert(aEntry) + oProgBar.increment(1) + oProgBar.done() + self.finish() + self.countNodes() + self.countArcs() + self.sortNodes() + self.sortNodeArcs(dValOccur) + #self.sortNodeArcs2 (self.oRoot, "") + self.displayInfo() + + # BUILD DAWG + def insert (self, aEntry): + if aEntry < self.aPreviousEntry: + sys.exit("# Error: Words must be inserted in alphabetical order.") + + # find common prefix between word and previous word + nCommonPrefix = 0 + for i in range(min(len(aEntry), len(self.aPreviousEntry))): + if aEntry[i] != self.aPreviousEntry[i]: + break + nCommonPrefix += 1 + + # Check the lUncheckedNodes for redundant nodes, proceeding from last + # one down to the common prefix size. Then truncate the list at that point. + self._minimize(nCommonPrefix) + + # add the suffix, starting from the correct node mid-way through the graph + if len(self.lUncheckedNodes) == 0: + oNode = self.oRoot + else: + oNode = self.lUncheckedNodes[-1][2] + + iChar = nCommonPrefix + for c in aEntry[nCommonPrefix:]: + oNextNode = DawgNode() + oNode.arcs[c] = oNextNode + self.lUncheckedNodes.append((oNode, c, oNextNode)) + if iChar == (len(aEntry) - 2): + oNode.final = True + iChar += 1 + oNode = oNextNode + oNode.final = True + self.aPreviousEntry = aEntry + + def finish (self): + "minimize unchecked nodes" + self._minimize(0) + + def _minimize (self, downTo): + # proceed from the leaf up to a certain point + for i in range( len(self.lUncheckedNodes)-1, downTo-1, -1 ): + oNode, char, oChildNode = self.lUncheckedNodes[i] + if oChildNode in self.lMinimizedNodes: + # replace the child with the previously encountered one + oNode.arcs[char] = self.lMinimizedNodes[oChildNode] + else: + # add the state to the minimized nodes. 
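# (Node equivalence here relies on DawgNode.__hash__/__eq__ defined below:
# two nodes compare equal when they carry the same final flag and identical
# arcs to identical children, so shared suffixes, e.g. the "-ment" of
# "rapidement" and "lentement", are stored only once in the graph.)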
+ self.lMinimizedNodes[oChildNode] = oChildNode + self.lUncheckedNodes.pop() + + def countNodes (self): + self.nNode = len(self.lMinimizedNodes) + + def countArcs (self): + self.nArc = 0 + for oNode in self.lMinimizedNodes: + self.nArc += len(oNode.arcs) + + def sortNodeArcs (self, dValOccur): + print(" > Sort node arcs") + self.oRoot.sortArcs(dValOccur) + for oNode in self.lMinimizedNodes: + oNode.sortArcs(dValOccur) + + def sortNodeArcs2 (self, oNode, cPrevious=""): + # recursive function + dCharOccur = getCharOrderAfterChar(cPrevious) + if dCharOccur: + oNode.sortArcs2(dCharOccur, self.lArcVal) + for nArcVal, oNextNode in oNode.arcs.items(): + self.sortNodeArcs2(oNextNode, self.lArcVal[nArcVal]) + + def sortNodes (self): + print(" > Sort nodes") + for oNode in self.oRoot.arcs.values(): + self._parseNodes(oNode) + + def _parseNodes (self, oNode): + # Warning: recursive method + if oNode.pos > 0: + return + oNode.setPos() + self.lSortedNodes.append(oNode) + for oNextNode in oNode.arcs.values(): + self._parseNodes(oNextNode) + + def lookup (self, sWord): + oNode = self.oRoot + for c in sWord: + if self.dChar.get(c, '') not in oNode.arcs: + return False + oNode = oNode.arcs[self.dChar[c]] + return oNode.final + + def morph (self, sWord): + oNode = self.oRoot + for c in sWord: + if self.dChar.get(c, '') not in oNode.arcs: + return '' + oNode = oNode.arcs[self.dChar[c]] + if oNode.final: + s = "* " + for arc in oNode.arcs: + if arc >= self.nChar: + s += " [" + self.funcStemming(sWord, self.lArcVal[arc]) + oNode2 = oNode.arcs[arc] + for arc2 in oNode2.arcs: + s += " / " + self.lArcVal[arc2] + s += "]" + return s + return '' + + def displayInfo (self): + print(" * {:<12} {:>16,}".format("Entries:", self.nEntry)) + print(" * {:<12} {:>16,}".format("Characters:", self.nChar)) + print(" * {:<12} {:>16,}".format("Affixes:", self.nAff)) + print(" * {:<12} {:>16,}".format("Tags:", self.nTag)) + print(" * {:<12} {:>16,}".format("Arc values:", self.nArcVal)) + print(" * {:<12} {:>16,}".format("Nodes:", self.nNode)) + print(" * {:<12} {:>16,}".format("Arcs:", self.nArc)) + print(" * {:<12} {:>16}".format("Stemming:", self.cStemming + "FX")) + + def getArcStats (self): + d = {} + for oNode in self.lMinimizedNodes: + n = len(oNode.arcs) + d[n] = d.get(n, 0) + 1 + s = " * Nodes:\n" + for n in d: + s = s + " {:>9} nodes have {:>3} arcs\n".format(d[n], n) + return s + + def writeInfo (self, sPathFile): + print(" > Write informations") + with open(sPathFile, 'w', encoding='utf-8', newline="\n") as hDst: + hDst.write(self.getArcStats()) + hDst.write("\n * Values:\n") + for i, s in enumerate(self.lArcVal): + hDst.write(" {:>6}. {}\n".format(i, s)) + hDst.close() + + # BINARY CONVERSION + def createBinary (self, sPathFile, nMethod, bDebug=False): + print(" > Write DAWG as an indexable binary dictionary [method: %d]" % nMethod) + if nMethod == 1: + self.nBytesArc = ( (self.nArcVal.bit_length() + 2) // 8 ) + 1 # We add 2 bits. See DawgNode.convToBytes1() + self._calcNumBytesNodeAddress() + self._calcNodesAddress1() + elif nMethod == 2: + self.nBytesArc = ( (self.nArcVal.bit_length() + 3) // 8 ) + 1 # We add 3 bits. See DawgNode.convToBytes2() + self._calcNumBytesNodeAddress() + self._calcNodesAddress2() + elif nMethod == 3: + self.nBytesArc = ( (self.nArcVal.bit_length() + 3) // 8 ) + 1 # We add 3 bits. 
See DawgNode.convToBytes3() + self.nBytesOffset = 1 + self.nMaxOffset = (2 ** (self.nBytesOffset * 8)) - 1 + self._calcNumBytesNodeAddress() + self._calcNodesAddress3() + else: + print(" # Error: unknown compression method") + print(" Arc values (chars, affixes and tags): {} -> {} bytes".format( self.nArcVal, len("\t".join(self.lArcVal).encode("utf-8")) )) + print(" Arc size: {} bytes, Address size: {} bytes -> {} * {} = {} bytes".format( self.nBytesArc, self.nBytesNodeAddress, \ + self.nBytesArc+self.nBytesNodeAddress, self.nArc, \ + (self.nBytesArc+self.nBytesNodeAddress)*self.nArc )) + self._writeBinary(sPathFile, nMethod) + if bDebug: + self._writeNodes(sPathFile, nMethod) + + def _calcNumBytesNodeAddress (self): + "how many bytes needed to store all nodes/arcs in the binary dictionary" + self.nBytesNodeAddress = 1 + while ((self.nBytesArc + self.nBytesNodeAddress) * self.nArc) > (2 ** (self.nBytesNodeAddress * 8)): + self.nBytesNodeAddress += 1 + + def _calcNodesAddress1 (self): + nBytesNode = self.nBytesArc + self.nBytesNodeAddress + iAddr = len(self.oRoot.arcs) * nBytesNode + for oNode in self.lMinimizedNodes: + oNode.addr = iAddr + iAddr += max(len(oNode.arcs), 1) * nBytesNode + + def _calcNodesAddress2 (self): + nBytesNode = self.nBytesArc + self.nBytesNodeAddress + iAddr = len(self.oRoot.arcs) * nBytesNode + for oNode in self.lSortedNodes: + oNode.addr = iAddr + iAddr += max(len(oNode.arcs), 1) * nBytesNode + for oNextNode in oNode.arcs.values(): + if (oNode.pos + 1) == oNextNode.pos: + iAddr -= self.nBytesNodeAddress + #break + + def _calcNodesAddress3 (self): + nBytesNode = self.nBytesArc + self.nBytesNodeAddress + # theorical nodes size if only addresses and no offset + self.oRoot.size = len(self.oRoot.arcs) * nBytesNode + for oNode in self.lSortedNodes: + oNode.size = max(len(oNode.arcs), 1) * nBytesNode + # rewind and calculate dropdown from the end, several times + nDiff = self.nBytesNodeAddress - self.nBytesOffset + bEnd = False + while not bEnd: + bEnd = True + # recalculate addresses + iAddr = self.oRoot.size + for oNode in self.lSortedNodes: + oNode.addr = iAddr + iAddr += oNode.size + # rewind and calculate dropdown from the end, several times + for i in range(self.nNode-1, -1, -1): + nSize = max(len(self.lSortedNodes[i].arcs), 1) * nBytesNode + for oNextNode in self.lSortedNodes[i].arcs.values(): + if 1 < (oNextNode.addr - self.lSortedNodes[i].addr) < self.nMaxOffset: + nSize -= nDiff + if self.lSortedNodes[i].size != nSize: + self.lSortedNodes[i].size = nSize + bEnd = False + + def _writeBinary (self, sPathFile, nMethod): + """ + Format of the binary indexable dictionary: + Each section is separated with 4 bytes of \0 + + - Section Header: + /pyfsa/[version] + * version is an ASCII string + + - Section Informations: + /[tag_lang] + /[number of chars] + /[number of bytes for each arc] + /[number of bytes for each address node] + /[number of entries] + /[number of nodes] + /[number of arcs] + /[number of affixes] + * each field is a ASCII string + /[stemming code] + * "S" means stems are generated by /suffix_code/, "A" means they are generated by /affix_code/ + See defineSuffixCode() and defineAffixCode() for details. + "N" means no stemming + + - Section Values: + * a list of strings encoded in binary from utf-8, each value separated with a tabulation + + - Section Word Graph (nodes / arcs) + * A list of nodes which are a list of arcs with an address of the next node. + See DawgNode.convToBytes() for details. 
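        Example (a sketch; the numeric fields are invented): a French dictionary
        built with method 3 would begin with
            b"/pyfsa/3/" + b"\0\0\0\0" + b"fr/124/2/3/..." + b"\0\0\0\0"
        followed by the tab-separated arc values and the binary node section.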
+ """ + if not sPathFile.endswith(".bdic"): + sPathFile += "."+str(nMethod)+".bdic" + with open(sPathFile, 'wb') as hDst: + # header + hDst.write("/pyfsa/{}/".format(nMethod).encode("utf-8")) + hDst.write(b"\0\0\0\0") + # infos + hDst.write("{}/{}/{}/{}/{}/{}/{}/{}/{}".format(self.sLang, self.nChar, self.nBytesArc, self.nBytesNodeAddress, \ + self.nEntry, self.nNode, self.nArc, self.nAff, self.cStemming).encode("utf-8")) + hDst.write(b"\0\0\0\0") + # lArcVal + hDst.write("\t".join(self.lArcVal).encode("utf-8")) + hDst.write(b"\0\0\0\0") + # DAWG: nodes / arcs + if nMethod == 1: + hDst.write(self.oRoot.convToBytes1(self.nBytesArc, self.nBytesNodeAddress)) + for oNode in self.lMinimizedNodes: + hDst.write(oNode.convToBytes1(self.nBytesArc, self.nBytesNodeAddress)) + elif nMethod == 2: + hDst.write(self.oRoot.convToBytes2(self.nBytesArc, self.nBytesNodeAddress)) + for oNode in self.lSortedNodes: + hDst.write(oNode.convToBytes2(self.nBytesArc, self.nBytesNodeAddress)) + elif nMethod == 3: + hDst.write(self.oRoot.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset)) + for oNode in self.lSortedNodes: + hDst.write(oNode.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset)) + hDst.close() + + def _writeNodes (self, sPathFile, nMethod): + "for debugging only" + print(" > Write nodes") + with open(sPathFile+".nodes."+str(nMethod)+".txt", 'w', encoding='utf-8', newline="\n") as hDst: + if nMethod == 1: + hDst.write(self.oRoot.getTxtRepr1(self.nBytesArc, self.nBytesNodeAddress, self.lArcVal)+"\n") + #hDst.write( ''.join( [ "%02X " % z for z in self.oRoot.convToBytes1(self.nBytesArc, self.nBytesNodeAddress) ] ).strip() ) + for oNode in self.lMinimizedNodes: + hDst.write(oNode.getTxtRepr1(self.nBytesArc, self.nBytesNodeAddress, self.lArcVal)+"\n") + if nMethod == 2: + hDst.write(self.oRoot.getTxtRepr2(self.nBytesArc, self.nBytesNodeAddress, self.lArcVal)+"\n") + for oNode in self.lSortedNodes: + hDst.write(oNode.getTxtRepr2(self.nBytesArc, self.nBytesNodeAddress, self.lArcVal)+"\n") + if nMethod == 3: + hDst.write(self.oRoot.getTxtRepr3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset, self.lArcVal)+"\n") + #hDst.write( ''.join( [ "%02X " % z for z in self.oRoot.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset) ] ).strip() ) + for oNode in self.lSortedNodes: + hDst.write(oNode.getTxtRepr3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset, self.lArcVal)+"\n") + hDst.close() + + def writeResults (self, sPathFile): + bFileExits = os.path.isfile("_lexicons.res.txt") + with open("_lexicons.res.txt", "a", encoding='utf-8', newline="\n") as hDst: + sFormat1 = "{:<12} {:>12} {:>5} {:>8} {:>8} {:>6} {:>8} {:>9} {:>9} {:>15} {:>12} {:>12}\n" + sFormat2 = "{:<12} {:>12,} {:>5,} {:>8,} {:>8} {:>6,} {:>8,} {:>9,} {:>9,} {:>15,} {:>12,} {:>12,}\n" + if not bFileExits: + hDst.write(sFormat1.format("Lexicon", "Entries", "Chars", "Affixes", "Stemming", "Tags", "Values", "Nodes", "Arcs", "Lexicon (Kb)", "Dict (Kb)", "LT Dict (Kb)")) + hDst.write(sFormat2.format(self.sLang, self.nEntry, self.nChar, self.nAff, self.cStemming + "FX", self.nTag, self.nArcVal, \ + self.nNode, self.nArc, os.path.getsize(self.sFile), os.path.getsize(sPathFile), \ + os.path.getsize("cfsa/dict/{}.dict".format(self.sLang)) if os.path.isfile("cfsa/dict/{}.dict".format(self.sLang)) else 0)) + hDst.close() + + + +class DawgNode: + NextId = 0 + NextPos = 1 # (version 2) + + def __init__ (self): + self.i = DawgNode.NextId + DawgNode.NextId += 1 + self.final = False + self.arcs 
= {} # key: arc value; value: a node + self.addr = 0 # address in the binary dictionary + self.pos = 0 # position in the binary dictionary (version 2) + self.size = 0 # size of node in bytes (version 3) + + @classmethod + def resetNextId (cls): + cls.NextId = 0 + + def setPos (self): # version 2 + self.pos = DawgNode.NextPos + DawgNode.NextPos += 1 + + def __str__ (self): + # Caution! this function is used for hashing and comparison! + l = [] + if self.final: + l.append("1") + else: + l.append("0") + for (key, node) in self.arcs.items(): + l.append(str(key)) + l.append(str(node.i)) + return "_".join(l) + + def __hash__ (self): + # Used as a key in a python dictionary. + return self.__str__().__hash__() + + def __eq__ (self, other): + # Used as a key in a python dictionary. + # Nodes are equivalent if they have identical arcs, and each identical arc leads to identical states. + return self.__str__() == other.__str__() + + def sortArcs (self, dValOccur): + self.arcs = collections.OrderedDict(sorted(self.arcs.items(), key=lambda t: dValOccur.get(t[0], 0), reverse=True)) + + def sortArcs2 (self, dValOccur, lArcVal): + self.arcs = collections.OrderedDict(sorted(self.arcs.items(), key=lambda t: dValOccur.get(lArcVal[t[0]], 0), reverse=True)) + + # VERSION 1 ===================================================================================================== + def convToBytes1 (self, nBytesArc, nBytesNodeAddress): + """ + Node scheme: + - Arc length is defined by nBytesArc + - Address length is defined by nBytesNodeAddress + + | Arc | Address of next node | + | | | + /---------------\ /---------------\ /---------------\ /---------------\ /---------------\ /---------------\ + | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | + \---------------/ \---------------/ \---------------/ \---------------/ \---------------/ \---------------/ + [...] 
+ /---------------\ /---------------\ /---------------\ /---------------\ /---------------\ /---------------\ + | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | + \---------------/ \---------------/ \---------------/ \---------------/ \---------------/ \---------------/ + ^ ^ + | | + | | + | \___ if 1, last arc of this node + \_____ if 1, this node is final (only on the first arc) + """ + nArc = len(self.arcs) + nFinalNodeMask = 1 << ((nBytesArc*8)-1) + nFinalArcMask = 1 << ((nBytesArc*8)-2) + if len(self.arcs) == 0: + val = nFinalNodeMask | nFinalArcMask + by = val.to_bytes(nBytesArc, byteorder='big') + by += (0).to_bytes(nBytesNodeAddress, byteorder='big') + return by + by = b"" + for i, arc in enumerate(self.arcs, 1): + val = arc + if i == 1 and self.final: + val = val | nFinalNodeMask + if i == nArc: + val = val | nFinalArcMask + by += val.to_bytes(nBytesArc, byteorder='big') + by += self.arcs[arc].addr.to_bytes(nBytesNodeAddress, byteorder='big') + return by + + def getTxtRepr1 (self, nBytesArc, nBytesNodeAddress, lVal): + nArc = len(self.arcs) + nFinalNodeMask = 1 << ((nBytesArc*8)-1) + nFinalArcMask = 1 << ((nBytesArc*8)-2) + s = "i{:_>10} -- #{:_>10}\n".format(self.i, self.addr) + if len(self.arcs) == 0: + s += " {:<20} {:0>16} i{:_>10} #{:_>10}\n".format("", bin(nFinalNodeMask | nFinalArcMask)[2:], "0", "0") + return s + for i, arc in enumerate(self.arcs, 1): + val = arc + if i == 1 and self.final: + val = val | nFinalNodeMask + if i == nArc: + val = val | nFinalArcMask + s += " {:<20} {:0>16} i{:_>10} #{:_>10}\n".format(lVal[arc], bin(val)[2:], self.arcs[arc].i, self.arcs[arc].addr) + return s + + # VERSION 2 ===================================================================================================== + def convToBytes2 (self, nBytesArc, nBytesNodeAddress): + """ + Node scheme: + - Arc length is defined by nBytesArc + - Address length is defined by nBytesNodeAddress + + | Arc | Address of next node | + | | | + /---------------\ /---------------\ /---------------\ /---------------\ /---------------\ /---------------\ + | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | + \---------------/ \---------------/ \---------------/ \---------------/ \---------------/ \---------------/ + [...] 
+ /---------------\ /---------------\ /---------------\ /---------------\ /---------------\ /---------------\ + | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | + \---------------/ \---------------/ \---------------/ \---------------/ \---------------/ \---------------/ + ^ ^ ^ + | | | + | | \_ if 1, caution, no address: next node is the following node + | \___ if 1, last arc of this node + \_____ if 1, this node is final (only on the first arc) + """ + nArc = len(self.arcs) + nFinalNodeMask = 1 << ((nBytesArc*8)-1) + nFinalArcMask = 1 << ((nBytesArc*8)-2) + nNextNodeMask = 1 << ((nBytesArc*8)-3) + if len(self.arcs) == 0: + val = nFinalNodeMask | nFinalArcMask + by = val.to_bytes(nBytesArc, byteorder='big') + by += (0).to_bytes(nBytesNodeAddress, byteorder='big') + return by + by = b"" + for i, arc in enumerate(self.arcs, 1): + val = arc + if i == 1 and self.final: + val = val | nFinalNodeMask + if i == nArc: + val = val | nFinalArcMask + if (self.pos + 1) == self.arcs[arc].pos and self.i != 0: + val = val | nNextNodeMask + by += val.to_bytes(nBytesArc, byteorder='big') + else: + by += val.to_bytes(nBytesArc, byteorder='big') + by += self.arcs[arc].addr.to_bytes(nBytesNodeAddress, byteorder='big') + return by + + def getTxtRepr2 (self, nBytesArc, nBytesNodeAddress, lVal): + nArc = len(self.arcs) + nFinalNodeMask = 1 << ((nBytesArc*8)-1) + nFinalArcMask = 1 << ((nBytesArc*8)-2) + nNextNodeMask = 1 << ((nBytesArc*8)-3) + s = "i{:_>10} -- #{:_>10}\n".format(self.i, self.addr) + if nArc == 0: + s += " {:<20} {:0>16} i{:_>10} #{:_>10}\n".format("", bin(nFinalNodeMask | nFinalArcMask)[2:], "0", "0") + return s + for i, arc in enumerate(self.arcs, 1): + val = arc + if i == 1 and self.final: + val = val | nFinalNodeMask + if i == nArc: + val = val | nFinalArcMask + if (self.pos + 1) == self.arcs[arc].pos and self.i != 0: + val = val | nNextNodeMask + s += " {:<20} {:0>16}\n".format(lVal[arc], bin(val)[2:], "") + else: + s += " {:<20} {:0>16} i{:_>10} #{:_>10}\n".format(lVal[arc], bin(val)[2:], self.arcs[arc].i, self.arcs[arc].addr) + return s + + # VERSION 3 ===================================================================================================== + def convToBytes3 (self, nBytesArc, nBytesNodeAddress, nBytesOffset): + """ + Node scheme: + - Arc length is defined by nBytesArc + - Address length is defined by nBytesNodeAddress + - Offset length is defined by nBytesOffset + + | Arc | Address of next node or offset to next node | + | | | + /---------------\ /---------------\ /---------------\ /---------------\ /---------------\ /---------------\ + |1|0|0| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | + \---------------/ \---------------/ \---------------/ \---------------/ \---------------/ \---------------/ + [...] 
+ /---------------\ /---------------\ /---------------\ + |0|0|1| | | | | | | | | | | | | | | | | | | | | | | | Offsets are shorter than addresses + \---------------/ \---------------/ \---------------/ + /---------------\ /---------------\ /---------------\ /---------------\ /---------------\ /---------------\ + |0|1|0| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | + \---------------/ \---------------/ \---------------/ \---------------/ \---------------/ \---------------/ + + ^ ^ ^ + | | | + | | \_ if 1, offset instead of address of next node + | \___ if 1, last arc of this node + \_____ if 1, this node is final (only on the first arc) + """ + nArc = len(self.arcs) + nFinalNodeMask = 1 << ((nBytesArc*8)-1) + nFinalArcMask = 1 << ((nBytesArc*8)-2) + nNextNodeMask = 1 << ((nBytesArc*8)-3) + nMaxOffset = (2 ** (nBytesOffset * 8)) - 1 + if nArc == 0: + val = nFinalNodeMask | nFinalArcMask + by = val.to_bytes(nBytesArc, byteorder='big') + by += (0).to_bytes(nBytesNodeAddress, byteorder='big') + return by + by = b"" + for i, arc in enumerate(self.arcs, 1): + val = arc + if i == 1 and self.final: + val = val | nFinalNodeMask + if i == nArc: + val = val | nFinalArcMask + if 1 < (self.arcs[arc].addr - self.addr) < nMaxOffset and self.i != 0: + val = val | nNextNodeMask + by += val.to_bytes(nBytesArc, byteorder='big') + by += (self.arcs[arc].addr-self.addr).to_bytes(nBytesOffset, byteorder='big') + else: + by += val.to_bytes(nBytesArc, byteorder='big') + by += self.arcs[arc].addr.to_bytes(nBytesNodeAddress, byteorder='big') + return by + + def getTxtRepr3 (self, nBytesArc, nBytesNodeAddress, nBytesOffset, lVal): + nArc = len(self.arcs) + nFinalNodeMask = 1 << ((nBytesArc*8)-1) + nFinalArcMask = 1 << ((nBytesArc*8)-2) + nNextNodeMask = 1 << ((nBytesArc*8)-3) + nMaxOffset = (2 ** (nBytesOffset * 8)) - 1 + s = "i{:_>10} -- #{:_>10} ({})\n".format(self.i, self.addr, self.size) + if nArc == 0: + s += " {:<20} {:0>16} i{:_>10} #{:_>10}\n".format("", bin(nFinalNodeMask | nFinalArcMask)[2:], "0", "0") + return s + for i, arc in enumerate(self.arcs, 1): + val = arc + if i == 1 and self.final: + val = val | nFinalNodeMask + if i == nArc: + val = val | nFinalArcMask + if 1 < (self.arcs[arc].addr - self.addr) < nMaxOffset and self.i != 0: + val = val | nNextNodeMask + s += " {:<20} {:0>16} i{:_>10} +{:_>10}\n".format(lVal[arc], bin(val)[2:], self.arcs[arc].i, self.arcs[arc].addr - self.addr) + else: + s += " {:<20} {:0>16} i{:_>10} #{:_>10}\n".format(lVal[arc], bin(val)[2:], self.arcs[arc].i, self.arcs[arc].addr) + return s + + + +# Another attempt to sort node arcs + +_dCharOrder = { + # key: previous char, value: dictionary of chars {c: nValue} + "": {} +} + + +def addWordToCharDict (sWord): + cPrevious = "" + for cChar in sWord: + if cPrevious not in _dCharOrder: + _dCharOrder[cPrevious] = {} + _dCharOrder[cPrevious][cChar] = _dCharOrder[cPrevious].get(cChar, 0) + 1 + cPrevious = cChar + + +def getCharOrderAfterChar (cChar): + return _dCharOrder.get(cChar, None) + + +def displayCharOrder (): + for key, value in _dCharOrder.items(): + print("[" + key + "]: ", ", ".join([ c+":"+str(n) for c, n in sorted(value.items(), key=lambda t: t[1], reverse=True) ])) ADDED graphspell/echo.py Index: graphspell/echo.py ================================================================== --- graphspell/echo.py +++ graphspell/echo.py @@ -0,0 +1,29 @@ +#!python3 + +# The most boring yet indispensable function: print! 
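# Usage sketch (hypothetical call, shown for illustration):
#     from graphspell.echo import echo
#     echo("l'œuvre… déjà vu")   # on win32 prints "l'öuvre_ déjà vu" (via _CHARMAP below);
#                                # on other platforms the text is printed unchanged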
+
+
+import sys
+
+
+_CHARMAP = str.maketrans({ 'œ': 'ö',  'Œ': 'Ö',  'ʳ': "r",  'ᵉ': "e",  '…': "_",
+                           '“': '"',  '”': '"',  '„': '"',  '‘': "'",  '’': "'",
+                           'ā': 'â',  'Ā': 'Â',  'ē': 'ê',  'Ē': 'Ê',  'ī': 'î',  'Ī': 'Î',
+                           'ō': 'ô',  'Ō': 'Ô',  'ū': 'û',  'Ū': 'Û',  'Ÿ': 'Y',
+                           'ś': 's',  'ŝ': 's',
+                           '—': '-',  '–': '-'
+                         })
+
+
+def echo (obj, sep=' ', end='\n', file=sys.stdout, flush=False):
+    """ Print for Windows to avoid Python crashes.
+        Encoding depends on the Windows locale. No useful standard.
+        Always returns True (useful for debugging)."""
+    if sys.platform != "win32":
+        print(obj, sep=sep, end=end, file=file, flush=flush)
+        return True
+    try:
+        print(str(obj).translate(_CHARMAP), sep=sep, end=end, file=file, flush=flush)
+    except UnicodeEncodeError:
+        # last resort: strip everything ASCII cannot represent
+        print(str(obj).encode('ascii', 'replace').decode('ascii', 'replace'), sep=sep, end=end, file=file, flush=flush)
+    return True

ADDED graphspell/ibdawg.py
Index: graphspell/ibdawg.py
==================================================================
--- graphspell/ibdawg.py
+++ graphspell/ibdawg.py
@@ -0,0 +1,720 @@
+#!python3
+
+import os
+import traceback
+import pkgutil
+import re
+import codecs   # needed by the _writeNodes* debug methods below
+from functools import wraps
+import time
+
+#import logging
+#logging.basicConfig(filename="suggestions.log", level=logging.DEBUG)
+
+from . import str_transform as st
+from . import char_player as cp
+from .echo import echo
+
+
+def timethis (func):
+    "decorator for the execution time"
+    @wraps(func)
+    def wrapper (*args, **kwargs):
+        fStart = time.time()
+        result = func(*args, **kwargs)
+        fEnd = time.time()
+        print(func.__name__, fEnd - fStart)
+        return result
+    return wrapper
+
+
+class SuggResult:
+    """Structure for storing, classifying and filtering suggestions"""
+
+    def __init__ (self, sWord, nDistLimit=-1):
+        self.sWord = sWord
+        self.sSimplifiedWord = cp.simplifyWord(sWord)
+        self.nDistLimit = nDistLimit  if nDistLimit >= 0  else (len(sWord) // 3) + 1
+        self.nMinDist = 1000
+        self.aSugg = set()
+        self.dSugg = { 0: [], 1: [], 2: [] }
+
+    def addSugg (self, sSugg, nDeep=0):
+        "add a suggestion"
+        #logging.info((nDeep * "  ") + "__" + sSugg + "__")
+        if sSugg not in self.aSugg:
+            nDist = st.distanceDamerauLevenshtein(self.sSimplifiedWord, cp.simplifyWord(sSugg))
+            if nDist <= self.nDistLimit:
+                if nDist not in self.dSugg:
+                    self.dSugg[nDist] = []
+                self.dSugg[nDist].append(sSugg)
+                self.aSugg.add(sSugg)
+                if nDist < self.nMinDist:
+                    self.nMinDist = nDist
+                self.nDistLimit = min(self.nDistLimit, self.nMinDist+2)
+
+    def getSuggestions (self, nSuggLimit=10, nDistLimit=-1):
+        "return a list of suggestions"
+        lRes = []
+        if self.dSugg[0]:
+            # the best results are sorted by distance to the original word
+            self.dSugg[0].sort(key=lambda sSugg: st.distanceDamerauLevenshtein(self.sWord, sSugg))
+        for lSugg in self.dSugg.values():
+            lRes.extend(lSugg)
+            if len(lRes) > nSuggLimit:
+                break
+        lRes = list(cp.filterSugg(lRes))
+        if self.sWord.istitle():
+            lRes = list(map(lambda sSugg: sSugg.title(), lRes))
+        elif self.sWord.isupper():
+            lRes = list(map(lambda sSugg: sSugg.upper(), lRes))
+        return lRes[:nSuggLimit]
+
+    def reset (self):
+        self.aSugg.clear()
+        self.dSugg.clear()
+
+
+class IBDAWG:
+    """INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""
+
+    def __init__ (self, sDicName):
+        self.by = pkgutil.get_data(__package__, "_dictionaries/" + sDicName)
+        if not self.by:
+            raise OSError("# Error. File not found or not loadable: "+sDicName)
+
+        if self.by[0:7] != b"/pyfsa/":
+            raise TypeError("# Error. Not a pyfsa binary dictionary. Header: {}".format(self.by[0:9]))
+        if not(self.by[7:8] == b"1" or self.by[7:8] == b"2" or self.by[7:8] == b"3"):
+            raise ValueError("# Error. Unknown dictionary version: {}".format(self.by[7:8]))
+        try:
+            header, info, values, bdic = self.by.split(b"\0\0\0\0", 3)
+        except Exception:
+            raise Exception("# Error. Not a valid binary dictionary (expected 4 null-separated sections): " + sDicName)
+
+        self.sName = sDicName
+        self.nVersion = int(self.by[7:8].decode("utf-8"))
+        self.sHeader = header.decode("utf-8")
+        self.lArcVal = values.decode("utf-8").split("\t")
+        self.nArcVal = len(self.lArcVal)
+        self.byDic = bdic
+
+        l = info.decode("utf-8").split("/")
+        self.sLang = l[0]
+        self.nChar = int(l[1])
+        self.nBytesArc = int(l[2])
+        self.nBytesNodeAddress = int(l[3])
+        self.nEntries = int(l[4])
+        self.nNode = int(l[5])
+        self.nArc = int(l[6])
+        self.nAff = int(l[7])
+        self.cStemming = l[8]
+        if self.cStemming == "S":
+            self.funcStemming = st.changeWordWithSuffixCode
+        elif self.cStemming == "A":
+            self.funcStemming = st.changeWordWithAffixCode
+        else:
+            self.funcStemming = st.noStemming
+        self.nTag = self.nArcVal - self.nChar - self.nAff
+        # <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value
+        self.dChar = {}
+        for i in range(1, self.nChar):
+            self.dChar[self.lArcVal[i]] = i
+        self.dCharVal = { v: k  for k, v in self.dChar.items() }
+
+        self._arcMask = (2 ** ((self.nBytesArc * 8) - 3)) - 1
+        self._finalNodeMask = 1 << ((self.nBytesArc * 8) - 1)
+        self._lastArcMask = 1 << ((self.nBytesArc * 8) - 2)
+        self._addrBitMask = 1 << ((self.nBytesArc * 8) - 3)  # version 2
+
+        self.nBytesOffset = 1  # version 3
+
+        # Configuring DAWG functions according to nVersion
+        if self.nVersion == 1:
+            self.morph = self._morph1
+            self.stem = self._stem1
+            self._lookupArcNode = self._lookupArcNode1
+            self._getArcs = self._getArcs1
+            self._writeNodes = self._writeNodes1
+        elif self.nVersion == 2:
+            self.morph = self._morph2
+            self.stem = self._stem2
+            self._lookupArcNode = self._lookupArcNode2
+            self._getArcs = self._getArcs2
+            self._writeNodes = self._writeNodes2
+        elif self.nVersion == 3:
+            self.morph = self._morph3
+            self.stem = self._stem3
+            self._lookupArcNode = self._lookupArcNode3
+            self._getArcs = self._getArcs3
+            self._writeNodes = self._writeNodes3
+        else:
+            raise ValueError("  # Error: unknown code: {}".format(self.nVersion))
+
+        self.bOptNumSigle = False
+        self.bOptNumAtLast = False
+
+    def getInfo (self):
+        return  " Language: {0.sLang:>10} Version: {0.nVersion:>2} Stemming: {0.cStemming}FX\n" \
+                " Arcs values: {0.nArcVal:>10,} = {0.nChar:>5,} characters, {0.nAff:>6,} affixes, {0.nTag:>6,} tags\n" \
+                " Dictionary: {0.nEntries:>12,} entries, {0.nNode:>11,} nodes, {0.nArc:>11,} arcs\n" \
+                " Address size: {0.nBytesNodeAddress:>1} bytes, Arc size: {0.nBytesArc:>1} bytes\n".format(self)
+
+    def writeAsJSObject (self, spfDest, bInJSModule=False, bBinaryDictAsHexString=False):
+        "write IBDAWG as a JavaScript object in a JavaScript module"
+        import json
+        with open(spfDest, "w", encoding="utf-8", newline="\n") as hDst:
+            if bInJSModule:
+                hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ')
+            hDst.write(json.dumps({
+                "sName": self.sName,
+                "nVersion": self.nVersion,
+                "sHeader": self.sHeader,
+                "lArcVal": self.lArcVal,
+                "nArcVal": self.nArcVal,
+                # Mozilla’s JS parser doesn’t accept data files bigger than 4 MB, so when
+                # necessary we store the binary dictionary as a hexadecimal string and
+                # convert it back later in the Firefox extension.
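+                # (Hex encoding doubles the size of the payload, but the JSON then
+                # carries one flat string instead of a huge array of numbers.)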
+                # https://github.com/mozilla/addons-linter/issues/1361
+                "byDic": self.byDic.hex()  if bBinaryDictAsHexString  else [ e  for e in self.byDic ],
+                "sLang": self.sLang,
+                "nChar": self.nChar,
+                "nBytesArc": self.nBytesArc,
+                "nBytesNodeAddress": self.nBytesNodeAddress,
+                "nEntries": self.nEntries,
+                "nNode": self.nNode,
+                "nArc": self.nArc,
+                "nAff": self.nAff,
+                "cStemming": self.cStemming,
+                "nTag": self.nTag,
+                "dChar": self.dChar,
+                "_arcMask": self._arcMask,
+                "_finalNodeMask": self._finalNodeMask,
+                "_lastArcMask": self._lastArcMask,
+                "_addrBitMask": self._addrBitMask,
+                "nBytesOffset": self.nBytesOffset
+            }, ensure_ascii=False))
+            if bInJSModule:
+                hDst.write(";\n\nexports.dictionary = dictionary;\n")
+
+    def isValidToken (self, sToken):
+        "checks if <sToken> is valid (if there are hyphens in <sToken>, <sToken> is split, each part is checked)"
+        if self.isValid(sToken):
+            return True
+        if "-" in sToken:
+            if sToken.count("-") > 4:
+                return True
+            return all(self.isValid(sWord)  for sWord in sToken.split("-"))
+        return False
+
+    def isValid (self, sWord):
+        "checks if <sWord> is valid (different casing tested if the first letter is a capital)"
+        if not sWord:
+            return None
+        if "’" in sWord: # ugly hack
+            sWord = sWord.replace("’", "'")
+        if self.lookup(sWord):
+            return True
+        if sWord[0:1].isupper():
+            if len(sWord) > 1:
+                if sWord.istitle():
+                    return self.lookup(sWord.lower())
+                if sWord.isupper():
+                    if self.bOptNumSigle:
+                        return True
+                    return self.lookup(sWord.lower()) or self.lookup(sWord.capitalize())
+                return self.lookup(sWord[:1].lower() + sWord[1:])
+            else:
+                return self.lookup(sWord.lower())
+        return False
+
+    def lookup (self, sWord):
+        "returns True if <sWord> in dictionary (strict verification)"
+        iAddr = 0
+        for c in sWord:
+            if c not in self.dChar:
+                return False
+            iAddr = self._lookupArcNode(self.dChar[c], iAddr)
+            if iAddr is None:
+                return False
+        return bool(int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask)
+
+    def getMorph (self, sWord):
+        "retrieves morphologies list, different casing allowed"
+        l = self.morph(sWord)
+        if sWord[0:1].isupper():
+            l.extend(self.morph(sWord.lower()))
+            if sWord.isupper() and len(sWord) > 1:
+                l.extend(self.morph(sWord.capitalize()))
+        return l
+
+    #@timethis
+    def suggest (self, sWord, nSuggLimit=10):
+        "returns a set of suggestions for <sWord>"
+        sPfx, sWord, sSfx = cp.cut(sWord)
+        nMaxSwitch = max(len(sWord) // 3, 1)
+        nMaxDel = len(sWord) // 5
+        nMaxHardRepl = max((len(sWord) - 5) // 4, 1)
+        oSuggResult = SuggResult(sWord)
+        self._suggest(oSuggResult, sWord, nMaxSwitch=nMaxSwitch, nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl)
+        if sWord.istitle():
+            self._suggest(oSuggResult, sWord.lower(), nMaxSwitch=nMaxSwitch, nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl)
+        elif sWord.islower():
+            self._suggest(oSuggResult, sWord.title(), nMaxSwitch=nMaxSwitch, nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl)
+        aSugg = oSuggResult.getSuggestions(nSuggLimit)
+        if sSfx or sPfx:
+            # we add what we removed
+            return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
+        return aSugg
+
+    def _suggest (self, oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False):
+        # recursive function
+        #logging.info((nDeep * "  ") + sNewWord + ":" + sRemain)
+        if not sRemain:
+            if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
+                oSuggResult.addSugg(sNewWord, nDeep)
+            for sTail in self._getTails(iAddr):
+                oSuggResult.addSugg(sNewWord+sTail, nDeep)
+            return
+        cCurrent = sRemain[0:1]
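+        # Examine the arcs of the current node: follow an arc when its char
+        # matches cCurrent or one of its "similar chars" (cp.d1to1); otherwise
+        # follow it anyway as long as some hard-replacement credit remains.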
+        for cChar, jAddr in self._getCharArcs(iAddr):
+            if cChar in cp.d1to1.get(cCurrent, cCurrent):
+                self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, jAddr, sNewWord+cChar)
+            elif not bAvoidLoop and nMaxHardRepl:
+                self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl-1, nDeep+1, jAddr, sNewWord+cChar, True)
+        if not bAvoidLoop: # avoid infinite loop
+            if len(sRemain) > 1:
+                if cCurrent == sRemain[1:2]:
+                    # same char, we remove 1 char without adding 1 to <sNewWord>
+                    self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord)
+                else:
+                    # switching chars
+                    if nMaxSwitch:
+                        self._suggest(oSuggResult, sRemain[1:2]+sRemain[0:1]+sRemain[2:], nMaxSwitch-1, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, True)
+                    # delete char
+                    if nMaxDel:
+                        self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel-1, nMaxHardRepl, nDeep+1, iAddr, sNewWord, True)
+                # Phonetic replacements
+                for sRepl in cp.get1toXReplacement(sNewWord[-1:], cCurrent, sRemain[1:2]):
+                    self._suggest(oSuggResult, sRepl + sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, True)
+                for sRepl in cp.d2toX.get(sRemain[0:2], ()):
+                    self._suggest(oSuggResult, sRepl + sRemain[2:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, True)
+            # end of word
+            if len(sRemain) == 2:
+                for sRepl in cp.dFinal2.get(sRemain, ()):
+                    self._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, True)
+            elif len(sRemain) == 1:
+                self._suggest(oSuggResult, "", nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, True) # remove last char and go on
+                for sRepl in cp.dFinal1.get(sRemain, ()):
+                    self._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, True)
+
+    #@timethis
+    def suggest2 (self, sWord, nMaxSugg=10):
+        "returns a set of suggestions for <sWord>"
+        sPfx, sWord, sSfx = cp.cut(sWord)
+        oSuggResult = SuggResult(sWord)
+        self._suggest2(oSuggResult)
+        aSugg = oSuggResult.getSuggestions(nMaxSugg)
+        if sSfx or sPfx:
+            # we add what we removed
+            return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
+        return aSugg
+
+    def _suggest2 (self, oSuggResult, nDeep=0, iAddr=0, sNewWord=""):
+        # recursive function
+        #logging.info((nDeep * "  ") + sNewWord)
+        if nDeep >= oSuggResult.nDistLimit:
+            sCleanNewWord = cp.simplifyWord(sNewWord)
+            # SuggResult stores the simplified form in <sSimplifiedWord>
+            if st.distanceSift4(oSuggResult.sSimplifiedWord[:len(sCleanNewWord)], sCleanNewWord) > oSuggResult.nDistLimit:
+                return
+        if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
+            oSuggResult.addSugg(sNewWord, nDeep)
+        for cChar, jAddr in self._getCharArcsWithPriority(iAddr, oSuggResult.sWord[nDeep:nDeep+1]):
+            self._suggest2(oSuggResult, nDeep+1, jAddr, sNewWord+cChar)
+        return
+
+    def _getCharArcs (self, iAddr):
+        "generator: yield all chars and addresses from node at address <iAddr>"
+        for nVal, jAddr in self._getArcs(iAddr):
+            if nVal < self.nChar:
+                yield (self.dCharVal[nVal], jAddr)
+
+    def _getSimilarCharArcs (self, cChar, iAddr):
+        "generator: yield similar chars of <cChar> and addresses of the following nodes"
+        for c in cp.d1to1.get(cChar, [cChar]):
+            if c in self.dChar:
+                jAddr = self._lookupArcNode(self.dChar[c], iAddr)
+                if jAddr:
+                    yield (c, jAddr)
+
+    def _getCharArcsWithPriority (self, iAddr, cChar):
+        if not cChar:
+            yield from self._getCharArcs(iAddr)
+            return  # without this return, the arcs would be yielded twice
+        lTuple = list(self._getCharArcs(iAddr))
+        lTuple.sort(key=lambda t: 0  if t[0] in cp.d1to1.get(cChar, cChar)  else 1)
+        yield from lTuple
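+
+    # When the word to check is exhausted, _getTails collects nearby
+    # completions of the current prefix so they can be proposed as suggestions.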
sTail="", n=2): + "return a list of suffixes ending at a distance of from " + aTails = set() + for nVal, jAddr in self._getArcs(iAddr): + if nVal < self.nChar: + if int.from_bytes(self.byDic[jAddr:jAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask: + aTails.add(sTail + self.dCharVal[nVal]) + if n and not aTails: + aTails.update(self._getTails(jAddr, sTail+self.dCharVal[nVal], n-1)) + return aTails + + def drawPath (self, sWord, iAddr=0): + "show the path taken by in the graph" + c1 = sWord[0:1] if sWord else " " + iPos = -1 + n = 0 + print(c1 + ": ", end="") + for c2, jAddr in self._getCharArcs(iAddr): + print(c2, end="") + if c2 == sWord[0:1]: + iNextNodeAddr = jAddr + iPos = n + n += 1 + if not sWord: + return + if iPos >= 0: + print("\n "+ " " * iPos + "|") + self.drawPath(sWord[1:], iNextNodeAddr) + + def select (self, sPattern=""): + "generator: returns all entries which morphology fits " + zPattern = None + try: + zPattern = re.compile(sPattern) + except: + print("# Error in regex pattern") + traceback.print_exc() + yield from self._select1(zPattern, 0, "") + + # def morph (self, sWord): + # is defined in __init__ + + # VERSION 1 + def _select1 (self, zPattern, iAddr, sWord): + # recursive generator + for nVal, jAddr in self._getArcs1(iAddr): + if nVal < self.nChar: + # simple character + yield from self._select1(zPattern, jAddr, sWord + self.lArcVal[nVal]) + else: + sEntry = sWord + "\t" + self.funcStemming(sWord, self.lArcVal[nVal]) + for nMorphVal, _ in self._getArcs1(jAddr): + if not zPattern or zPattern.search(self.lArcVal[nMorphVal]): + yield sEntry + "\t" + self.lArcVal[nMorphVal] + + def _morph1 (self, sWord): + "returns morphologies of " + iAddr = 0 + for c in sWord: + if c not in self.dChar: + return [] + iAddr = self._lookupArcNode(self.dChar[c], iAddr) + if iAddr == None: + return [] + if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask): + l = [] + nRawArc = 0 + while not (nRawArc & self._lastArcMask): + iEndArcAddr = iAddr + self.nBytesArc + nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') + nArc = nRawArc & self._arcMask + if nArc >= self.nChar: + # This value is not a char, this is a stemming code + sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc]) + # Now , we go to the next node and retrieve all following arcs values, all of them are tags + iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') + nRawArc2 = 0 + while not (nRawArc2 & self._lastArcMask): + iEndArcAddr2 = iAddr2 + self.nBytesArc + nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big') + l.append(sStem + " " + self.lArcVal[nRawArc2 & self._arcMask]) + iAddr2 = iEndArcAddr2+self.nBytesNodeAddress + iAddr = iEndArcAddr+self.nBytesNodeAddress + return l + return [] + + def _stem1 (self, sWord): + "returns stems list of " + iAddr = 0 + for c in sWord: + if c not in self.dChar: + return [] + iAddr = self._lookupArcNode(self.dChar[c], iAddr) + if iAddr == None: + return [] + if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask): + l = [] + nRawArc = 0 + while not (nRawArc & self._lastArcMask): + iEndArcAddr = iAddr + self.nBytesArc + nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') + nArc = nRawArc & self._arcMask + if nArc >= self.nChar: + # This value is not a char, this is a stemming code + l.append(self.funcStemming(sWord, self.lArcVal[nArc])) + iAddr = 
iEndArcAddr+self.nBytesNodeAddress + return l + return [] + + def _lookupArcNode1 (self, nVal, iAddr): + "looks if is an arc at the node at , if yes, returns address of next node else None" + while True: + iEndArcAddr = iAddr+self.nBytesArc + nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') + if nVal == (nRawArc & self._arcMask): + # the value we are looking for + # we return the address of the next node + return int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') + else: + # value not found + if (nRawArc & self._lastArcMask): + return None + iAddr = iEndArcAddr+self.nBytesNodeAddress + + def _getArcs1 (self, iAddr): + "generator: return all arcs at as tuples of (nVal, iAddr)" + while True: + iEndArcAddr = iAddr+self.nBytesArc + nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') + yield (nRawArc & self._arcMask, int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')) + if (nRawArc & self._lastArcMask): + break + iAddr = iEndArcAddr+self.nBytesNodeAddress + + def _writeNodes1 (self, spfDest): + "for debugging only" + print(" > Write binary nodes") + with codecs.open(spfDest, 'w', 'utf-8', newline="\n") as hDst: + iAddr = 0 + hDst.write("i{:_>10} -- #{:_>10}\n".format("0", iAddr)) + while iAddr < len(self.byDic): + iEndArcAddr = iAddr+self.nBytesArc + nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') + nArc = nRawArc & self._arcMask + hDst.write(" {:<20} {:0>16} i{:>10} #{:_>10}\n".format(self.lArcVal[nArc], bin(nRawArc)[2:], "?", \ + int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], \ + byteorder='big'))) + iAddr = iEndArcAddr+self.nBytesNodeAddress + if (nRawArc & self._lastArcMask) and iAddr < len(self.byDic): + hDst.write("\ni{:_>10} -- #{:_>10}\n".format("?", iAddr)) + hDst.close() + + # VERSION 2 + def _morph2 (self, sWord): + "returns morphologies of " + iAddr = 0 + for c in sWord: + if c not in self.dChar: + return [] + iAddr = self._lookupArcNode(self.dChar[c], iAddr) + if iAddr == None: + return [] + if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask): + l = [] + nRawArc = 0 + while not (nRawArc & self._lastArcMask): + iEndArcAddr = iAddr + self.nBytesArc + nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') + nArc = nRawArc & self._arcMask + if nArc >= self.nChar: + # This value is not a char, this is a stemming code + sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc]) + # Now , we go to the next node and retrieve all following arcs values, all of them are tags + if not (nRawArc & self._addrBitMask): + iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') + else: + # we go to the end of the node + iAddr2 = iEndArcAddr + while not (nRawArc & self._lastArcMask): + nRawArc = int.from_bytes(self.byDic[iAddr2:iAddr2+self.nBytesArc], byteorder='big') + iAddr2 += self.nBytesArc + self.nBytesNodeAddress + nRawArc2 = 0 + while not (nRawArc2 & self._lastArcMask): + iEndArcAddr2 = iAddr2 + self.nBytesArc + nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big') + l.append(sStem + " " + self.lArcVal[nRawArc2 & self._arcMask]) + iAddr2 = iEndArcAddr2+self.nBytesNodeAddress if not (nRawArc2 & self._addrBitMask) else iEndArcAddr2 + iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr + return l + return [] + + def _stem2 (self, sWord): + 
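+
+    # The three binary formats store the address of the next node differently:
+    # version 1 always writes a full address after the arc; version 2 can omit
+    # it when the next node immediately follows (see _addrBitMask); version 3
+    # can replace it with a short offset (see nBytesOffset).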
"returns stems list of " + iAddr = 0 + for c in sWord: + if c not in self.dChar: + return [] + iAddr = self._lookupArcNode(self.dChar[c], iAddr) + if iAddr == None: + return [] + if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask): + l = [] + nRawArc = 0 + while not (nRawArc & self._lastArcMask): + iEndArcAddr = iAddr + self.nBytesArc + nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') + nArc = nRawArc & self._arcMask + if nArc >= self.nChar: + # This value is not a char, this is a stemming code + l.append(self.funcStemming(sWord, self.lArcVal[nArc])) + # Now , we go to the next node + if not (nRawArc & self._addrBitMask): + iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') + else: + # we go to the end of the node + iAddr2 = iEndArcAddr + while not (nRawArc & self._lastArcMask): + nRawArc = int.from_bytes(self.byDic[iAddr2:iAddr2+self.nBytesArc], byteorder='big') + iAddr2 += self.nBytesArc + self.nBytesNodeAddress + iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr + return l + return [] + + def _lookupArcNode2 (self, nVal, iAddr): + "looks if is an arc at the node at , if yes, returns address of next node else None" + while True: + iEndArcAddr = iAddr+self.nBytesArc + nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') + if nVal == (nRawArc & self._arcMask): + # the value we are looking for + if not (nRawArc & self._addrBitMask): + # we return the address of the next node + return int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') + else: + # we go to the end of the node + iAddr = iEndArcAddr + while not (nRawArc & self._lastArcMask): + nRawArc = int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') + iAddr += self.nBytesArc + self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else self.nBytesArc + return iAddr + else: + # value not found + if (nRawArc & self._lastArcMask): + return None + iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr + + def _writeNodes2 (self, spfDest): + "for debugging only" + print(" > Write binary nodes") + with codecs.open(spfDest, 'w', 'utf-8', newline="\n") as hDst: + iAddr = 0 + hDst.write("i{:_>10} -- #{:_>10}\n".format("0", iAddr)) + while iAddr < len(self.byDic): + iEndArcAddr = iAddr+self.nBytesArc + nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') + nArc = nRawArc & self._arcMask + if not (nRawArc & self._addrBitMask): + iNextNodeAddr = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') + hDst.write(" {:<20} {:0>16} i{:>10} #{:_>10}\n".format(self.lArcVal[nArc], bin(nRawArc)[2:], "?", iNextNodeAddr)) + iAddr = iEndArcAddr+self.nBytesNodeAddress + else: + hDst.write(" {:<20} {:0>16}\n".format(self.lArcVal[nArc], bin(nRawArc)[2:])) + iAddr = iEndArcAddr + if (nRawArc & self._lastArcMask): + hDst.write("\ni{:_>10} -- #{:_>10}\n".format("?", iAddr)) + hDst.close() + + # VERSION 3 + def _morph3 (self, sWord): + "returns morphologies of " + iAddr = 0 + for c in sWord: + if c not in self.dChar: + return [] + iAddr = self._lookupArcNode(self.dChar[c], iAddr) + if iAddr == None: + return [] + if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask): + l = [] + nRawArc = 0 + iAddrNode = iAddr + while not (nRawArc & self._lastArcMask): + iEndArcAddr = iAddr + 
self.nBytesArc + nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') + nArc = nRawArc & self._arcMask + if nArc >= self.nChar: + # This value is not a char, this is a stemming code + sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc]) + # Now , we go to the next node and retrieve all following arcs values, all of them are tags + if not (nRawArc & self._addrBitMask): + iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') + else: + iAddr2 = iAddrNode + int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesOffset], byteorder='big') + nRawArc2 = 0 + while not (nRawArc2 & self._lastArcMask): + iEndArcAddr2 = iAddr2 + self.nBytesArc + nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big') + l.append(sStem + " " + self.lArcVal[nRawArc2 & self._arcMask]) + iAddr2 = iEndArcAddr2+self.nBytesNodeAddress if not (nRawArc2 & self._addrBitMask) else iEndArcAddr2+self.nBytesOffset + iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr+self.nBytesOffset + return l + return [] + + def _stem3 (self, sWord): + "returns stems list of " + iAddr = 0 + for c in sWord: + if c not in self.dChar: + return [] + iAddr = self._lookupArcNode(self.dChar[c], iAddr) + if iAddr == None: + return [] + if (int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask): + l = [] + nRawArc = 0 + iAddrNode = iAddr + while not (nRawArc & self._lastArcMask): + iEndArcAddr = iAddr + self.nBytesArc + nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') + nArc = nRawArc & self._arcMask + if nArc >= self.nChar: + # This value is not a char, this is a stemming code + l.append(self.funcStemming(sWord, self.lArcVal[nArc])) + iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr+self.nBytesOffset + return l + return [] + + def _lookupArcNode3 (self, nVal, iAddr): + "looks if is an arc at the node at , if yes, returns address of next node else None" + iAddrNode = iAddr + while True: + iEndArcAddr = iAddr+self.nBytesArc + nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') + if nVal == (nRawArc & self._arcMask): + # the value we are looking for + if not (nRawArc & self._addrBitMask): + return int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') + else: + return iAddrNode + int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesOffset], byteorder='big') + else: + # value not found + if (nRawArc & self._lastArcMask): + return None + iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr+self.nBytesOffset + + def _writeNodes3 (self, spfDest): + "for debugging only" + print(" > Write binary nodes") + with codecs.open(spfDest, 'w', 'utf-8', newline="\n") as hDst: + iAddr = 0 + hDst.write("i{:_>10} -- #{:_>10}\n".format("0", iAddr)) + while iAddr < len(self.byDic): + iEndArcAddr = iAddr+self.nBytesArc + nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') + nArc = nRawArc & self._arcMask + if not (nRawArc & self._addrBitMask): + iNextNodeAddr = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big') + hDst.write(" {:<20} {:0>16} i{:>10} #{:_>10}\n".format(self.lArcVal[nArc], bin(nRawArc)[2:], "?", iNextNodeAddr)) + iAddr = iEndArcAddr+self.nBytesNodeAddress + else: + iNextNodeAddr = 
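+
+    # NB: a version 3 offset is relative to <iAddrNode>, the address where the
+    # current node begins, not to the arc being read.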
+    def _writeNodes3 (self, spfDest):
+        "for debugging only"
+        print(" > Write binary nodes")
+        with codecs.open(spfDest, 'w', 'utf-8', newline="\n") as hDst:
+            iAddr = 0
+            hDst.write("i{:_>10} -- #{:_>10}\n".format("0", iAddr))
+            while iAddr < len(self.byDic):
+                iEndArcAddr = iAddr+self.nBytesArc
+                nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
+                nArc = nRawArc & self._arcMask
+                if not (nRawArc & self._addrBitMask):
+                    iNextNodeAddr = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
+                    hDst.write(" {:<20} {:0>16} i{:>10} #{:_>10}\n".format(self.lArcVal[nArc], bin(nRawArc)[2:], "?", iNextNodeAddr))
+                    iAddr = iEndArcAddr+self.nBytesNodeAddress
+                else:
+                    iNextNodeAddr = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesOffset], byteorder='big')
+                    hDst.write(" {:<20} {:0>16} i{:>10} +{:_>10}\n".format(self.lArcVal[nArc], bin(nRawArc)[2:], "?", iNextNodeAddr))
+                    iAddr = iEndArcAddr+self.nBytesOffset
+                if (nRawArc & self._lastArcMask):
+                    hDst.write("\ni{:_>10} -- #{:_>10}\n".format("?", iAddr))

ADDED graphspell/keyboard_chars_proximity.py
Index: graphspell/keyboard_chars_proximity.py
==================================================================
--- graphspell/keyboard_chars_proximity.py
+++ graphspell/keyboard_chars_proximity.py
@@ -0,0 +1,220 @@
+# Keyboard chars proximity
+
+
+def getKeyboardMap (sKeyboard):
+    "returns the proximity dictionary for <sKeyboard> (an empty dict if unknown)"
+    return _dKeyboardMap.get(sKeyboard.lower(), {})
+
+
+def getKeyboardList ():
+    "returns the names of the known keyboard layouts"
+    return _dKeyboardMap.keys()
+
+
+_dKeyboardMap = {
+    # keyboards in alphabetical order
+    # bépo, colemak and dvorak users are assumed to make fewer typing errors.
+    "azerty": {
+        # fr
+        # line 1
+        "é": "az",
+        "è": "yu",
+        "ç": "àio",
+        "à": "op",
+        # line 2
+        "a": "zéq",
+        "z": "aesq",
+        "e": "zrds",
+        "r": "etfd",
+        "t": "rygf",
+        "y": "tuhg",
+        "u": "yijh",
+        "i": "uokj",
+        "o": "iplk",
+        "p": "oml",
+        # line 3
+        "q": "sawz",
+        "s": "qdzwxe",
+        "d": "sfexcr",
+        "f": "dgrcvt",
+        "g": "fhtvby",
+        "h": "gjybnu",
+        "j": "hkuni",
+        "k": "jlio",
+        "l": "kmop",
+        "m": "lùp",
+        "ù": "m",
+        # line 4
+        "w": "xqs",
+        "x": "wcsd",
+        "c": "xvdf",
+        "v": "cbfg",
+        "b": "vngh",
+        "n": "bhj",
+    },
+    "bépo": {
+        # fr
+        # line 2
+        "b": "éa",
+        "é": "bpu",
+        "p": "éoi",
+        "o": "pèe",
+        "è": "o",
+        "v": "dt",
+        "d": "vls",
+        "l": "djr",
+        "j": "lzn",
+        "z": "jmw",
+        # line 3
+        "a": "ubà",
+        "u": "aiéy",
+        "i": "uepx",
+        "e": "io",
+        "c": "t",
+        "t": "csvq",
+        "s": "trdg",
+        "r": "snlh",
+        "n": "rmjf",
+        "m": "nzç",
+        # line 4
+        "à": "yêa",
+        "y": "àxu",
+        "x": "ywi",
+        "w": "z",
+        "k": "c",
+        "q": "gt",
+        "g": "qhs",
+        "h": "gfr",
+        "f": "hçn",
+        "ç": "fm",
+    },
+    "colemak": {
+        # en, us, intl
+        # line 2
+        "q": "wa",
+        "w": "qfr",
+        "f": "wps",
+        "p": "fgt",
+        "g": "pjd",
+        "j": "glh",
+        "l": "jun",
+        "u": "lye",
+        "y": "ui",
+        # line 3
+        "a": "rqz",
+        "r": "aswx",
+        "s": "rtfc",
+        "t": "sdpv",
+        "d": "thgb",
+        "h": "dnjk",
+        "n": "helm",
+        "e": "niu",
+        "i": "eoy",
+        "o": "i",
+        # line 4
+        "z": "xa",
+        "x": "zcr",
+        "c": "xvs",
+        "v": "cbt",
+        "b": "vkd",
+        "k": "bmh",
+        "m": "kn",
+    },
+    "dvorak": {
+        # en, us, intl
+        # line 2
+        "p": "yu",
+        "y": "pfi",
+        "f": "ygd",
+        "g": "fch",
+        "c": "grt",
+        "r": "cln",
+        "l": "rs",
+        # line 3
+        "a": "o",
+        "o": "aeq",
+        "e": "ouj",
+        "u": "eipk",
+        "i": "udyx",
+        "d": "ihfb",
+        "h": "dtgm",
+        "t": "hncw",
+        "n": "tsrv",
+        "s": "nlz",
+        # line 4
+        "q": "jo",
+        "j": "qke",
+        "k": "jxu",
+        "x": "kbi",
+        "b": "xmd",
+        "m": "bwh",
+        "w": "mvt",
+        "v": "wzn",
+        "z": "vs",
+    },
+    "qwerty": {
+        # en, us, intl
+        # line 2
+        "q": "wa",
+        "w": "qeas",
+        "e": "wrds",
+        "r": "etfd",
+        "t": "rygf",
+        "y": "tuhg",
+        "u": "yijh",
+        "i": "uokj",
+        "o": "iplk",
+        "p": "ol",
+        # line 3
+        "a": "sqzw",
+        "s": "adwzxe",
+        "d": "sfexcr",
+        "f": "dgrcvt",
+        "g": "fhtvby",
+        "h": "gjybnu",
+        "j": "hkunmi",
+        "k": "jlimo",
+        "l": "kop",
+        # line 4
+        "z": "xas",
+        "x": "zcsd",
+        "c": "xvdf",
+        "v": "cbfg",
+        "b": "vngh",
+        "n": "bmhj",
+        "m": "njk",
+    },
+    "qwertz": {
+        # ge, au
+        # line 2
+        "q": "wa",
+        "w": "qeas",
+        "e": "wrds",
+        "r": "etfd",
+        "t": "rzgf",
+        "z": "tuhg",
+        "u": "zijh",
+        "i": "uokj",
+        "o": "iplk",
+        "p": "oüöl",
+        "ü": "päö",
+        # line 3
+        "a": "sqyw",
+        "s": "adwyxe",
+        "d": "sfexcr",
+        "f": "dgrcvt",
+        "g": "fhtvbz",
+        "h": "gjzbnu",
+        "j": "hkunmi",
+        "k": "jlimo",
+        "l": "köop",
+        "ö": "läpü",
+        "ä": "öü",
+        # line 4
+        "y": "xas",
+        "x": "ycsd",
+        "c": "xvdf",
+        "v": "cbfg",
+        "b": "vngh",
+        "n": "bmhj",
+        "m": "njk",
+    }
+}
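A sketch of how this proximity map is meant to be consumed by suggestion code (illustrative only, not part of this commit):

    from graphspell.keyboard_chars_proximity import getKeyboardMap

    dProx = getKeyboardMap("azerty")
    print(dProx.get("m", ""))    # -> "lùp": keys a finger may hit instead of "m"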
"gjzbnu", + "j": "hkunmi", + "k": "jlimo", + "l": "köop", + "ö": "läpü", + "ä": "öü", + # line 4 + "y": "xas", + "x": "ycsd", + "c": "xvdf", + "v": "cbfg", + "b": "vngh", + "n": "bmhj", + "m": "njk", + } +} ADDED graphspell/progressbar.py Index: graphspell/progressbar.py ================================================================== --- graphspell/progressbar.py +++ graphspell/progressbar.py @@ -0,0 +1,35 @@ +# Textual progressbar +# by Olivier R. +# License: MPL 2 + +import time + +class ProgressBar: + "Textual progressbar" + + def __init__ (self, nMin=0, nMax=100, nWidth=78): + "initiate with minimum nMin to maximum nMax" + self.nMin = nMin + self.nMax = nMax + self.nSpan = nMax - nMin + self.nWidth = nWidth-9 + self.nAdvance = -1 + self.nCurVal = nMin + self.startTime = time.time() + self._update() + + def _update (self): + fDone = ((self.nCurVal - self.nMin) / self.nSpan) + nAdvance = int(fDone * self.nWidth) + if (nAdvance > self.nAdvance): + self.nAdvance = nAdvance + print("\r[ {}{} {}% ] ".format('>'*nAdvance, ' '*(self.nWidth-nAdvance), round(fDone*100)), end="") + + def increment (self, n=1): + "increment value by n (1 by default)" + self.nCurVal += n + self._update() + + def done (self): + "to call when it’s finished" + print("\r[ task done in {:.1f} s ] ".format(time.time() - self.startTime)) ADDED graphspell/spellchecker.py Index: graphspell/spellchecker.py ================================================================== --- graphspell/spellchecker.py +++ graphspell/spellchecker.py @@ -0,0 +1,134 @@ +# Spellchecker +# Wrapper for the IBDAWG class. +# Useful to check several dictionaries at once. + +from . import ibdawg + + +dDictionaries = { + "fr": "French.bdic", + "en": "English.bdic" +} + + +class Spellchecker (): + + def __init__ (self, sLangCode): + self.sLangCode = sLangCode + self.oMainDic = None + if sLangCode in dDictionaries: + self.oMainDic = ibdawg.IBDAWG(dDictionaries[sLangCode]) + self.lOtherDic = [] + return bool(self.oMainDic) + + + def setMainDictionary (self, sDicName): + try: + self.oMainDic = ibdawg.IBDAWG(sDicName) + return True + except: + print("Error: <" + sDicName + "> not set as main dictionary.") + return False + + def addDictionary (self, sDicName): + try: + self.lOtherDic.append(ibdawg.IBDAWG(sDicName)) + return True + except: + print("Error: <" + sDicName + "> not added to the list.") + return False + + # Return codes: + # 0: invalid + # 1: correct in main dictionary + # 2+: correct in foreign dictionaries + + + # check in the main dictionary only + + def isValidToken (self, sToken): + "(in main dictionary) checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)" + if self.oMainDic.isValidToken(sToken): + return 1 + return 0 + + def isValid (self, sWord): + "(in main dictionary) checks if sWord is valid (different casing tested if the first letter is a capital)" + if self.oMainDic.isValid(sWord): + return 1 + return 0 + + def lookup (self, sWord): + "(in main dictionary) checks if sWord is in dictionary as is (strict verification)" + if self.oMainDic.lookup(sWord): + return 1 + return 0 + + + # check in all dictionaries + + def isValidTokenAll (self, sToken): + "(in all dictionaries) checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)" + if self.oMainDic.isValidToken(sToken): + return 1 + for i, oDic in enumerate(self.lOtherDic, 2): + if oDic.isValidToken(sToken): + return i + return 0 + + def isValidAll (self, sWord): + "(in all dictionaries) checks if 
sWord is valid (different casing tested if the first letter is a capital)" + if self.oMainDic.isValid(sToken): + return 1 + for i, oDic in enumerate(self.lOtherDic, 2): + if oDic.isValid(sToken): + return i + return 0 + + def lookupAll (self, sWord): + "(in all dictionaries) checks if sWord is in dictionary as is (strict verification)" + if self.oMainDic.lookup(sToken): + return 1 + for i, oDic in enumerate(self.lOtherDic, 2): + if oDic.lookup(sToken): + return i + return 0 + + + # check in dictionaries up to level n + + def isValidTokenLevel (self, sToken, nLevel): + "(in dictionaries up to level n) checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)" + if self.oMainDic.isValidToken(sToken): + return 1 + if nLevel >= 2: + for i, oDic in enumerate(self.lOtherDic, 2): + if oDic.isValidToken(sToken): + return i + if i == nLevel: + break + return 0 + + def isValidLevel (self, sWord, nLevel): + "(in dictionaries up to level n) checks if sWord is valid (different casing tested if the first letter is a capital)" + if self.oMainDic.isValid(sToken): + return 1 + if nLevel >= 2: + for i, oDic in enumerate(self.lOtherDic, 2): + if oDic.isValid(sToken): + return i + if i == nLevel: + break + return 0 + + def lookupLevel (self, sWord, nLevel): + "(in dictionaries up to level n) checks if sWord is in dictionary as is (strict verification)" + if self.oMainDic.lookup(sToken): + return 1 + if nLevel >= 2: + for i, oDic in enumerate(self.lOtherDic, 2): + if oDic.lookup(sToken): + return i + if i == nLevel: + break + return 0 ADDED graphspell/str_transform.py Index: graphspell/str_transform.py ================================================================== --- graphspell/str_transform.py +++ graphspell/str_transform.py @@ -0,0 +1,203 @@ +#!python3 + + +#### DISTANCE CALCULATIONS + +def longestCommonSubstring (s1, s2): + # http://en.wikipedia.org/wiki/Longest_common_substring_problem + # http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Longest_common_substring + M = [ [0]*(1+len(s2)) for i in range(1+len(s1)) ] + longest, x_longest = 0, 0 + for x in range(1, 1+len(s1)): + for y in range(1, 1+len(s2)): + if s1[x-1] == s2[y-1]: + M[x][y] = M[x-1][y-1] + 1 + if M[x][y] > longest: + longest = M[x][y] + x_longest = x + else: + M[x][y] = 0 + return s1[x_longest-longest : x_longest] + + +def distanceDamerauLevenshtein (s1, s2): + "distance of Damerau-Levenshtein between and " + # https://fr.wikipedia.org/wiki/Distance_de_Damerau-Levenshtein + d = {} + nLen1 = len(s1) + nLen2 = len(s2) + for i in range(-1, nLen1+1): + d[i, -1] = i + 1 + for j in range(-1, nLen2+1): + d[-1, j] = j + 1 + for i in range(nLen1): + for j in range(nLen2): + nCost = 0 if s1[i] == s2[j] else 1 + d[i, j] = min( + d[i-1, j] + 1, # Deletion + d[i, j-1] + 1, # Insertion + d[i-1, j-1] + nCost, # Substitution + ) + if i and j and s1[i] == s2[j-1] and s1[i-1] == s2[j]: + d[i, j] = min(d[i, j], d[i-2, j-2] + nCost) # Transposition + return d[nLen1-1, nLen2-1] + + +def distanceSift4 (s1, s2, nMaxOffset=5): + "implementation of general Sift4." 
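+    # Sift4 walks both strings in a single pass with one cursor per string,
+    # summing the lengths of common substrings and counting transpositions;
+    # the result approximates edit distance in roughly linear time.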
+    # https://siderite.blogspot.com/2014/11/super-fast-and-accurate-string-distance.html
+    if not s1:
+        return len(s2)
+    if not s2:
+        return len(s1)
+    nLen1, nLen2 = len(s1), len(s2)
+    i1, i2 = 0, 0   # Cursors for each string
+    nLargestCS = 0  # Largest common substring
+    nLocalCS = 0    # Local common substring
+    nTrans = 0      # Number of transpositions ('ab' vs 'ba')
+    lOffset = []    # Offset pair array, for computing the transpositions
+
+    while i1 < nLen1 and i2 < nLen2:
+        if s1[i1] == s2[i2]:
+            nLocalCS += 1
+            # Check if current match is a transposition
+            bTrans = False
+            i = 0
+            while i < len(lOffset):
+                t = lOffset[i]
+                if i1 <= t[0] or i2 <= t[1]:
+                    bTrans = abs(i2-i1) >= abs(t[1] - t[0])
+                    if bTrans:
+                        nTrans += 1
+                    elif not t[2]:
+                        t[2] = True
+                        nTrans += 1
+                    break
+                elif i1 > t[1] and i2 > t[0]:
+                    del lOffset[i]
+                else:
+                    i += 1
+            lOffset.append([i1, i2, bTrans])
+        else:
+            nLargestCS += nLocalCS
+            nLocalCS = 0
+            if i1 != i2:
+                i1 = i2 = min(i1, i2)
+            for i in range(nMaxOffset):
+                if i1 + i >= nLen1 and i2 + i >= nLen2:
+                    break
+                elif i1 + i < nLen1 and s1[i1+i] == s2[i2]:
+                    i1 += i - 1
+                    i2 -= 1
+                    break
+                elif i2 + i < nLen2 and s1[i1] == s2[i2+i]:
+                    i2 += i - 1
+                    i1 -= 1
+                    break
+        i1 += 1
+        i2 += 1
+        if i1 >= nLen1 or i2 >= nLen2:
+            nLargestCS += nLocalCS
+            nLocalCS = 0
+            i1 = i2 = min(i1, i2)
+    nLargestCS += nLocalCS
+    return round(max(nLen1, nLen2) - nLargestCS + nTrans)
+
+
+def showDistance (s1, s2):
+    print("Damerau-Levenshtein: " + s1 + "/" + s2 + " = " + str(distanceDamerauLevenshtein(s1, s2)))
+    print("Sift4: " + s1 + "/" + s2 + " = " + str(distanceSift4(s1, s2)))
+
+
+
+
+#### STEMMING OPERATIONS
+
+## No stemming
+
+def noStemming (sFlex, sStem):
+    return sStem
+
+def rebuildWord (sFlex, cmd1, cmd2):
+    if cmd1 == "_":
+        return sFlex
+    n, c = cmd1.split(":")
+    s = sFlex[:int(n)] + c + sFlex[int(n):]     # the position arrives as a string
+    if cmd2 == "_":
+        return s
+    n, c = cmd2.split(":")
+    return s[:int(n)] + c + s[int(n):]
+
+
+## Define affixes for stemming
+
+# Note: 48 is the ASCII code for "0"
+
+
+# Suffix only
+def defineSuffixCode (sFlex, sStem):
+    """ Returns a string defining how to get stem from flexion
+            "n(sfx)"
+        with n: a char with numeric meaning, "0" = 0, "1" = 1, ... ":" = 10, etc. (See ASCII table.) Says how many letters to strip from flexion.
+             sfx [optional]: string to add on flexion
+        Examples:
+            "0": strips nothing, adds nothing
+            "1er": strips 1 letter, adds "er"
+            "2": strips 2 letters, adds nothing
+    """
+    if sFlex == sStem:
+        return "0"
+    jSfx = 0
+    for i in range(min(len(sFlex), len(sStem))):
+        if sFlex[i] != sStem[i]:
+            break
+        jSfx += 1
+    return chr(len(sFlex)-jSfx+48) + sStem[jSfx:]
+
+
+def changeWordWithSuffixCode (sWord, sSfxCode):
+    if sSfxCode == "0":
+        return sWord
+    return sWord[:-(ord(sSfxCode[0])-48)] + sSfxCode[1:]  if sSfxCode[0] != '0'  else sWord + sSfxCode[1:]
+
+
+# Prefix and suffix
+
+def defineAffixCode (sFlex, sStem):
+    """ Returns a string defining how to get stem from flexion. Examples:
+            "0" if stem = flexion
+            "stem" if no common substring
+            "n(pfx)/m(sfx)"
+        with n and m: chars with numeric meaning, "0" = 0, "1" = 1, ... ":" = 10, etc. (See ASCII table.) Says how many letters to strip from flexion.
+             pfx [optional]: string to add before the flexion
+             sfx [optional]: string to add after the flexion
+    """
+    if sFlex == sStem:
+        return "0"
+    # is stem a substring of flexion?
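+    # e.g. sFlex="désirer", sStem="désir": the stem starts at index 0 and the
+    # flexion has 2 trailing chars left over, hence the code "0/2"
+    # (chr(0+48) == "0", chr(2+48) == "2")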
+    n = sFlex.find(sStem)
+    if n >= 0:
+        return "{}/{}".format(chr(n+48), chr(len(sFlex)-(len(sStem)+n)+48))
+    # no, so we are looking for the longest common substring
+    sSubs = longestCommonSubstring(sFlex, sStem)
+    if len(sSubs) > 1:
+        iPos = sStem.find(sSubs)
+        sPfx = sStem[:iPos]
+        sSfx = sStem[iPos+len(sSubs):]
+        n = sFlex.find(sSubs)
+        m = len(sFlex) - (len(sSubs)+n)
+        sAff = "{}/".format(chr(n+48))  if not sPfx  else "{}{}/".format(chr(n+48), sPfx)
+        sAff += chr(m+48)  if not sSfx  else "{}{}".format(chr(m+48), sSfx)
+        return sAff
+    return sStem
+
+
+def changeWordWithAffixCode (sWord, sAffCode):
+    if sAffCode == "0":
+        return sWord
+    if '/' not in sAffCode:
+        return "# error #"
+    sPfxCode, sSfxCode = sAffCode.split('/')
+    sWord = sPfxCode[1:] + sWord[(ord(sPfxCode[0])-48):]
+    return sWord[:-(ord(sSfxCode[0])-48)] + sSfxCode[1:]  if sSfxCode[0] != '0'  else sWord + sSfxCode[1:]

ADDED graphspell/tokenizer.py
Index: graphspell/tokenizer.py
==================================================================
--- graphspell/tokenizer.py
+++ graphspell/tokenizer.py
@@ -0,0 +1,49 @@
+# Very simple tokenizer
+
+import re
+
+_PATTERNS = {
+    "default":
+        (
+            r'(?P<FOLDERUNIX>/(?:bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:/[\w.()-]+)*)',
+            r'(?P<FOLDERWIN>[a-zA-Z]:\\(?:Program Files(?: [(]x86[)]|)|[\w.()]+)(?:\\[\w.()-]+)*)',
+            r'(?P<PUNC>[.,?!:;…«»“”"()/·]+)',
+            r'(?P<ACRONYM>[A-Z][.][A-Z][.](?:[A-Z][.])*)',
+            r'(?P<LINK>(?:https?://|www[.]|\w+[@.]\w\w+[@.])\w[\w./?&!%=+*"\'@$#-]+)',
+            r'(?P<HASHTAG>[#@][\w-]+)',
+            r'(?P<HTML><\w+.*?>|</\w+ *>)',
+            r'(?P<PSEUDOHTML>\[/?\w+\])',
+            r'(?P<HOUR>\d\d?h\d\d\b)',
+            r'(?P<NUM>-?\d+(?:[.,]\d+))',
+            r"(?P<WORD>\w+(?:[’'`-]\w+)*)"
+        ),
+    "fr":
+        (
+            r'(?P<FOLDERUNIX>/(?:bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:/[\w.()-]+)*)',
+            r'(?P<FOLDERWIN>[a-zA-Z]:\\(?:Program Files(?: [(]x86[)]|)|[\w.()]+)(?:\\[\w.()-]+)*)',
+            r'(?P<PUNC>[.,?!:;…«»“”"()/·]+)',
+            r'(?P<ACRONYM>[A-Z][.][A-Z][.](?:[A-Z][.])*)',
+            r'(?P<LINK>(?:https?://|www[.]|\w+[@.]\w\w+[@.])\w[\w./?&!%=+*"\'@$#-]+)',
+            r'(?P<HASHTAG>[#@][\w-]+)',
+            r'(?P<HTML><\w+.*?>|</\w+ *>)',
+            r'(?P<PSEUDOHTML>\[/?\w+\])',
+            r"(?P<ELPFX>(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`])",
+            r'(?P<ORDINAL>\d+(?:er|nd|e|de|ième|ème|eme)\b)',
+            r'(?P<HOUR>\d\d?h\d\d\b)',
+            r'(?P<NUM>-?\d+(?:[.,]\d+|))',
+            r"(?P<WORD>\w+(?:[’'`-]\w+)*)"
+        )
+}
+
+
+class Tokenizer:
+
+    def __init__ (self, sLang):
+        self.sLang = sLang
+        if sLang not in _PATTERNS:
+            self.sLang = "default"
+        self.zToken = re.compile( "(?i)" + '|'.join(sRegex  for sRegex in _PATTERNS[self.sLang]) )
+
+    def genTokens (self, sText):
+        for m in self.zToken.finditer(sText):
+            yield { "sType": m.lastgroup, "sValue": m.group(), "nStart": m.start(), "nEnd": m.end() }
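
For orientation, a minimal sketch of how the modules added above fit together. It is illustrative only, not part of the commit, and assumes a built dictionary French.bdic installed under graphspell/_dictionaries/ (as declared in spellchecker.py) and the WORD token type of the tokenizer:

    from graphspell.ibdawg import IBDAWG
    from graphspell.tokenizer import Tokenizer

    oDic = IBDAWG("French.bdic")    # raises OSError if the dictionary file is absent
    oTok = Tokenizer("fr")
    for dToken in oTok.genTokens("Quelle maizon magnifique !"):
        if dToken["sType"] == "WORD" and not oDic.isValidToken(dToken["sValue"]):
            print(dToken["sValue"], "->", oDic.suggest(dToken["sValue"]))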