Index: gc_core/py/char_player.py ================================================================== --- gc_core/py/char_player.py +++ gc_core/py/char_player.py @@ -169,10 +169,11 @@ "u": ("ut", "us"), } dFinal2 = { + "ai": ("aient", "ais", "et"), "an": ("ant", "ent"), "en": ("ent", "ant"), "ei": ("ait", "ais"), "on": ("ons", "ont"), "oi": ("ois", "oit", "oix"), @@ -179,84 +180,12 @@ } # Préfixes -aPfx = ("anti", "contre", "mé", "im", "in", "ir", "par", "pré", "re", "ré", "sans", "sous", "sur") - - -# Keyboards - -dBépo = { - # on présume que le bépoète est moins susceptible de faire des erreurs de frappe que l’azertyste. - # ligne 2 - "b": "éa", - "é": "bpu", - "p": "éoi", - "o": "pèe", - "è": "o", - "v": "dt", - "d": "vls", - "l": "djr", - "j": "lzn", - "z": "jmw", - # ligne 3 - "a": "ubà", - "u": "aiéy", - "i": "uepx", - "e": "io", - "c": "t", - "t": "csvq", - "s": "trdg", - "r": "snlh", - "n": "rmjf", - "m": "nzç", - # ligne 4 - "à": "yêa", - "y": "àxu", - "x": "ywi", - "w": "z", - "k": "c", - "q": "gt", - "g": "qhs", - "h": "gfr", - "f": "hçn", - "ç": "fm", -} - -dAzerty = { - # ligne 1 - "é": "az", - "è": "yu", - "ç": "àio", - "à": "op", - # ligne 2 - "a": "zéqs", - "z": "aesqd", - "e": "zrdsf", - "r": "etfdg", - "t": "rygfh", - "y": "tuhgj", - "u": "yijhk", - "i": "uokjl", - "o": "iplkm", - "p": "oml", - # ligne 3 - "q": "sawz", - "s": "qdzwxe", - "d": "sfexcr", - "f": "dgrcvt", - "g": "fhtvby", - "h": "gjybnu", - "j": "hkuni", - "k": "jlio", - "l": "kmop", - "m": "lùp", - "ù": "m", - # ligne 4 - "w": "xqs", - "x": "wcsd", - "c": "xvdf", - "v": "cbfg", - "b": "vngh", - "n": "bhj", -} +aPfx1 = frozenset([ + "anti", "archi", "contre", "hyper", "mé", "méta", "im", "in", "ir", "par", "proto", + "pseudo", "pré", "re", "ré", "sans", "sous", "supra", "sur", "ultra" +]) +aPfx2 = frozenset([ + "belgo", "franco", "génito", "gynéco", "médico", "russo" +]) Index: gc_core/py/ibdawg.py ================================================================== --- 
gc_core/py/ibdawg.py +++ gc_core/py/ibdawg.py @@ -132,21 +132,21 @@ }, ensure_ascii=False)) if bInJSModule: hDst.write(";\n\nexports.dictionary = dictionary;\n") def isValidToken (self, sToken): - "checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)" + "checks if is valid (if there is hyphens in , is split, each part is checked)" if self.isValid(sToken): return True if "-" in sToken: if sToken.count("-") > 4: return True return all(self.isValid(sWord) for sWord in sToken.split("-")) return False def isValid (self, sWord): - "checks if sWord is valid (different casing tested if the first letter is a capital)" + "checks if is valid (different casing tested if the first letter is a capital)" if not sWord: return None if "’" in sWord: # ugly hack sWord = sWord.replace("’", "'") if self.lookup(sWord): @@ -163,11 +163,11 @@ else: return bool(self.lookup(sWord.lower())) return False def lookup (self, sWord): - "returns True if sWord in dictionary (strict verification)" + "returns True if in dictionary (strict verification)" iAddr = 0 for c in sWord: if c not in self.dChar: return False iAddr = self._lookupArcNode(self.dChar[c], iAddr) @@ -176,21 +176,21 @@ return int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask def suggest (self, sWord): "returns a set of similar words" # first, we check for similar words - return set(self._suggestWithCrushedUselessChars(cp.clearWord(sWord))) + #return set(self._suggestWithCrushedUselessChars(cp.clearWord(sWord))) lSugg = self._suggest(sWord) if not lSugg: lSugg.extend(self._suggest(sWord[1:])) lSugg.extend(self._suggest(sWord[:-1])) lSugg.extend(self._suggest(sWord[1:-1])) if not lSugg: lSugg.extend(self._suggestWithCrushedUselessChars(cp.clearWord(sWord))) return set(lSugg) - def _suggest (self, sWord, cPrevious='', nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False): + def _suggest (self, sWord, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False): # 
RECURSIVE FUNCTION if not sWord: if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask: show(nDeep, "!!! " + sNewWord + " !!!") return [sNewWord] @@ -198,25 +198,29 @@ #show(nDeep, "<" + sWord + "> ===> " + sNewWord) lSugg = [] cCurrent = sWord[0:1] for cChar, jAddr in self._getSimilarArcs(cCurrent, iAddr): #show(nDeep, cChar) - lSugg.extend(self._suggest(sWord[1:], cCurrent, nDeep+1, jAddr, sNewWord+cChar)) + lSugg.extend(self._suggest(sWord[1:], nDeep+1, jAddr, sNewWord+cChar)) if not bAvoidLoop: # avoid infinite loop #show(nDeep, ":no loop:") - if cPrevious == cCurrent: + if cCurrent == sWord[1:2]: # same char, we remove 1 char without adding 1 to - lSugg.extend(self._suggest(sWord[1:], cCurrent, nDeep+1, iAddr, sNewWord)) + lSugg.extend(self._suggest(sWord[1:], nDeep+1, iAddr, sNewWord)) for sRepl in cp.d1toX.get(cCurrent, ()): #show(nDeep, sRepl) - lSugg.extend(self._suggest(sRepl + sWord[1:], cCurrent, nDeep+1, iAddr, sNewWord, True)) - if len(sWord) == 1: + lSugg.extend(self._suggest(sRepl + sWord[1:], nDeep+1, iAddr, sNewWord, True)) + if len(sWord) == 2: + for sRepl in cp.dFinal2.get(sWord, ()): + #show(nDeep, sRepl) + lSugg.extend(self._suggest(sRepl, nDeep+1, iAddr, sNewWord, True)) + elif len(sWord) == 1: #show(nDeep, ":end of word:") # end of word for sRepl in cp.dFinal1.get(sWord, ()): #show(nDeep, sRepl) - lSugg.extend(self._suggest(sRepl, cCurrent, nDeep+1, iAddr, sNewWord, True)) + lSugg.extend(self._suggest(sRepl, nDeep+1, iAddr, sNewWord, True)) return lSugg def _getSimilarArcs (self, cChar, iAddr): "generator: yield similar char of and address of the following node" for c in cp.d1to1.get(cChar, [cChar]): @@ -223,21 +227,21 @@ if c in self.dChar: jAddr = self._lookupArcNode(self.dChar[c], iAddr) if jAddr: yield (c, jAddr) - def _suggestWithCrushedUselessChars (self, sWord, cPrevious='', nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False): + def _suggestWithCrushedUselessChars (self, sWord, nDeep=0, iAddr=0, 
sNewWord="", bAvoidLoop=False): if not sWord: if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask: show(nDeep, "!!! " + sNewWord + " !!!") return [sNewWord] return [] lSugg = [] cCurrent = sWord[0:1] for cChar, jAddr in self._getSimilarArcsAndCrushedChars(cCurrent, iAddr): show(nDeep, cChar) - lSugg.extend(self._suggestWithCrushedUselessChars(sWord[1:], cCurrent, nDeep+1, jAddr, sNewWord+cChar)) + lSugg.extend(self._suggestWithCrushedUselessChars(sWord[1:], nDeep+1, jAddr, sNewWord+cChar)) return lSugg def _getSimilarArcsAndCrushedChars (self, cChar, iAddr): "generator: yield similar char of and address of the following node" for nVal, jAddr in self._getArcs(iAddr): @@ -292,11 +296,11 @@ iAddr = iEndArcAddr+self.nBytesNodeAddress return l return [] def _stem1 (self, sWord): - "returns stems list of sWord" + "returns stems list of " iAddr = 0 for c in sWord: if c not in self.dChar: return [] iAddr = self._lookupArcNode(self.dChar[c], iAddr) @@ -315,11 +319,11 @@ iAddr = iEndArcAddr+self.nBytesNodeAddress return l return [] def _lookupArcNode1 (self, nVal, iAddr): - "looks if nVal is an arc at the node at iAddr, if yes, returns address of next node else None" + "looks if is an arc at the node at , if yes, returns address of next node else None" while True: iEndArcAddr = iAddr+self.nBytesArc nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') if nVal == (nRawArc & self._arcMask): # the value we are looking for @@ -359,11 +363,11 @@ hDst.write("\ni{:_>10} -- #{:_>10}\n".format("?", iAddr)) hDst.close() # VERSION 2 def _morph2 (self, sWord): - "returns morphologies of sWord" + "returns morphologies of " iAddr = 0 for c in sWord: if c not in self.dChar: return [] iAddr = self._lookupArcNode(self.dChar[c], iAddr) @@ -397,11 +401,11 @@ iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr return l return [] def _stem2 (self, sWord): - "returns stems list of sWord" + 
"returns stems list of " iAddr = 0 for c in sWord: if c not in self.dChar: return [] iAddr = self._lookupArcNode(self.dChar[c], iAddr) @@ -429,11 +433,11 @@ iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr return l return [] def _lookupArcNode2 (self, nVal, iAddr): - "looks if nVal is an arc at the node at iAddr, if yes, returns address of next node else None" + "looks if is an arc at the node at , if yes, returns address of next node else None" while True: iEndArcAddr = iAddr+self.nBytesArc nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') if nVal == (nRawArc & self._arcMask): # the value we are looking for @@ -474,11 +478,11 @@ hDst.write("\ni{:_>10} -- #{:_>10}\n".format("?", iAddr)) hDst.close() # VERSION 3 def _morph3 (self, sWord): - "returns morphologies of sWord" + "returns morphologies of " iAddr = 0 for c in sWord: if c not in self.dChar: return [] iAddr = self._lookupArcNode(self.dChar[c], iAddr) @@ -509,11 +513,11 @@ iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr+self.nBytesOffset return l return [] def _stem3 (self, sWord): - "returns stems list of sWord" + "returns stems list of " iAddr = 0 for c in sWord: if c not in self.dChar: return [] iAddr = self._lookupArcNode(self.dChar[c], iAddr) @@ -533,11 +537,11 @@ iAddr = iEndArcAddr+self.nBytesNodeAddress if not (nRawArc & self._addrBitMask) else iEndArcAddr+self.nBytesOffset return l return [] def _lookupArcNode3 (self, nVal, iAddr): - "looks if nVal is an arc at the node at iAddr, if yes, returns address of next node else None" + "looks if is an arc at the node at , if yes, returns address of next node else None" iAddrNode = iAddr while True: iEndArcAddr = iAddr+self.nBytesArc nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big') if nVal == (nRawArc & self._arcMask): ADDED gc_core/py/keyboard_chars_proximity.py Index: gc_core/py/keyboard_chars_proximity.py 
# Keyboard chars proximity
#
# Module: gc_core/py/keyboard_chars_proximity.py (added as a new file by this change set).
# For each supported keyboard layout, maps a character to the string of characters
# listed as its neighbours in the table below.


def getKeyboardMap (sKeyboard):
    "returns the proximity map (dict: char -> string of neighbouring chars) of layout <sKeyboard> (case-insensitive), or an empty dict if the layout is unknown or not given"
    if not sKeyboard:
        # None or empty name: answer like any other unknown layout instead of raising
        return {}
    return _dKeyboardMap.get(sKeyboard.lower(), {})


def getKeyboardList ():
    "returns the list of known keyboard layout names (a new list, in declaration order)"
    # a real list, not a live view on the private table
    return list(_dKeyboardMap)


# bépo, colemak and dvorak users are assumed to make fewer typing errors,
# hence their smaller neighbourhoods.
_dKeyboardMap = {
    "azerty": {
        # fr
        # line 1
        "é": "az",
        "è": "yu",
        "ç": "àio",
        "à": "op",
        # line 2
        "a": "zéq",
        "z": "aesq",
        "e": "zrds",
        "r": "etfd",
        "t": "rygf",
        "y": "tuhg",
        "u": "yijh",
        "i": "uokj",
        "o": "iplk",
        "p": "oml",
        # line 3
        "q": "sawz",
        "s": "qdzwxe",
        "d": "sfexcr",
        "f": "dgrcvt",
        "g": "fhtvby",
        "h": "gjybnu",
        "j": "hkuni",
        "k": "jlio",
        "l": "kmop",
        "m": "lùp",
        "ù": "m",
        # line 4
        "w": "xqs",
        "x": "wcsd",
        "c": "xvdf",
        "v": "cbfg",
        "b": "vngh",
        "n": "bhj",
    },
    "bépo": {
        # fr
        # line 2
        "b": "éa",
        "é": "bpu",
        "p": "éoi",
        "o": "pèe",
        "è": "o",
        "v": "dt",
        "d": "vls",
        "l": "djr",
        "j": "lzn",
        "z": "jmw",
        # line 3
        "a": "ubà",
        "u": "aiéy",
        "i": "uepx",
        "e": "io",
        "c": "t",
        "t": "csvq",
        "s": "trdg",
        "r": "snlh",
        "n": "rmjf",
        "m": "nzç",
        # line 4
        "à": "yêa",
        "y": "àxu",
        "x": "ywi",
        "w": "z",
        "k": "c",
        "q": "gt",
        "g": "qhs",
        "h": "gfr",
        "f": "hçn",
        "ç": "fm",
    },
    "colemak": {
        # en, us, intl
        # line 2
        "q": "wa",
        "w": "qfr",
        "f": "wps",
        "p": "fgt",
        "g": "pjd",
        "j": "glh",
        "l": "jun",
        "u": "lye",
        "y": "ui",
        # line 3
        "a": "rqz",
        "r": "aswx",
        "s": "rtfc",
        "t": "sdpv",
        "d": "thgb",
        "h": "dnjk",
        "n": "helm",
        "e": "niu",
        "i": "eoy",
        "o": "i",
        # line 4
        "z": "xa",
        "x": "zcr",
        "c": "xvs",
        "v": "cbt",
        "b": "vkd",
        "k": "bmh",
        "m": "kn",
    },
    "dvorak": {
        # en, us, intl
        # line 2
        "p": "yu",
        "y": "pfi",
        "f": "ygd",
        "g": "fch",
        "c": "grt",
        "r": "cln",
        "l": "rs",
        # line 3
        "a": "o",
        "o": "aeq",
        "e": "ouj",
        "u": "eipk",
        "i": "udyx",
        "d": "ihfb",
        "h": "dtgm",
        "t": "hncw",
        "n": "tsrv",
        "s": "nlz",
        # line 4
        "q": "jo",
        "j": "qke",
        "k": "jxu",
        "x": "kbi",
        "b": "xmd",
        "m": "bwh",
        "w": "mvt",
        "v": "wzn",
        "z": "vs",
    },
    "qwerty": {
        # en, us, intl
        # line 2
        "q": "wa",
        "w": "qeas",
        "e": "wrds",
        "r": "etfd",
        "t": "rygf",
        "y": "tuhg",
        "u": "yijh",
        "i": "uokj",
        "o": "iplk",
        "p": "ol",
        # line 3
        "a": "sqzw",
        "s": "adwzxe",
        "d": "sfexcr",
        "f": "dgrcvt",
        "g": "fhtvby",
        "h": "gjybnu",
        "j": "hkunmi",
        "k": "jlimo",
        "l": "kop",
        # line 4
        "z": "xas",
        "x": "zcsd",
        "c": "xvdf",
        "v": "cbfg",
        "b": "vngh",
        "n": "bmhj",
        "m": "njk",
    },
    "qwertz": {
        # ge, au (presumably German / Austrian layouts; note kept from the original)
        # line 2
        "q": "wa",
        "w": "qeas",
        "e": "wrds",
        "r": "etfd",
        "t": "rzgf",
        "z": "tuhg",
        "u": "zijh",
        "i": "uokj",
        "o": "iplk",
        "p": "oüöl",
        "ü": "päö",
        # line 3
        "a": "sqyw",
        "s": "adwyxe",
        "d": "sfexcr",
        "f": "dgrcvt",
        "g": "fhtvbz",
        "h": "gjzbnu",
        "j": "hkunmi",
        "k": "jlimo",
        "l": "köop",
        "ö": "läpü",
        "ä": "öü",
        # line 4
        "y": "xas",
        "x": "ycsd",
        "c": "xvdf",
        "v": "cbfg",
        "b": "vngh",
        "n": "bmhj",
        "m": "njk",
    },
}