Index: graphspell-js/char_player.js ================================================================== --- graphspell-js/char_player.js +++ graphspell-js/char_player.js @@ -27,11 +27,12 @@ ['ä', 'a'], ['ê', 'e'], ['í', 'i'], ['ó', 'o'], ['ü', 'u'], ['ý', 'i'], ['á', 'a'], ['ë', 'e'], ['ì', 'i'], ['ò', 'o'], ['ú', 'u'], ['ỳ', 'i'], ['ā', 'a'], ['ē', 'e'], ['ī', 'i'], ['ō', 'o'], ['ū', 'u'], ['ȳ', 'i'], ['ç', 'c'], ['ñ', 'n'], ['k', 'q'], ['w', 'v'], ['œ', 'oe'], ['æ', 'ae'], - ['ſ', 's'], ['ffi', 'ffi'], ['ffl', 'ffl'], ['ff', 'ff'], ['ſt', 'ft'], ['fi', 'fi'], ['fl', 'fl'], ['st', 'st'] + ['ſ', 's'], ['ffi', 'ffi'], ['ffl', 'ffl'], ['ff', 'ff'], ['ſt', 'ft'], ['fi', 'fi'], ['fl', 'fl'], ['st', 'st'], + ["⁰", "0"], ["¹", "1"], ["²", "2"], ["³", "3"], ["⁴", "4"], ["⁵", "5"], ["⁶", "6"], ["⁷", "7"], ["⁸", "8"], ["⁹", "9"] ]), simplifyWord: function (sWord) { // word simplication before calculating distance between words sWord = sWord.toLowerCase(); @@ -44,10 +45,22 @@ } i++; } return sNewWord.replace(/eau/g, "o").replace(/au/g, "o").replace(/ai/g, "ẽ").replace(/ei/g, "ẽ").replace(/ph/g, "f"); }, + + _xTransNumbersToExponent: new Map([ + ["0", "⁰"], ["1", "¹"], ["2", "²"], ["3", "³"], ["4", "⁴"], ["5", "⁵"], ["6", "⁶"], ["7", "⁷"], ["8", "⁸"], ["9", "⁹"] + ]), + + numbersToExponent: function (sWord) { + let sNewWord = ""; + for (let c of sWord) { + sNewWord += this._xTransNumbersToExponent.gl_get(c, c); + } + return sNewWord; + }, aVowel: new Set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ"), aConsonant: new Set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ"), aDouble: new Set("bcdfjklmnprstzBCDFJKLMNPRSTZ"), // letters that may be used twice successively Index: graphspell-js/ibdawg.js ================================================================== --- graphspell-js/ibdawg.js +++ graphspell-js/ibdawg.js @@ -343,10 +343,16 @@ //console.timeEnd("Suggestions for " + sWord); return aSugg; } _splitSuggest (oSuggResult, sWord) { + // split trailing numbers + let m = /^([a-zA-Zà-öÀ-Ö_ø-ÿØ-ßĀ-ʯfi-st][a-zA-Zà-öÀ-Ö_ø-ÿØ-ßĀ-ʯfi-st-]+)([0-9]+)$/.exec(sWord); + if (m) { + oSuggResult.addSugg(m[1] + " " + char_player.numbersToExponent(m[2])); + } + // split at apostrophes for (let cSplitter of "'’") { if (sWord.includes(cSplitter)) { let [sWord1, sWord2] = sWord.split(cSplitter, 2); if (this.isValid(sWord1) && this.isValid(sWord2)) { oSuggResult.addSugg(sWord1+" "+sWord2); Index: graphspell/char_player.py ================================================================== --- graphspell/char_player.py +++ graphspell/char_player.py @@ -23,10 +23,11 @@ 'á': 'a', 'ë': 'e', 'ì': 'i', 'ò': 'o', 'ú': 'u', 'ỳ': 'i', 'ā': 'a', 'ē': 'e', 'ī': 'i', 'ō': 'o', 'ū': 'u', 'ȳ': 'i', 'ç': 'c', 'ñ': 'n', 'k': 'q', 'w': 'v', 'œ': 'oe', 'æ': 'ae', 'ſ': 's', 'ffi': 'ffi', 'ffl': 'ffl', 'ff': 'ff', 'ſt': 'ft', 'fi': 'fi', 'fl': 'fl', 'st': 'st', + "⁰": "0", "¹": "1", "²": "2", "³": "3", "⁴": "4", "⁵": "5", "⁶": "6", "⁷": "7", "⁸": "8", "⁹": "9" }) def simplifyWord (sWord): "word simplication before calculating distance between words" sWord = sWord.lower().translate(_xTransCharsForSimplification) @@ -34,10 +35,18 @@ for i, c in enumerate(sWord, 1): if c == 'e' or c != sWord[i:i+1]: # exception for to avoid confusion between crée / créai sNewWord += c return sNewWord.replace("eau", "o").replace("au", "o").replace("ai", "ẽ").replace("ei", "ẽ").replace("ph", "f") + +_xTransNumbersToExponent = str.maketrans({ + "0": "⁰", "1": "¹", "2": "²", "3": "³", "4": "⁴", "5": "⁵", "6": "⁶", "7": "⁷", "8": "⁸", "9": "⁹" +}) + +def numbersToExponent (sWord): + return sWord.translate(_xTransNumbersToExponent) + aVowel = set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ") aConsonant = set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ") aDouble = set("bcdfjklmnprstzBCDFJKLMNPRSTZ") # letters that may be used twice successively Index: graphspell/ibdawg.py ================================================================== --- graphspell/ibdawg.py +++ graphspell/ibdawg.py @@ -312,10 +312,16 @@ # we add what we removed return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg)) return aSugg def _splitSuggest (self, oSuggResult, sWord): + # split trailing numbers + m = re.match(r"(\D+)([0-9]+)$", sWord) + if m: + print("ok") + oSuggResult.addSugg(m.group(1) + " " + cp.numbersToExponent(m.group(2))) + # split at apostrophes for cSplitter in "'’": if cSplitter in sWord: sWord1, sWord2 = sWord.split(cSplitter, 1) if self.isValid(sWord1) and self.isValid(sWord2): oSuggResult.addSugg(sWord1+" "+sWord2)