Grammalecte  Diff

Differences From Artifact [bda5a789eb]:

To Artifact [d672255b46]:


11
12
13
14
15
16
17

18
19
20
21
22
23
24
import re
from functools import wraps
import time
import json
import binascii
import importlib
from collections import OrderedDict


#import logging
#logging.basicConfig(filename="suggestions.log", level=logging.DEBUG)

from . import str_transform as st
from . import char_player as cp
from .echo import echo







>







11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import re
from functools import wraps
import time
import json
import binascii
import importlib
from collections import OrderedDict
from math import floor

#import logging
#logging.basicConfig(filename="suggestions.log", level=logging.DEBUG)

from . import str_transform as st
from . import char_player as cp
from .echo import echo
36
37
38
39
40
41
42
43
44
45
46
47

48
49
50





51
52
53
54
55
56
57
58
59
60

61

62

63

64

65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88




89
90
91
92
93
94
95
96
97
98
99
100
101
102
        return result
    return wrapper


class SuggResult:
    """Structure for storing, classifying and filtering suggestions.

    Accepted suggestions are stored in buckets keyed by their distance to the
    word being corrected (distance computed on the simplified forms of both
    words). The accepted distance limit shrinks while closer suggestions are found.
    """

    def __init__ (self, sWord, nDistLimit=-1):
        """Create an empty result set for <sWord>.

        sWord:      the word for which suggestions are collected
        nDistLimit: maximum accepted distance; when negative, the limit is
                    derived from the word length: (len(sWord) // 3) + 1
        """
        self.sWord = sWord
        self.sSimplifiedWord = st.simplifyWord(sWord)
        self.nDistLimit = nDistLimit  if nDistLimit >= 0  else  (len(sWord) // 3) + 1
        self.nMinDist = 1000        # smallest distance among accepted suggestions so far
        self.aSugg = set()          # accepted suggestions
        self.dSugg = { 0: [],  1: [],  2: [] }  # accepted suggestions, grouped by distance
        self.aAllSugg = set()       # all found words even those refused

    def addSugg (self, sSugg, nDeep=0):
        """Register the suggestion <sSugg> if it is new and close enough.

        <nDeep> is only used by the (commented out) debugging log lines.
        """
        #logging.info((nDeep * "  ") + "__" + sSugg + "__")
        if sSugg in self.aAllSugg:
            return
        self.aAllSugg.add(sSugg)
        if sSugg in self.aSugg:
            return
        nDist = int(st.distanceDamerauLevenshtein(self.sSimplifiedWord, st.simplifyWord(sSugg)))
        #logging.info((nDeep * "  ") + "__" + sSugg + "__ :" + self.sSimplifiedWord +"|"+ st.simplifyWord(sSugg) +" -> "+ str(nDist))
        if nDist > self.nDistLimit:
            return
        if " " in sSugg:
            # suggestions made of several words are penalized;
            # the penalty is applied after the limit check, so the bucket may be above the limit
            nDist += 1
        self.dSugg.setdefault(nDist, []).append(sSugg)
        self.aSugg.add(sSugg)
        self.nMinDist = min(self.nMinDist, nDist)
        # never accept anything farther than one step beyond the best distance found
        self.nDistLimit = min(self.nDistLimit, self.nMinDist+1)

    def getSuggestions (self, nSuggLimit=10):
        """Return a list of at most <nSuggLimit> suggestions, closest first.

        The casing of the original word is applied to the suggestions
        (all uppercase, or first letter uppercase); suggestions which become
        identical after recasing are merged.
        """
        lRes = []
        bFirstListSorted = False
        # Buckets must be visited by increasing distance: buckets above 2 are created
        # on demand by addSugg(), so the insertion order of the dictionary is not the
        # distance order, and a bucket above the limit may precede a valid one.
        for nDist in sorted(self.dSugg):
            if nDist > self.nDistLimit:
                break
            lSugg = self.dSugg[nDist]
            if not bFirstListSorted and len(lSugg) > 1:
                # we sort the better results with the original word
                lSugg.sort(key=lambda sSugg: st.distanceDamerauLevenshtein(self.sWord, sSugg))
                bFirstListSorted = True
            lRes.extend(lSugg)
            if len(lRes) > nSuggLimit:
                break
        if self.sWord.isupper():
            lRes = list(OrderedDict.fromkeys(sSugg.upper()  for sSugg in lRes)) # use dict, when Python 3.6+
        elif self.sWord[0:1].isupper():
            # don't use <.istitle>
            lRes = list(OrderedDict.fromkeys(sSugg[0:1].upper()+sSugg[1:]  for sSugg in lRes)) # use dict, when Python 3.6+
        return lRes[:nSuggLimit]

    def reset (self):
        "clear the accepted suggestions (the set of all found words is left untouched)"
        self.aSugg.clear()
        self.dSugg.clear()









|




>
|
|
|
>
>
>
>
>







<
|
|
>
|
>
|
>
|
>
|
>
|
|
|
|
|
|
|

|



<
<
<
<
|
|
|
<
|
<
|
|
>
>
>
>
|





|







37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64

65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87




88
89
90

91

92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
        return result
    return wrapper


class SuggResult:
    """Structure for storing, classifying and filtering suggestions.

    Each suggestion is scored with 1 - Jaro-Winkler value between the simplified
    form of the word to correct and the simplified form of the suggestion, then
    filed as a "best" suggestion, a "good" suggestion, or rejected.
    """

    def __init__ (self, sWord, nSuggLimit=10, nDistLimit=-1):
        """Create an empty result set for <sWord>.

        sWord:      the word for which suggestions are collected
        nSuggLimit: maximum number of suggestions returned by getSuggestions()
        nDistLimit: maximum accepted distance; when negative, the limit is
                    derived from the word length: (len(sWord) // 3) + 1
        """
        self.sWord = sWord
        self.sSimplifiedWord = st.simplifyWord(sWord)
        self.nDistLimit = nDistLimit  if nDistLimit >= 0  else  (len(sWord) // 3) + 1
        self.nMinDist = 1000
        # Temporary sets
        self.aAllSugg = set()   # All suggestions, even the one rejected
        self.dGoodSugg = {}     # Acceptable suggestions: suggestion -> score
        self.dBestSugg = {}     # Best suggestions: suggestion -> score
        # Parameters
        self.nSuggLimit = nSuggLimit
        self.nSuggLimitExt = nSuggLimit + 2             # we add few entries in case suggestions merge after casing modifications
        self.nBestSuggLimit = floor(nSuggLimit * 1.5)   # n times the requested limit
        self.nGoodSuggLimit = nSuggLimit * 15           # n times the requested limit

    def addSugg (self, sSugg, nDeep=0):
        """Score the suggestion <sSugg> and file it if it is new and close enough.

        <nDeep> is only used by the (commented out) debugging log line.
        """
        #logging.info((nDeep * "  ") + "__" + sSugg + "__")
        if sSugg in self.aAllSugg:
            return
        self.aAllSugg.add(sSugg)

        nDistJaro = 1 - st.distanceJaroWinkler(self.sSimplifiedWord, st.simplifyWord(sSugg))
        nDist = floor(nDistJaro * 10)
        if nDistJaro < .11:     # Best suggestions
            self.dBestSugg[sSugg] = round(nDistJaro*1000)
            if len(self.dBestSugg) > self.nBestSuggLimit:
                self.nDistLimit = -1  # make suggest() to end search
        elif nDistJaro < .33:   # Good suggestions
            self.dGoodSugg[sSugg] = round(nDistJaro*1000)
            if len(self.dGoodSugg) > self.nGoodSuggLimit:
                self.nDistLimit = -1  # make suggest() to end search
        else:                   # Rejected: only used to tighten the distance limit
            self.nMinDist = min(self.nMinDist, nDist)
            self.nDistLimit = min(self.nDistLimit, self.nMinDist)
        if nDist <= self.nDistLimit:
            self.nMinDist = min(self.nMinDist, nDist)
            self.nDistLimit = min(self.nDistLimit, self.nMinDist+1)

    def getSuggestions (self):
        """Return a list of at most <self.nSuggLimit> suggestions, best first.

        The casing of the original word is applied to the suggestions
        (all uppercase, or first letter uppercase); suggestions which become
        identical after recasing are merged.
        """
        # a negative bound in a slice would count from the end: clamp it
        nTake = max(self.nSuggLimitExt, 0)
        # best suggestions: sort only with simplified words
        lBest = sorted(self.dBestSugg.items(), key=lambda x: x[1])
        lRes = [ sSugg  for sSugg, _ in lBest[:nTake] ]
        if len(lRes) < self.nSuggLimitExt:
            # good suggestions: sort with simplified words and original word
            lGood = sorted(self.dGoodSugg.items(), key=lambda x: ((1-st.distanceJaroWinkler(self.sWord, x[0]))*10, x[1]))
            lRes.extend(sSugg  for sSugg, _ in lGood[:nTake])
        # casing
        if self.sWord.isupper():
            lRes = list(OrderedDict.fromkeys(sSugg.upper()  for sSugg in lRes)) # use dict, when Python 3.6+
        elif self.sWord[0:1].isupper():
            # don't use <.istitle>
            lRes = list(OrderedDict.fromkeys(sSugg[0:1].upper()+sSugg[1:]  for sSugg in lRes)) # use dict, when Python 3.6+
        return lRes[:self.nSuggLimit]

    def reset (self):
        "clear the retained suggestions (the set of all found words is left untouched)"
        # <aSugg> and <dSugg> do not exist in this class: clear the containers actually used
        self.dGoodSugg.clear()
        self.dBestSugg.clear()


318
319
320
321
322
323
324
325

326
327
328
329
330
331
332
333
334
335
336
337
        sSfx = ""
        if self.lexicographer:
            sPfx, sWord, sSfx = self.lexicographer.split(sWord)
        nMaxSwitch = max(len(sWord) // 3, 1)
        nMaxDel = len(sWord) // 5
        nMaxHardRepl = max((len(sWord) - 5) // 4, 1)
        nMaxJump = max(len(sWord) // 4, 1)
        oSuggResult = SuggResult(sWord)

        if bSplitTrailingNumbers:
            self._splitTrailingNumbers(oSuggResult, sWord)
        self._splitSuggest(oSuggResult, sWord)
        self._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump)
        aSugg = oSuggResult.getSuggestions(nSuggLimit)
        if self.lexicographer:
            aSugg = self.lexicographer.filterSugg(aSugg)
        if sSfx or sPfx:
            # we add what we removed
            return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
        return aSugg








|
>




|







327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
        sSfx = ""
        if self.lexicographer:
            sPfx, sWord, sSfx = self.lexicographer.split(sWord)
        nMaxSwitch = max(len(sWord) // 3, 1)
        nMaxDel = len(sWord) // 5
        nMaxHardRepl = max((len(sWord) - 5) // 4, 1)
        nMaxJump = max(len(sWord) // 4, 1)
        oSuggResult = SuggResult(sWord, nSuggLimit)
        sWord = st.cleanWord(sWord)
        if bSplitTrailingNumbers:
            self._splitTrailingNumbers(oSuggResult, sWord)
        self._splitSuggest(oSuggResult, sWord)
        self._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump)
        aSugg = oSuggResult.getSuggestions()
        if self.lexicographer:
            aSugg = self.lexicographer.filterSugg(aSugg)
        if sSfx or sPfx:
            # we add what we removed
            return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
        return aSugg