Grammalecte  Diff

Differences From Artifact [303901f49a]:

To Artifact [b249dfb9bf]:


52
53
54
55
56
57
58
59

60
61
62
63

64
65
66
67
68
69
70
52
53
54
55
56
57
58

59
60
61
62

63
64
65
66
67
68
69
70







-
+



-
+







        if self.cStemming == "S":
            self.funcStemming = st.changeWordWithSuffixCode
        elif self.cStemming == "A":
            self.funcStemming = st.changeWordWithAffixCode
        else:
            self.funcStemming = st.noStemming
        self.nTag = self.nArcVal - self.nChar - self.nAff
        # <dChar> to get the value of an arc, <dArcVal> to get the char of an arc with its value
        # <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value
        self.dChar = {}
        for i in range(1, self.nChar):
            self.dChar[self.lArcVal[i]] = i
        self.dArcVal = { v: k  for k, v in self.dChar.items() }
        self.dCharVal = { v: k  for k, v in self.dChar.items() }
            
        self._arcMask = (2 ** ((self.nBytesArc * 8) - 3)) - 1
        self._finalNodeMask = 1 << ((self.nBytesArc * 8) - 1)
        self._lastArcMask = 1 << ((self.nBytesArc * 8) - 2)
        self._addrBitMask = 1 << ((self.nBytesArc * 8) - 3)  # version 2

        self.nBytesOffset = 1 # version 3
170
171
172
173
174
175
176









177
178
179
180
181
182
183
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192







+
+
+
+
+
+
+
+
+







        for c in sWord:
            if c not in self.dChar:
                return False
            iAddr = self._lookupArcNode(self.dChar[c], iAddr)
            if iAddr == None:
                return False
        return bool(int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask)

    def getMorph (self, sWord):
        "returns the morphologies of <sWord>, also looking up its lowercase and capitalized forms"
        lMorph = self.morph(sWord)
        if not sWord[0:1].isupper():
            # no leading uppercase letter: only the word as written is looked up
            return lMorph
        # leading uppercase letter: add the morphologies of the lowercase form
        lMorph.extend(self.morph(sWord.lower()))
        if len(sWord) > 1 and sWord.isupper():
            # fully uppercase word of several letters: add those of the capitalized form too
            lMorph.extend(self.morph(sWord.capitalize()))
        return lMorph

    def suggest (self, sWord):
        "returns a set of similar words"
        # first, we check for similar words
        #return set(self._suggestWithCrushedUselessChars(cp.clearWord(sWord)))
        lSugg = self._suggest(sWord)
        if not lSugg:
244
245
246
247
248
249
250
251
252


253
254
255
256
257
258
259
260
261
262
263
264
265
266

















267
268
269
270
271
272
273
253
254
255
256
257
258
259


260
261
262
263
264
265
266
267








268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291







-
-
+
+






-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







            show(nDeep, cChar)
            lSugg.extend(self._suggestWithCrushedUselessChars(sWord[1:], nDeep+1, jAddr, sNewWord+cChar))
        return lSugg

    def _getSimilarArcsAndCrushedChars (self, cChar, iAddr):
        """generator: yield chars similar to <cChar> and the address of the following node

        Two kinds of (char, address) pairs are yielded for the node at <iAddr>:
        - every arc returned by self._getArcs(iAddr) whose char is in cp.aUselessChar,
          whatever <cChar> is;
        - every char listed for <cChar> in cp.d1to1 (or <cChar> itself when it has no entry)
          that is known in self.dChar and for which self._lookupArcNode finds a node.
        """
        # Each useless-char arc is reported exactly once, through <dCharVal> (arc value -> char).
        # Testing the same arc through <dArcVal> as well would yield the same pair twice.
        for nVal, jAddr in self._getArcs(iAddr):
            cArcChar = self.dCharVal.get(nVal, None)
            if cArcChar in cp.aUselessChar:
                yield (cArcChar, jAddr)
        for c in cp.d1to1.get(cChar, [cChar]):
            if c in self.dChar:
                jAddr = self._lookupArcNode(self.dChar[c], iAddr)
                if jAddr:
                    yield (c, jAddr)

    def getMorph (self, sWord):
        "returns the list of morphologies of <sWord>, other casings of the word being tried as well"
        lResult = self.morph(sWord)
        bInitialUpper = sWord[0:1].isupper()
        if bInitialUpper:
            # word starting with an uppercase letter: the lowercase form is looked up too
            sLower = sWord.lower()
            lResult.extend(self.morph(sLower))
        if bInitialUpper and sWord.isupper() and len(sWord) > 1:
            # word of several letters, all uppercase: the capitalized form is looked up too
            sCapitalized = sWord.capitalize()
            lResult.extend(self.morph(sCapitalized))
        return lResult
    def drawPath (self, sWord, iAddr=0):
        """prints the path followed by <sWord> in the graph, starting from the node at <iAddr>

        For the first char of <sWord>, prints that char followed by the chars of all arcs
        of the node which have an entry in <dCharVal>. When one of them is the expected char,
        a "|" is printed on the next line under its position and the method goes on
        recursively with the rest of the word from the node this arc leads to.
        Nothing is returned.
        """
        if not sWord:
            return
        cExpected = sWord[0:1]
        print(cExpected + ": ", end="")
        iPos = -1           # position of the matching char among the printed chars (-1: none)
        nPrinted = 0        # number of chars printed so far for this node
        for nVal, jAddr in self._getArcs(iAddr):
            if nVal not in self.dCharVal:
                # arc value without a known char: not displayed, not counted
                continue
            cArc = self.dCharVal[nVal]
            print(cArc, end="")
            if cArc == cExpected:
                iNextNodeAddr = jAddr
                iPos = nPrinted
            nPrinted += 1
        if iPos < 0:
            # the expected char is not available from this node: the drawing stops here
            return
        print("\n   "+ " " * iPos + "|")
        self.drawPath(sWord[1:], iNextNodeAddr)


    # def morph (self, sWord):
    #     is defined in __init__

    # VERSION 1
    def _morph1 (self, sWord):
        "returns morphologies of sWord"