Grammalecte  Check-in [4414e62e0b]

Overview
Comment:[graphspell] code consistency
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | graphspell
Files: files | file ages | folders
SHA3-256: 4414e62e0be60582dd2d0e9b415093257d721843e17bd1958e11a68eb9a64e1e
User & Date: olr on 2018-02-05 10:34:21
Original Comment: [graphspell] str_transform: code clarification
Other Links: manifest | tags
Context
2018-02-06
10:22
[graphspell][py] dawg: code clarification check-in: b181054462 user: olr tags: trunk, graphspell
2018-02-05
10:34
[graphspell] code consistency check-in: 4414e62e0b user: olr tags: trunk, graphspell
2018-02-04
11:43
[fr] faux positif: syntagme adjectival check-in: d03269c260 user: olr tags: trunk, fr
Changes

Modified graphspell-js/dawg.js from [b32ed20eaa] to [fa338df471].

57
58
59
60
61
62
63
64

65
66
67
68
69
70
71
57
58
59
60
61
62
63

64
65
66
67
68
69
70
71







-
+







                    dChar.set(c, nChar);
                    lChar.push(c);
                    nChar += 1;
                }
                dCharOccur.set(c, dCharOccur.gl_get(c, 0) + 1);
            }
            // affixes to find stem from flexion
            sAff = funcStemmingGen(sFlex, sStem);
            let sAff = funcStemmingGen(sFlex, sStem);
            if (!dAff.get(sAff)) {
                dAff.set(sAff, nAff);
                lAff.push(sAff);
                nAff += 1;
            }
            dAffOccur.set(sAff, dCharOccur.gl_get(sAff, 0) + 1);
            // tags
97
98
99
100
101
102
103
104

105
106
107
108
109
110
111
97
98
99
100
101
102
103

104
105
106
107
108
109
110
111







-
+







        lEntry.length = 0; // clear the array
        
        // Dictionary of arc values occurrency, to sort arcs of each node
        let lKeyVal = [];
        for (let c of dChar.keys()) { lKeyVal.push([dChar[c], dCharOccur[c]]); }
        for (let sAff of dAff.keys()) { lKeyVal.push([dAff[sAff]+nChar, dAffOccur[sAff]]); }
        for (let sTag in dTag.keys()) { lKeyVal.push([dTag[sTag]+nChar+nAff, dTagOccur[sTag]]); }
        dValOccur = new Map(lKeyVal);
        let dValOccur = new Map(lKeyVal);
        lKeyVal.length = 0; // clear the array

        //with open(spfSrc[:-8]+".valuesfreq.txt", 'w', encoding='utf-8') as hFreqDst:  # DEBUG
        //    for iKey, nOcc in sorted(dValOccur.entries(), key=lambda t: t[1], reverse=True):
        //        hFreqDst.write("{}: {}\n".format(lVal[iKey], nOcc))
        //    hFreqDst.close()
        
238
239
240
241
242
243
244
245
246


247
248
249
250
251
252
253
238
239
240
241
242
243
244


245
246
247
248
249
250
251
252
253







-
-
+
+







    }
    
    _parseNodes (oNode) {
        // Warning: recursive method
        // Records <oNode> in <this.lSortedNodes>, then does the same for every node
        // reachable through <oNode.arcs>. A node is recorded before the nodes it points to.
        // A node whose <pos> is already greater than 0 is skipped.
        // NOTE(review): the four statements after the guard look like the removed and the
        // added lines of a diff shown together; presumably only one call to setPos() and
        // one insertion into <lSortedNodes> are meant to remain -- confirm against the repository.
        if (oNode.pos > 0) {
            return;
        }
        oNode.setPos();
        this.lSortedNodes.append(oNode);  // NOTE(review): JavaScript arrays have no append(); presumably superseded by the push() below -- confirm
        //oNode.setPos();  // version 2
        this.lSortedNodes.push(oNode);
        for (let oNextNode of oNode.arcs.values()) {
             this._parseNodes(oNextNode);
        }
    }
        
    lookup (sWord) {
        let oNode = this.oRoot;

Modified graphspell-js/ibdawg.js from [222e0edc2a] to [5f5404403f].

120
121
122
123
124
125
126
127

128
129

130
131
132
133
134
135
136
120
121
122
123
124
125
126

127
128

129
130
131
132
133
134
135
136







-
+

-
+







        }
        // <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value
        this.dChar = helpers.objectToMap(this.dChar);
        this.dCharVal = this.dChar.gl_reverse();
        //this.byDic = new Uint8Array(this.byDic);  // not quicker, even slower

        if (this.cStemming == "S") {
            this.funcStemming = str_transform.getStemFromSuffixCode;
            this.funcStemming = str_transform.changeWordWithSuffixCode;
        } else if (this.cStemming == "A") {
            this.funcStemming = str_transform.getStemFromAffixCode;
            this.funcStemming = str_transform.changeWordWithAffixCode;
        } else {
            this.funcStemming = str_transform.noStemming;
        }

        // Configuring DAWG functions according to nVersion
        switch (this.nVersion) {
            case 1:

Modified graphspell-js/str_transform.js from [e440be0f51] to [047ef79ee5].

1
2
3



4
5
6






































7
8
9
10
11
12
13
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54



+
+
+



+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







//// STRING TRANSFORMATION
/*jslint esversion: 6*/

"use strict";


// Note: 48 is the ASCII code for "0"

var str_transform = {

    longestCommonSubstring: function (string1, string2) {
        // https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Longest_common_substring
        // untested

        // init max value
        let longestCommonSubstring = 0;
        // init 2D array with 0
        let table = [],
            len1 = string1.length,
            len2 = string2.length,
            row, col;
        for (row = 0; row <= len1; row++) {
            table[row] = [];
            for (col = 0; col <= len2; col++) {
                table[row][col] = 0;
            }
        }
        // fill table
        let i, j;
        for (i = 0;  i < len1;  i++) {
            for (j = 0;  j < len2;  j++) {
                if (string1[i] === string2[j]) {
                    if (table[i][j] === 0){
                        table[i+1][j+1] = 1;
                    } else {
                        table[i+1][j+1] = table[i][j] + 1;
                    }
                    if (table[i+1][j+1] > longestCommonSubstring) {
                        longestCommonSubstring = table[i+1][j+1];
                    }
                } else {
                    table[i+1][j+1] = 0;
                }
            }
        }
        return longestCommonSubstring;
    },

    distanceDamerauLevenshtein2: function (s1, s2) {
        // distance of Damerau-Levenshtein between <s1> and <s2>
        // https://fr.wikipedia.org/wiki/Distance_de_Damerau-Levenshtein
        try {
            let nLen1 = s1.length;
            let nLen2 = s2.length;
113
114
115
116
117
118
119
120

121
122
123

124
125

126
127
128
129


































130
131

132
133
134

135
136
137
138


139
140
141
142
143
144
145
146
154
155
156
157
158
159
160

161

162

163
164

165
166
167


168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202

203
204
205

206
207
208


209
210
211
212
213
214
215
216
217
218







-
+
-

-
+

-
+


-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

-
+


-
+


-
-
+
+








                break;
            }
            jSfx += 1;
        }
        return String.fromCharCode(sFlex.length-jSfx+48) + sStem.slice(jSfx);
    },

    getStemFromSuffixCode: function (sFlex, sSfxCode) {
    changeWordWithSuffixCode: function (sWord, sSfxCode) {
        // Suffix only
        if (sSfxCode == "0") {
            return sFlex;
            return sWord;
        }
        return sSfxCode[0] == '0' ? sFlex + sSfxCode.slice(1) : sFlex.slice(0, -(sSfxCode.charCodeAt(0)-48)) + sSfxCode.slice(1);
        return sSfxCode[0] == '0' ? sWord + sSfxCode.slice(1) : sWord.slice(0, -(sSfxCode.charCodeAt(0)-48)) + sSfxCode.slice(1);
    },
    
    getStemFromAffixCode: function (sFlex, sAffCode) {
        // Prefix and suffix
    // Prefix and suffix
    defineAffixCode: function (sFlex, sStem) {
        /*
            UNTESTED!
            Returns a string defining how to get stem from flexion. Examples:
                "0" if stem = flexion
                "stem" if no common substring
                "n(pfx)/m(sfx)"
            with n and m: chars with numeric meaning, "0" = 0, "1" = 1, ... ":" = 10, etc. (See ASCII table.) Says how many letters to strip from flexion.
                pfx [optional]: string to add before the flexion 
                sfx [optional]: string to add after the flexion
        */
        if (sFlex == sStem) {
            return "0";
        }
        // is stem a substring of flexion?
        let n = sFlex.indexOf(sStem);
        if (n >= 0) {
            return String.fromCharCode(n+48) + "/" + String.fromCharCode(sFlex.length-(sStem.length+n)+48);
        }
        // no, so we are looking for common substring
        let sSubs = this.longestCommonSubstring(sFlex, sStem);
        if (sSubs.length > 1) {
            let iPos = sStem.indexOf(sSubs);
            let sPfx = sStem.slice(0, iPos);
            let sSfx = sStem.slice(iPos+sSubs.length);
            let n = sFlex.indexOf(sSubs);
            let m = sFlex.length - (sSubs.length+n);
            return String.fromCharCode(n+48) + sPfx + "/" + String.fromCharCode(m+48) + sSfx;
        }
        return sStem;
    },

    changeWordWithAffixCode: function (sWord, sAffCode) {
        if (sAffCode == "0") {
            return sFlex;
            return sWord;
        }
        if (!sAffCode.includes("/")) {
            return "# error #";
            return sAffCode;
        }
        let [sPfxCode, sSfxCode] = sAffCode.split('/');
        sFlex = sPfxCode.slice(1) + sFlex.slice(sPfxCode.charCodeAt(0)-48);
        return sSfxCode[0] == '0' ? sFlex + sSfxCode.slice(1) : sFlex.slice(0, -(sSfxCode.charCodeAt(0)-48)) + sSfxCode.slice(1);
        sWord = sPfxCode.slice(1) + sWord.slice(sPfxCode.charCodeAt(0)-48);
        return sSfxCode[0] == '0' ? sWord + sSfxCode.slice(1) : sWord.slice(0, -(sSfxCode.charCodeAt(0)-48)) + sSfxCode.slice(1);
    }
};


// Export the word-changing functions when the file is loaded as a CommonJS module.
if (typeof(exports) !== 'undefined') {
    exports.changeWordWithSuffixCode = str_transform.changeWordWithSuffixCode;
    exports.changeWordWithAffixCode = str_transform.changeWordWithAffixCode;
    // former names, kept so that existing importers keep working after the renaming
    exports.getStemFromSuffixCode = str_transform.changeWordWithSuffixCode;
    exports.getStemFromAffixCode = str_transform.changeWordWithAffixCode;
}

Modified graphspell/str_transform.py from [646bc07929] to [9961c8cbc8].

182
183
184
185
186
187
188
189
190
191

192
193
194
195
196
197
198
199

200
201
202
203
182
183
184
185
186
187
188



189
190
191
192
193
194
195
196

197
198
199
200
201







-
-
-
+







-
+




    sSubs = longestCommonSubstring(sFlex, sStem)
    if len(sSubs) > 1:
        iPos = sStem.find(sSubs)
        sPfx = sStem[:iPos]
        sSfx = sStem[iPos+len(sSubs):]
        n = sFlex.find(sSubs)
        m = len(sFlex) - (len(sSubs)+n)
        sAff = "{}/".format(chr(n+48))  if not sPfx  else "{}{}/".format(chr(n+48), sPfx)
        sAff += chr(m+48)  if not sSfx  else "{}{}".format(chr(m+48), sSfx)
        return sAff
        return chr(n+48) + sPfx + "/" + chr(m+48) + sSfx
    return sStem


def changeWordWithAffixCode (sWord, sAffCode):
    """Apply the affix code <sAffCode> to <sWord> and return the resulting word.

    "0"              -> <sWord> unchanged
    code without "/" -> the code is the resulting word itself
    "n(pfx)/m(sfx)"  -> strip (ord(n) - 48) chars from the beginning and (ord(m) - 48) chars
                        from the end of <sWord>, then add <pfx> before and <sfx> after
    """
    if sAffCode == "0":
        return sWord
    if '/' not in sAffCode:
        return sAffCode
    sPfxCode, sSfxCode = sAffCode.split('/')
    sWord = sPfxCode[1:] + sWord[(ord(sPfxCode[0])-48):]
    nStrip = ord(sSfxCode[0]) - 48
    # sWord[:-0] would be empty, hence the explicit test for 0
    return (sWord[:-nStrip]  if nStrip  else sWord) + sSfxCode[1:]