Grammalecte  Check-in [f4870dbaa5]

Overview
Comment:[graphspell] check if custom dictionaries have l2grams attributes
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | graphspell
Files: files | file ages | folders
SHA3-256: f4870dbaa5d29f14dfac5ea9c617a1b7498d0a8050bf0beedc503193c06eb135
User & Date: olr on 2018-10-18 11:12:08
Other Links: manifest | tags
Context
2018-10-20
09:41
[fr] faux positif avec voir, + confusion air/aire/ère check-in: 5f374a824a user: olr tags: trunk, fr
2018-10-18
11:12
[graphspell] check if custom dictionaries have l2grams attributes check-in: f4870dbaa5 user: olr tags: trunk, graphspell
11:01
[fr] nr: issue check-in: 98edfb0735 user: olr tags: trunk, fr
Changes

Modified graphspell-js/ibdawg.js from [820a95ea91] to [edb211270b].

144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
        }
        if (!(this.nCompressionMethod == 1 || this.nCompressionMethod == 2 || this.nCompressionMethod == 3)) {
            throw RangeError("# Error. Unknown dictionary compression method: " + this.nCompressionMethod);
        }
        // <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value
        this.dChar = helpers.objectToMap(this.dChar);
        this.dCharVal = this.dChar.gl_reverse();
        this.a2grams = new Set(this.l2grams);

        if (this.cStemming == "S") {
            this.funcStemming = str_transform.changeWordWithSuffixCode;
        } else if (this.cStemming == "A") {
            this.funcStemming = str_transform.changeWordWithAffixCode;
        } else {
            this.funcStemming = str_transform.noStemming;







|







144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
        }
        if (!(this.nCompressionMethod == 1 || this.nCompressionMethod == 2 || this.nCompressionMethod == 3)) {
            throw RangeError("# Error. Unknown dictionary compression method: " + this.nCompressionMethod);
        }
        // <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value
        this.dChar = helpers.objectToMap(this.dChar);
        this.dCharVal = this.dChar.gl_reverse();
        this.a2grams = (this.l2grams) ? new Set(this.l2grams) : null;

        if (this.cStemming == "S") {
            this.funcStemming = str_transform.changeWordWithSuffixCode;
        } else if (this.cStemming == "A") {
            this.funcStemming = str_transform.changeWordWithAffixCode;
        } else {
            this.funcStemming = str_transform.noStemming;
410
411
412
413
414
415
416



417
418
419
420
421
422
423
            }
        }
    }

    isNgramsOK (sChars) {
        if (sChars.length != 2) {
            return true;



        }
        return this.a2grams.has(sChars);
    }

    * _getCharArcs (iAddr) {
        // generator: yield all chars and addresses from node at address <iAddr>
        for (let [nVal, jAddr] of this._getArcs(iAddr)) {







>
>
>







410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
            }
        }
    }

    isNgramsOK (sChars) {
        if (sChars.length != 2) {
            return true;
        }
        if (!this.a2grams) {
            return true;
        }
        return this.a2grams.has(sChars);
    }

    * _getCharArcs (iAddr) {
        // generator: yield all chars and addresses from node at address <iAddr>
        for (let [nVal, jAddr] of this._getArcs(iAddr)) {

Modified graphspell/ibdawg.py from [eceb83c9e7] to [59273fbe60].

186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
        self.nBytesOffset = 1 # version 3

    def _initJSON (self, oJSON):
        "initialize with a JSON text file"
        self.__dict__.update(oJSON)
        self.byDic = binascii.unhexlify(self.sByDic)
        self.dCharVal = { v: k  for k, v in self.dChar.items() }
        self.a2grams = set(self.l2grams)

    def getInfo (self):
        "return string about the IBDAWG"
        return  "  Language: {0.sLangName}   Lang code: {0.sLangCode}   Dictionary name: {0.sDicName}" \
                "  Compression method: {0.nCompressionMethod:>2}   Date: {0.sDate}   Stemming: {0.cStemming}FX\n" \
                "  Arcs values:  {0.nArcVal:>10,} = {0.nChar:>5,} characters,  {0.nAff:>6,} affixes,  {0.nTag:>6,} tags\n" \
                "  Dictionary: {0.nEntry:>12,} entries,    {0.nNode:>11,} nodes,   {0.nArc:>11,} arcs\n" \







|







186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
        self.nBytesOffset = 1 # version 3

    def _initJSON (self, oJSON):
        "initialize with a JSON text file"
        self.__dict__.update(oJSON)
        self.byDic = binascii.unhexlify(self.sByDic)
        self.dCharVal = { v: k  for k, v in self.dChar.items() }
        self.a2grams = set(self.l2grams)  if hasattr(self, 'l2grams')  else None

    def getInfo (self):
        "return string about the IBDAWG"
        return  "  Language: {0.sLangName}   Lang code: {0.sLangCode}   Dictionary name: {0.sDicName}" \
                "  Compression method: {0.nCompressionMethod:>2}   Date: {0.sDate}   Stemming: {0.cStemming}FX\n" \
                "  Arcs values:  {0.nArcVal:>10,} = {0.nChar:>5,} characters,  {0.nAff:>6,} affixes,  {0.nTag:>6,} tags\n" \
                "  Dictionary: {0.nEntry:>12,} entries,    {0.nNode:>11,} nodes,   {0.nArc:>11,} arcs\n" \
354
355
356
357
358
359
360


361
362
363
364
365
366
367
            elif len(sRemain) == 1:
                self._suggest(oSuggResult, "", nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDist, nDeep+1, iAddr, sNewWord, True) # remove last char and go on
                for sRepl in cp.dFinal1.get(sRemain, ()):
                    self._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDist, nDeep+1, iAddr, sNewWord, True)

    def isNgramsOK (self, sChars):
        if len(sChars) != 2:


            return True
        return sChars in self.a2grams

    #@timethis
    def suggest2 (self, sWord, nSuggLimit=10):
        "returns a set of suggestions for <sWord>"
        sWord = cp.spellingNormalization(sWord)







>
>







354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
            elif len(sRemain) == 1:
                self._suggest(oSuggResult, "", nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDist, nDeep+1, iAddr, sNewWord, True) # remove last char and go on
                for sRepl in cp.dFinal1.get(sRemain, ()):
                    self._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDist, nDeep+1, iAddr, sNewWord, True)

    def isNgramsOK (self, sChars):
        if len(sChars) != 2:
            return True
        if not self.a2grams:
            return True
        return sChars in self.a2grams

    #@timethis
    def suggest2 (self, sWord, nSuggLimit=10):
        "returns a set of suggestions for <sWord>"
        sWord = cp.spellingNormalization(sWord)