Overview
Comment: | [graphspell][js] ibdawg: code cleaning, remove version 2 and 3, never used |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | graphspell | bdic_opt |
Files: | files | file ages | folders |
SHA3-256: | b3f2f1d72a96d6299b7277b038e27c2b |
User & Date: | olr on 2020-09-12 12:32:03 |
Other Links: | branch diff | manifest | tags |
Context
2020-09-12
| ||
12:45 | [graphspell][py] ibdawg: code cleaning, remove version 2 and 3, never used check-in: 4fa9631623 user: olr tags: graphspell, bdic_opt | |
12:32 | [graphspell][js] ibdawg: code cleaning, remove version 2 and 3, never used check-in: b3f2f1d72a user: olr tags: graphspell, bdic_opt | |
12:22 | [fr] tests update check-in: 0207fe1b5b user: olr tags: fr, bdic_opt | |
Changes
Modified graphspell-js/ibdawg.js from [1dc2c625a7] to [baafc4cde1].
︙ | ︙ | |||
119 120 121 122 123 124 125 126 127 128 129 130 131 132 | throw Error("# Error. File not found or not loadable.\n" + e.message + "\n"); } /* Properties: sName, nCompressionMethod, sHeader, lArcVal, nArcVal, sByDic, sLang, nChar, nBytesArc, nBytesNodeAddress, nEntry, nNode, nArc, nAff, cStemming, nTag, dChar, nBytesOffset, */ /* Bug workaround. Mozilla’s JS parser sucks. Can’t read file bigger than 4 Mb! So we convert huge hexadecimal string to list of numbers… https://github.com/mozilla/addons-linter/issues/1361 */ | > > > > > > > > > > > > > > > > > > > | 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 | throw Error("# Error. File not found or not loadable.\n" + e.message + "\n"); } /* Properties: sName, nCompressionMethod, sHeader, lArcVal, nArcVal, sByDic, sLang, nChar, nBytesArc, nBytesNodeAddress, nEntry, nNode, nArc, nAff, cStemming, nTag, dChar, nBytesOffset, */ if (!(this.sHeader.startsWith("/grammalecte-fsa/") || this.sHeader.startsWith("/pyfsa/"))) { throw TypeError("# Error. Not a grammalecte-fsa binary dictionary. Header: " + this.sHeader); } if (!(this.nCompressionMethod == 1 || this.nCompressionMethod == 2 || this.nCompressionMethod == 3)) { throw RangeError("# Error. Unknown dictionary compression method: " + this.nCompressionMethod); } // <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value this.dChar = helpers.objectToMap(this.dChar); this.dCharVal = this.dChar.gl_reverse(); this.a2grams = (this.l2grams) ? new Set(this.l2grams) : null; if (this.cStemming == "S") { this.funcStemming = str_transform.changeWordWithSuffixCode; } else if (this.cStemming == "A") { this.funcStemming = str_transform.changeWordWithAffixCode; } else { this.funcStemming = str_transform.noStemming; } /* Bug workaround. Mozilla’s JS parser sucks. Can’t read file bigger than 4 Mb! 
So we convert huge hexadecimal string to list of numbers… https://github.com/mozilla/addons-linter/issues/1361 */ |
︙ | ︙ | |||
151 152 153 154 155 156 157 | nAcc = -1; } nAcc = nAcc + 1; } this.byDic = lTemp; /* end of bug workaround */ | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 | nAcc = -1; } nAcc = nAcc + 1; } this.byDic = lTemp; /* end of bug workaround */ this._arcMask = (2 ** ((this.nBytesArc * 8) - 3)) - 1; this._finalNodeMask = 1 << ((this.nBytesArc * 8) - 1); this._lastArcMask = 1 << ((this.nBytesArc * 8) - 2); //console.log(this.getInfo()); this.bAcronymValid = true; this.bNumAtLastValid = false; // lexicographer module ? this.lexicographer = null; // JS still sucks: we’ll try importation when importation will be available in Workers. Still waiting... |
︙ | ︙ | |||
329 330 331 332 333 334 335 | getMorph (sWord) { // retrieves morphologies list, different casing allowed if (!sWord) { return []; } sWord = str_transform.spellingNormalization(sWord); | | | | | 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 | getMorph (sWord) { // retrieves morphologies list, different casing allowed if (!sWord) { return []; } sWord = str_transform.spellingNormalization(sWord); let l = this._morph(sWord); if (sWord[0].gl_isUpperCase()) { l.push(...this._morph(sWord.toLowerCase())); if (sWord.gl_isUpperCase() && sWord.length > 1) { l.push(...this._morph(sWord.gl_toCapitalize())); } } return l; } suggest (sWord, nSuggLimit=10, bSplitTrailingNumbers=false) { // returns a array of suggestions for <sWord> |
︙ | ︙ | |||
544 545 546 547 548 549 550 | zFlexPattern = (sFlexPattern !== "") ? new RegExp(sFlexPattern) : null; zTagsPattern = (sTagsPattern !== "") ? new RegExp(sTagsPattern) : null; } catch (e) { console.log("Error in regex pattern"); console.log(e.message); } | | < < | | | | | | 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 | zFlexPattern = (sFlexPattern !== "") ? new RegExp(sFlexPattern) : null; zTagsPattern = (sTagsPattern !== "") ? new RegExp(sTagsPattern) : null; } catch (e) { console.log("Error in regex pattern"); console.log(e.message); } yield* this._select(zFlexPattern, zTagsPattern, 0, ""); } * _select (zFlexPattern, zTagsPattern, iAddr, sWord) { // recursive generator for (let [nVal, jAddr] of this._getArcs(iAddr)) { if (nVal <= this.nChar) { // simple character yield* this._select(zFlexPattern, zTagsPattern, jAddr, sWord + this.lArcVal[nVal]); } else { if (!zFlexPattern || zFlexPattern.test(sWord)) { let sStem = this.funcStemming(sWord, this.lArcVal[nVal]); for (let [nMorphVal, _] of this._getArcs(jAddr)) { if (!zTagsPattern || zTagsPattern.test(this.lArcVal[nMorphVal])) { yield [sWord, sStem, this.lArcVal[nMorphVal]]; } } } } } } _morph (sWord) { // returns morphologies of sWord let iAddr = 0; for (let c of sWord) { if (!this.dChar.has(c)) { return []; } iAddr = this._lookupArcNode(this.dChar.get(c), iAddr); |
︙ | ︙ | |||
607 608 609 610 611 612 613 | iAddr = iEndArcAddr + 1; } return l; } return []; } | | | 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 | iAddr = iEndArcAddr + 1; } return l; } return []; } _stem (sWord) { // returns stems list of sWord let iAddr = 0; for (let c of sWord) { if (!this.dChar.has(c)) { return []; } iAddr = this._lookupArcNode(this.dChar.get(c), iAddr); |
︙ | ︙ | |||
637 638 639 640 641 642 643 | iAddr = iEndArcAddr + 1; } return l; } return []; } | | | < < < < < < < < < < < < < < < < < < < < < < < < < < < | 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 | iAddr = iEndArcAddr + 1; } return l; } return []; } _lookupArcNode (nVal, iAddr) { // looks if nVal is an arc at the node at iAddr, if yes, returns address of next node else None while (true) { let iEndArcAddr = iAddr+1; let nRawArc = this.byDic[iAddr]; if (nVal == (nRawArc & this._arcMask)) { // the value we are looking for // we return the address of the next node return this.byDic[iEndArcAddr]; } else { // value not found if (nRawArc & this._lastArcMask) { return null; } iAddr = iEndArcAddr + 1; } } } * _getArcs (iAddr) { // generator: return all arcs at <iAddr> as tuples of (nVal, iAddr) while (true) { let iEndArcAddr = iAddr+1; let nRawArc = this.byDic[iAddr]; yield [nRawArc & this._arcMask, this.byDic[iEndArcAddr]]; if (nRawArc & this._lastArcMask) { break; } iAddr = iEndArcAddr+1; } } } if (typeof(exports) !== 'undefined') { exports.IBDAWG = IBDAWG; } |