Overview
Comment: | [graphspell] handling apostrophes |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | graphspell | rg |
Files: | files | file ages | folders |
SHA3-256: | 764811b5f19783c83bcb50bfd1354151 |
User & Date: | olr on 2018-08-26 14:44:48 |
Other Links: | branch diff | manifest | tags |
Context
2018-08-26
| ||
15:02 | [fr] gendicfr: plus de copie des dictionnaires Hunspell dans les extensions Mozilla check-in: 8d3060486d user: olr tags: fr, rg | |
14:44 | [graphspell] handling apostrophes check-in: 764811b5f1 user: olr tags: graphspell, rg | |
13:41 | [fr] gendicfr: utilise le lemme alternatif s’il existe check-in: a9d1f17b1f user: olr tags: fr, rg | |
Changes
Modified graphspell-js/ibdawg.js from [068f06a16d] to [bd4ff8b3de].
︙ | ︙ | |||
222 223 224 225 226 227 228 | if (this.isValid(sToken)) { return true; } if (sToken.includes("-")) { if (sToken.gl_count("-") > 4) { return true; } | | | | | 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 | if (this.isValid(sToken)) { return true; } if (sToken.includes("-")) { if (sToken.gl_count("-") > 4) { return true; } return sToken.split("-").every(sWord => this.isValid(sWord)); } if (sToken.includes(".") || sToken.includes("·")) { return true; } return false; } isValid (sWord) { // checks if sWord is valid (different casing tested if the first letter is a capital) if (!sWord) { return null; } if (sWord.includes("'")) { // ugly hack sWord = sWord.replace("'", "’"); } if (this.lookup(sWord)) { return true; } if (sWord.charAt(0).gl_isUpperCase()) { if (sWord.length > 1) { if (sWord.gl_isTitle()) { |
︙ | ︙ | |||
480 481 482 483 484 485 486 | if (!zTagsPattern || zTagsPattern.test(this.lArcVal[nMorphVal])) { yield [sWord, sStem, this.lArcVal[nMorphVal]]; } } } } } | | | 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 | if (!zTagsPattern || zTagsPattern.test(this.lArcVal[nMorphVal])) { yield [sWord, sStem, this.lArcVal[nMorphVal]]; } } } } } } _morph1 (sWord) { // returns morphologies of sWord let iAddr = 0; for (let c of sWord) { if (!this.dChar.has(c)) { return []; |
︙ | ︙ | |||
502 503 504 505 506 507 508 | let l = []; let nRawArc = 0; while (!(nRawArc & this._lastArcMask)) { let iEndArcAddr = iAddr + this.nBytesArc; nRawArc = this._convBytesToInteger(this.byDic.slice(iAddr, iEndArcAddr)); let nArc = nRawArc & this._arcMask; if (nArc > this.nChar) { | | | 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 | let l = []; let nRawArc = 0; while (!(nRawArc & this._lastArcMask)) { let iEndArcAddr = iAddr + this.nBytesArc; nRawArc = this._convBytesToInteger(this.byDic.slice(iAddr, iEndArcAddr)); let nArc = nRawArc & this._arcMask; if (nArc > this.nChar) { // This value is not a char, this is a stemming code let sStem = ">" + this.funcStemming(sWord, this.lArcVal[nArc]); // Now , we go to the next node and retrieve all following arcs values, all of them are tags let iAddr2 = this._convBytesToInteger(this.byDic.slice(iEndArcAddr, iEndArcAddr+this.nBytesNodeAddress)); let nRawArc2 = 0; while (!(nRawArc2 & this._lastArcMask)) { let iEndArcAddr2 = iAddr2 + this.nBytesArc; nRawArc2 = this._convBytesToInteger(this.byDic.slice(iAddr2, iEndArcAddr2)); |
︙ | ︙ | |||
541 542 543 544 545 546 547 | let l = []; let nRawArc = 0; while (!(nRawArc & this._lastArcMask)) { let iEndArcAddr = iAddr + this.nBytesArc; nRawArc = this._convBytesToInteger(this.byDic.slice(iAddr, iEndArcAddr)); let nArc = nRawArc & this._arcMask; if (nArc > this.nChar) { | | | | 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 | let l = []; let nRawArc = 0; while (!(nRawArc & this._lastArcMask)) { let iEndArcAddr = iAddr + this.nBytesArc; nRawArc = this._convBytesToInteger(this.byDic.slice(iAddr, iEndArcAddr)); let nArc = nRawArc & this._arcMask; if (nArc > this.nChar) { // This value is not a char, this is a stemming code l.push(this.funcStemming(sWord, this.lArcVal[nArc])); } iAddr = iEndArcAddr + this.nBytesNodeAddress; } return l; } return []; } _lookupArcNode1 (nVal, iAddr) { // looks if nVal is an arc at the node at iAddr, if yes, returns address of next node else None while (true) { let iEndArcAddr = iAddr+this.nBytesArc; let nRawArc = this._convBytesToInteger(this.byDic.slice(iAddr, iEndArcAddr)); if (nVal == (nRawArc & this._arcMask)) { // the value we are looking for // we return the address of the next node return this._convBytesToInteger(this.byDic.slice(iEndArcAddr, iEndArcAddr+this.nBytesNodeAddress)); } else { // value not found if (nRawArc & this._lastArcMask) { return null; |
︙ | ︙ |
Modified graphspell/ibdawg.py from [0f1b5456be] to [15b71c861b].
︙ | ︙ | |||
245 246 247 248 249 250 251 | return True return False def isValid (self, sWord): "checks if <sWord> is valid (different casing tested if the first letter is a capital)" if not sWord: return None | | | | 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 | return True return False def isValid (self, sWord): "checks if <sWord> is valid (different casing tested if the first letter is a capital)" if not sWord: return None if "'" in sWord: # ugly hack sWord = sWord.replace("'", "’") if self.lookup(sWord): return True if sWord[0:1].isupper(): if len(sWord) > 1: if sWord.istitle(): return self.lookup(sWord.lower()) if sWord.isupper(): |
︙ | ︙ |