1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
// JavaScript
// FSA DICTIONARY BUILDER
//
// by Olivier R.
// License: MPL 2
//
// This tool encodes a lexicon into an indexable binary dictionary.
// Input files MUST be encoded in UTF-8.
"use strict";
if (typeof(require) !== 'undefined') {
var str_transform = require("resource://grammalecte/graphspell/str_transform.js");
}
${map}
class DAWG {
/* DIRECTED ACYCLIC WORD GRAPH
This code is inspired by Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115)
|
>
>
>
>
|
>
|
|
<
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
|
// JavaScript
// FSA DICTIONARY BUILDER
//
// by Olivier R.
// License: MPL 2
//
// This tool encodes a lexicon into an indexable binary dictionary.
// Input files MUST be encoded in UTF-8.
/* jshint esversion:6, -W097 */
/* jslint esversion:6 */
/* global require, exports, console, helpers */
"use strict";
if(typeof process !== 'undefined') {
var str_transform = require('./str_transform.js');
} else if (typeof require !== 'undefined') {
var str_transform = require('resource://grammalecte/graphspell/str_transform.js');
}
${map}
class DAWG {
/* DIRECTED ACYCLIC WORD GRAPH
This code is inspired by Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115)
|
︙ | | | ︙ | |
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
|
let lWord = [];
for (let [sFlex, iAff, iTag] of lEntry) {
let lTemp = [];
for (let c of sFlex) {
lTemp.push(dChar.get(c));
}
lTemp.push(iAff+nChar);
lTemp.push(iTag+nChar+nAff)
lWord.push(lTemp);
}
lEntry.length = 0; // clear the array
// Occurrence count of each arc value, used to sort the arcs of each node
let lKeyVal = [];
for (let c of dChar.keys()) { lKeyVal.push([dChar.get(c), dCharOccur.get(c)]); }
|
|
|
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
let lWord = [];
for (let [sFlex, iAff, iTag] of lEntry) {
let lTemp = [];
for (let c of sFlex) {
lTemp.push(dChar.get(c));
}
lTemp.push(iAff+nChar);
lTemp.push(iTag+nChar+nAff);
lWord.push(lTemp);
}
lEntry.length = 0; // clear the array
// Occurrence count of each arc value, used to sort the arcs of each node
let lKeyVal = [];
for (let c of dChar.keys()) { lKeyVal.push([dChar.get(c), dCharOccur.get(c)]); }
|
︙ | | | ︙ | |
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
|
this.nNextId += 1;
return this.nNextId-1;
},
reset: function () {
this.nNextId = 0;
}
}
class DawgNode {
constructor () {
this.i = oNodeCounter.getId();
this.final = false;
|
|
|
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
|
this.nNextId += 1;
return this.nNextId-1;
},
reset: function () {
this.nNextId = 0;
}
};
class DawgNode {
constructor () {
this.i = oNodeCounter.getId();
this.final = false;
|
︙ | | | ︙ | |
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
|
convValueToHexString (nVal, nByte) {
// nVal: value to convert, nByte: number of bytes
let sHexVal = nVal.toString(16); // conversion to hexadecimal string
//console.log(`value: ${nVal} in ${nByte} bytes`);
if (sHexVal.length < (nByte*2)) {
return "0".repeat((nByte*2) - sHexVal.length) + sHexVal;
} else if (sHexVal.length == (nByte*2)) {
return sHexVal
} else {
throw "Conversion to byte string: value bigger than allowed.";
}
}
}
|
|
|
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
|
convValueToHexString (nVal, nByte) {
// nVal: value to convert, nByte: number of bytes
let sHexVal = nVal.toString(16); // conversion to hexadecimal string
//console.log(`value: ${nVal} in ${nByte} bytes`);
if (sHexVal.length < (nByte*2)) {
return "0".repeat((nByte*2) - sHexVal.length) + sHexVal;
} else if (sHexVal.length == (nByte*2)) {
return sHexVal;
} else {
throw "Conversion to byte string: value bigger than allowed.";
}
}
}
|
︙ | | | ︙ | |