226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
|
class Lexicographe {
constructor (oDict, oTokenizer, oLocGraph) {
this.oDict = oDict;
this.oTokenizer = oTokenizer;
this.oLocGraph = JSON.parse(oLocGraph);
this._zCompoundWord = new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)-((?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts][’'](?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous)$", "i");
this._zTag = new RegExp("[:;/][a-zA-Z0-9ÑÂĴĈŔÔṼŴ!][^:;/]*", "g");
}
getInfoForToken (oToken) {
// Token: .sType, .sValue, .nStart, .nEnd
// returns an object {sType, sValue, aLabel}
let m = null;
|
|
|
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
|
class Lexicographe {
constructor (oDict, oTokenizer, oLocGraph) {
this.oDict = oDict;
this.oTokenizer = oTokenizer;
this.oLocGraph = JSON.parse(oLocGraph);
this._zInterroVerb = new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)-((?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts][’'](?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous)$", "i");
this._zTag = new RegExp("[:;/][a-zA-Z0-9ÑÂĴĈŔÔṼŴ!][^:;/]*", "g");
}
getInfoForToken (oToken) {
// Token: .sType, .sValue, .nStart, .nEnd
// returns an object {sType, sValue, aLabel}
let m = null;
|
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
|
case 'WORD':
if (oToken.sValue.gl_count("-") > 4) {
return {
sType: "COMPLEX",
sValue: oToken.sValue,
aLabel: ["élément complexe indéterminé"]
};
} else if (m = this._zCompoundWord.exec(oToken.sValue)) {
// mots composés
let lMorph = this.oDict.getMorph(m[1]);
let aElem = [];
for (let s of lMorph) {
if (s.includes(":")) aElem.push(this._formatTags(s));
}
aElem.push("-" + m[2] + ": " + this._formatSuffix(m[2].toLowerCase()));
return {
sType: oToken.sType,
sValue: oToken.sValue,
aLabel: aElem
};
} else if (this.oDict.isValidToken(oToken.sValue)) {
let lMorph = this.oDict.getMorph(oToken.sValue);
let aElem = [];
for (let s of lMorph) {
if (s.includes(":")) aElem.push(this._formatTags(s));
}
|
|
<
|
>
>
>
>
|
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
|
case 'WORD':
if (oToken.sValue.gl_count("-") > 4) {
return {
sType: "COMPLEX",
sValue: oToken.sValue,
aLabel: ["élément complexe indéterminé"]
};
} else if (m = this._zInterroVerb.exec(oToken.sValue)) {
// mots composés
let lMorph = this.oDict.getMorph(m[1]);
let aElem = [];
for (let s of lMorph) {
if (s.includes(":")) aElem.push(this._formatTags(s));
}
return {
sType: oToken.sType,
sValue: oToken.sValue,
aLabel: ["forme verbale interrogative"],
aSubElem: [
{ sType: oToken.sType, sValue: m[1], aLabel: aElem },
{ sType: oToken.sType, sValue: "-" + m[2], aLabel: [this._formatSuffix(m[2].toLowerCase())] }
]
};
} else if (this.oDict.isValidToken(oToken.sValue)) {
let lMorph = this.oDict.getMorph(oToken.sValue);
let aElem = [];
for (let s of lMorph) {
if (s.includes(":")) aElem.push(this._formatTags(s));
}
|