127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
|
[':s', " singulière"],
[':p', " plurielle"],
[':i', " invariable"],
['/L', " {latin}"]
]);
// Labels for the one-letter verb-type codes found in a ":LV" locution tag.
// _formatTagsLoc looks the codes up one character at a time and appends each
// value verbatim, which is why every label carries its own leading space.
const _dLocVERB = new Map()
    .set('i', " intransitif")
    .set('n', " transitif indirect")
    .set('t', " transitif direct")
    .set('p', " pronominal")
    .set('m', " impersonnel");
const _dPFX = new Map([
['d', "(de), déterminant épicène invariable"],
['l', "(le/la), déterminant masculin/féminin singulier"],
['j', "(je), pronom personnel sujet, 1ʳᵉ pers., épicène singulier"],
['m', "(me), pronom personnel objet, 1ʳᵉ pers., épicène singulier"],
|
|
|
|
|
|
|
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
|
[':s', " singulière"],
[':p', " plurielle"],
[':i', " invariable"],
['/L', " {latin}"]
]);
// Labels for the one-letter verb-type codes found in a ":LV" locution tag.
// _formatTagsLoc looks the codes up one character at a time and appends each
// value verbatim, which is why every label carries its own leading ", ".
const _dLocVERB = new Map()
    .set('i', ", intransitif")
    .set('n', ", transitif indirect")
    .set('t', ", transitif direct")
    .set('p', ", pronominal")
    .set('m', ", impersonnel");
const _dPFX = new Map([
['d', "(de), déterminant épicène invariable"],
['l', "(le/la), déterminant masculin/féminin singulier"],
['j', "(je), pronom personnel sujet, 1ʳᵉ pers., épicène singulier"],
['m', "(me), pronom personnel objet, 1ʳᵉ pers., épicène singulier"],
|
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
|
class Lexicographe {
constructor (oDict, oTokenizer, oLocGraph) {
this.oDict = oDict;
this.oTokenizer = oTokenizer;
this.oLocGraph = JSON.parse(oLocGraph);
this._zElidedPrefix = new RegExp("^([dljmtsncç]|quoiqu|lorsqu|jusqu|puisqu|qu)['’](.+)", "i");
this._zCompoundWord = new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)-((?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts][’'](?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous)$", "i");
this._zTag = new RegExp("[:;/][a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ*Ṽ][^:;/]*", "g");
this._zLocTag = new RegExp("((:L)([A-Z])?)([a-z].?)?(:.*)?");
}
getInfoForToken (oToken) {
// Token: .sType, .sValue, .nStart, .nEnd
// return a object {sType, sValue, aLabel}
let m = null;
try {
|
<
|
<
|
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
|
class Lexicographe {
constructor (oDict, oTokenizer, oLocGraph) {
this.oDict = oDict;
this.oTokenizer = oTokenizer;
this.oLocGraph = JSON.parse(oLocGraph);
this._zCompoundWord = new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)-((?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts][’'](?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous)$", "i");
this._zTag = new RegExp("[:;/][a-zA-Z0-9ÑÂĴĈŔÔṼŴ!][^:;/]*", "g");
}
getInfoForToken (oToken) {
// Token: .sType, .sValue, .nStart, .nEnd
// return a object {sType, sValue, aLabel}
let m = null;
try {
|
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
|
while ((m = this._zTag.exec(sTags)) !== null) {
sRes += _dTAGS.get(m[0]);
}
if (sRes.startsWith(" verbe") && !sRes.includes("infinitif")) {
sRes += " [" + sTags.slice(1, sTags.indexOf(" ")) + "]";
}
if (!sRes) {
sRes = "#Erreur. Étiquette inconnue : [" + sTags + "]";
return sRes;
}
return sRes.gl_trimRight(",");
}
_formatTagsLoc (sTags) {
let sRes = "";
let oTagsVerb = this._zLocTag.exec(sTags);
sRes += _dLocTAGS.get(oTagsVerb[1]);
if (oTagsVerb[4] && oTagsVerb[1] === ':LV'){
oTagsVerb[4].split(/(?!$)/u).forEach(function(sKey) {
sRes += _dLocVERB.get(sKey);
});
}
if (oTagsVerb[5]){
let m;
while ((m = this._zTag.exec(oTagsVerb[5])) !== null) {
sRes += _dLocTAGS.get(m[0]);
}
}
if (!sRes) {
sRes = "#Erreur. Étiquette inconnue : [" + sTags + "]";
return sRes;
}
return sRes.gl_trimRight(",");
}
_formatSuffix (s) {
if (s.startsWith("t-")) {
return "“t” euphonique +" + _dAD.get(s.slice(2));
|
|
<
>
|
>
|
<
<
>
|
<
|
<
|
<
|
<
|
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
|
while ((m = this._zTag.exec(sTags)) !== null) {
sRes += _dTAGS.get(m[0]);
}
if (sRes.startsWith(" verbe") && !sRes.includes("infinitif")) {
sRes += " [" + sTags.slice(1, sTags.indexOf(" ")) + "]";
}
if (!sRes) {
return "#Erreur. Étiquette inconnue : [" + sTags + "]";
}
return sRes.gl_trimRight(",");
}
_formatTagsLoc (sTags) {
let sRes = "";
let m;
while ((m = this._zTag.exec(sTags)) !== null) {
if (m[0].startsWith(":LV")) {
sRes += _dLocTAGS.get(":LV");
for (let c of m[0].slice(3)) {
sRes += _dLocVERB.get(c);
}
} else {
sRes += _dLocTAGS.get(m[0]);
}
}
if (!sRes) {
return "#Erreur. Étiquette inconnue : [" + sTags + "]";
}
return sRes.gl_trimRight(",");
}
_formatSuffix (s) {
if (s.startsWith("t-")) {
return "“t” euphonique +" + _dAD.get(s.slice(2));
|