914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
|
rewriteFromTags (bDebug=false) {
// rewrite the sentence, modify tokens, purge the token list
if (bDebug) {
console.log("REWRITE");
}
let lNewToken = [];
let nMergeUntil = 0;
let dTokenMerger = null;
for (let [iToken, dToken] of this.lToken.entries()) {
let bKeepToken = true;
if (dToken["sType"] != "INFO") {
if (nMergeUntil && iToken <= nMergeUntil) {
dTokenMerger["sValue"] += " ".repeat(dToken["nStart"] - dTokenMerger["nEnd"]) + dToken["sValue"];
dTokenMerger["nEnd"] = dToken["nEnd"];
if (bDebug) {
console.log(" MERGED TOKEN: " + dTokenMerger["sValue"]);
}
bKeepToken = false;
}
if (dToken.hasOwnProperty("nMergeUntil")) {
if (iToken > nMergeUntil) { // this token is not already merged with a previous token
dTokenMerger = dToken;
}
if (dToken["nMergeUntil"] > nMergeUntil) {
nMergeUntil = dToken["nMergeUntil"];
}
delete dToken["nMergeUntil"];
}
else if (dToken.hasOwnProperty("bToRemove")) {
if (bDebug) {
console.log(" REMOVED: " + dToken["sValue"]);
}
this.sSentence = this.sSentence.slice(0, dToken["nStart"]) + " ".repeat(dToken["nEnd"] - dToken["nStart"]) + this.sSentence.slice(dToken["nEnd"]);
bKeepToken = false;
}
}
//
if (bKeepToken) {
lNewToken.push(dToken);
if (dToken.hasOwnProperty("sNewValue")) {
// rewrite token and sentence
if (bDebug) {
console.log(dToken["sValue"] + " -> " + dToken["sNewValue"]);
}
dToken["sRealValue"] = dToken["sValue"];
dToken["sValue"] = dToken["sNewValue"];
let nDiffLen = dToken["sRealValue"].length - dToken["sNewValue"].length;
let sNewRepl = (nDiffLen >= 0) ? dToken["sNewValue"] + " ".repeat(nDiffLen) : dToken["sNewValue"].slice(0, dToken["sRealValue"].length);
this.sSentence = this.sSentence.slice(0,dToken["nStart"]) + sNewRepl + this.sSentence.slice(dToken["nEnd"]);
delete dToken["sNewValue"];
}
}
else {
try {
this.dTokenPos.delete(dToken["nStart"]);
}
catch (e) {
console.log(this.asString());
console.log(dToken);
}
}
}
if (bDebug) {
console.log(" TEXT REWRITED: " + this.sSentence);
}
this.lToken.length = 0;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
|
rewriteFromTags (bDebug=false) {
// rewrite the sentence, modify tokens, purge the token list
if (bDebug) {
console.log("REWRITE");
}
let lNewToken = [];
let nMergeUntil = 0;
let oMergingToken = null;
for (let [iToken, oToken] of this.lToken.entries()) {
let bKeepToken = true;
if (oToken["sType"] != "INFO") {
if (nMergeUntil && iToken <= nMergeUntil) {
oMergingToken["sValue"] += " ".repeat(oToken["nStart"] - oMergingToken["nEnd"]) + oToken["sValue"];
oMergingToken["nEnd"] = oToken["nEnd"];
if (bDebug) {
console.log(" MERGED TOKEN: " + oMergingToken["sValue"]);
}
bKeepToken = false;
}
if (oToken.hasOwnProperty("nMergeUntil")) {
if (iToken > nMergeUntil) { // this token is not already merged with a previous token
oMergingToken = oToken;
}
if (oToken["nMergeUntil"] > nMergeUntil) {
nMergeUntil = oToken["nMergeUntil"];
}
delete oToken["nMergeUntil"];
}
else if (oToken.hasOwnProperty("bToRemove")) {
if (bDebug) {
console.log(" REMOVED: " + oToken["sValue"]);
}
this.sSentence = this.sSentence.slice(0, oToken["nStart"]) + " ".repeat(oToken["nEnd"] - oToken["nStart"]) + this.sSentence.slice(oToken["nEnd"]);
bKeepToken = false;
}
}
//
if (bKeepToken) {
lNewToken.push(oToken);
if (oToken.hasOwnProperty("sNewValue")) {
// rewrite token and sentence
if (bDebug) {
console.log(oToken["sValue"] + " -> " + oToken["sNewValue"]);
}
oToken["sRealValue"] = oToken["sValue"];
oToken["sValue"] = oToken["sNewValue"];
let nDiffLen = oToken["sRealValue"].length - oToken["sNewValue"].length;
let sNewRepl = (nDiffLen >= 0) ? oToken["sNewValue"] + " ".repeat(nDiffLen) : oToken["sNewValue"].slice(0, oToken["sRealValue"].length);
this.sSentence = this.sSentence.slice(0,oToken["nStart"]) + sNewRepl + this.sSentence.slice(oToken["nEnd"]);
delete oToken["sNewValue"];
}
}
else {
try {
this.dTokenPos.delete(oToken["nStart"]);
}
catch (e) {
console.log(this.asString());
console.log(oToken);
}
}
}
if (bDebug) {
console.log(" TEXT REWRITED: " + this.sSentence);
}
this.lToken.length = 0;
|
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
|
// search sPattern
return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1));
}
//// Analyse tokens for graph rules
function g_value (dToken, sValues, nLeft=null, nRight=null) {
    // Test whether the value of <dToken> (or a slice of it) is listed in <sValues>.
    //   dToken:  token object; only its "sValue" string is read
    //   sValues: candidate values, each one enclosed by "|" (e.g. "|le|la|les|")
    //   nLeft:   optional start index of the slice to test (null = whole value)
    //   nRight:  optional end index of the slice (null = up to the end of the value)
    // Returns true if the value is found as is, or through the case fallbacks below.
    // Relies on the string helpers gl_isTitle, gl_isUpperCase and gl_toCapitalize,
    // which are not defined in this block.
    const sTokenValue = dToken["sValue"];
    let sText = sTokenValue;
    if (nLeft !== null) {
        // slice(nLeft, null) would coerce null to 0 and always yield "",
        // so the end index is passed only when it is really given
        sText = (nRight !== null) ? sTokenValue.slice(nLeft, nRight) : sTokenValue.slice(nLeft);
    }
    let sValue = "|" + sText + "|";
    if (sValues.includes(sValue)) {
        return true;
    }
    if (sTokenValue.slice(0,2).gl_isTitle()) { // we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout".
        if (sValues.includes(sValue.toLowerCase())) {
            return true;
        }
    }
    else if (sTokenValue.gl_isUpperCase()) {
        // uppercase token: try the capitalized form first, then the lowercase form
        sValue = "|" + sValue.slice(1).gl_toCapitalize();
        if (sValues.includes(sValue)) {
            return true;
        }
        sValue = sValue.toLowerCase();
        if (sValues.includes(sValue)) {
            return true;
        }
    }
    return false;
}
function g_morph (dToken, sPattern, sNegPattern="", nLeft=null, nRight=null, bMemorizeMorph=true) {
// analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies
let lMorph;
if (dToken.hasOwnProperty("lMorph")) {
lMorph = dToken["lMorph"];
}
else {
if (nLeft !== null) {
let sValue = (nRight !== null) ? dToken["sValue"].slice(nLeft, nRight) : dToken["sValue"].slice(nLeft);
lMorph = _oSpellChecker.getMorph(sValue);
if (bMemorizeMorph) {
dToken["lMorph"] = lMorph;
}
} else {
lMorph = _oSpellChecker.getMorph(dToken["sValue"]);
}
}
if (lMorph.length == 0) {
return false;
}
// check negative condition
if (sNegPattern) {
|
|
|
|
|
|
|
|
|
|
|
|
|
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
|
// search sPattern
return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1));
}
//// Analyse tokens for graph rules
function g_value (oToken, sValues, nLeft=null, nRight=null) {
    // Test whether the value of <oToken> (or a slice of it) is listed in <sValues>.
    //   oToken:  token object; only its "sValue" string is read
    //   sValues: candidate values, each one enclosed by "|" (e.g. "|le|la|les|")
    //   nLeft:   optional start index of the slice to test (null = whole value)
    //   nRight:  optional end index of the slice (null = up to the end of the value)
    // Returns true if the value is found as is, or through the case fallbacks below.
    // Relies on the string helpers gl_isTitle, gl_isUpperCase and gl_toCapitalize,
    // which are not defined in this block.
    const sTokenValue = oToken["sValue"];
    let sText = sTokenValue;
    if (nLeft !== null) {
        // slice(nLeft, null) would coerce null to 0 and always yield "",
        // so the end index is passed only when it is really given
        sText = (nRight !== null) ? sTokenValue.slice(nLeft, nRight) : sTokenValue.slice(nLeft);
    }
    let sValue = "|" + sText + "|";
    if (sValues.includes(sValue)) {
        return true;
    }
    if (sTokenValue.slice(0,2).gl_isTitle()) { // we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout".
        if (sValues.includes(sValue.toLowerCase())) {
            return true;
        }
    }
    else if (sTokenValue.gl_isUpperCase()) {
        // uppercase token: try the capitalized form first, then the lowercase form
        sValue = "|" + sValue.slice(1).gl_toCapitalize();
        if (sValues.includes(sValue)) {
            return true;
        }
        sValue = sValue.toLowerCase();
        if (sValues.includes(sValue)) {
            return true;
        }
    }
    return false;
}
function g_morph (oToken, sPattern, sNegPattern="", nLeft=null, nRight=null, bMemorizeMorph=true) {
// analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies
let lMorph;
if (oToken.hasOwnProperty("lMorph")) {
lMorph = oToken["lMorph"];
}
else {
if (nLeft !== null) {
let sValue = (nRight !== null) ? oToken["sValue"].slice(nLeft, nRight) : oToken["sValue"].slice(nLeft);
lMorph = _oSpellChecker.getMorph(sValue);
if (bMemorizeMorph) {
oToken["lMorph"] = lMorph;
}
} else {
lMorph = _oSpellChecker.getMorph(oToken["sValue"]);
}
}
if (lMorph.length == 0) {
return false;
}
// check negative condition
if (sNegPattern) {
|
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
|
}
}
}
// search sPattern
return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1));
}
function g_analyse (dToken, sPattern, sNegPattern="", nLeft=null, nRight=null, bMemorizeMorph=true) {
    // Ask the spellchecker for the morphologies of the token value (or of a slice of it);
    // morphologies already stored on the token are not consulted.
    // Returns true when at least one morphology matches <sPattern> and none matches <sNegPattern>.
    // When <sNegPattern> is "*", every morphology has to match <sPattern> instead.
    // Returns false when the spellchecker yields no morphology at all.
    const sText = dToken["sValue"];
    let lMorph;
    if (nLeft === null) {
        lMorph = _oSpellChecker.getMorph(sText);
    }
    else {
        const sPart = (nRight === null) ? sText.slice(nLeft) : sText.slice(nLeft, nRight);
        lMorph = _oSpellChecker.getMorph(sPart);
        if (bMemorizeMorph) {
            // only morphologies computed from a slice are stored on the token
            dToken["lMorph"] = lMorph;
        }
    }
    if (lMorph.length === 0) {
        return false;
    }
    const isWanted = (sMorph) => sMorph.search(sPattern) !== -1;
    if (sNegPattern) {
        if (sNegPattern == "*") {
            return lMorph.every(isWanted);
        }
        const isRejected = (sMorph) => sMorph.search(sNegPattern) !== -1;
        if (lMorph.some(isRejected)) {
            return false;
        }
    }
    return lMorph.some(isWanted);
}
function g_merged_analyse (dToken1, dToken2, cMerger, sPattern, sNegPattern="", bSetMorph=true) {
    // Join the values of both tokens with <cMerger> and look the result up in the spellchecker
    // (morphologies stored on the tokens are not consulted).
    // Returns true when at least one morphology matches <sPattern> and none matches <sNegPattern>;
    // when <sNegPattern> is "*", every morphology has to match <sPattern>.
    // On success, and if <bSetMorph> is true, the morphologies are stored on <dToken1>.
    const sMerged = dToken1["sValue"] + cMerger + dToken2["sValue"];
    const lMorph = _oSpellChecker.getMorph(sMerged);
    if (lMorph.length === 0) {
        return false;
    }
    const hitsPattern = (sMorph) => sMorph.search(sPattern) !== -1;
    let bFound;
    if (sNegPattern && sNegPattern == "*") {
        bFound = lMorph.every(hitsPattern);
    }
    else {
        if (sNegPattern && lMorph.some((sMorph) => sMorph.search(sNegPattern) !== -1)) {
            // a forbidden morphology is present: nothing is stored on the token
            return false;
        }
        bFound = lMorph.some(hitsPattern);
    }
    if (bFound && bSetMorph) {
        dToken1["lMorph"] = lMorph;
    }
    return bFound;
}
function g_tag_before (dToken, dTags, sTag) {
    // Return true if <sTag> is a key of <dTags> and the index "i" of <dToken> is greater than
    // the first element of the entry stored for that tag (presumably the first position
    // where the tag was set -- confirm against the code filling <dTags>).
    return dTags.has(sTag) ? dToken["i"] > dTags.get(sTag)[0] : false;
}
function g_tag_after (dToken, dTags, sTag) {
    // Return true if <sTag> is a key of <dTags> and the index "i" of <dToken> is lower than
    // the second element of the entry stored for that tag (presumably the last position
    // where the tag was set -- confirm against the code filling <dTags>).
    return dTags.has(sTag) ? dToken["i"] < dTags.get(sTag)[1] : false;
}
function g_tag (dToken, sTag) {
    // Return true if <dToken> owns an "aTags" collection that contains <sTag>.
    if (!dToken.hasOwnProperty("aTags")) {
        return false;
    }
    return dToken["aTags"].has(sTag);
}
function g_space_between_tokens (dToken1, dToken2, nMin, nMax=null) {
let nSpace = dToken2["nStart"] - dToken1["nEnd"]
if (nSpace < nMin) {
return false;
}
if (nMax !== null && nSpace > nMax) {
return false;
}
return true;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
|
}
}
}
// search sPattern
return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1));
}
function g_analyse (oToken, sPattern, sNegPattern="", nLeft=null, nRight=null, bMemorizeMorph=true) {
    // Ask the spellchecker for the morphologies of the token value (or of a slice of it);
    // morphologies already stored on the token are not consulted.
    // Returns true when at least one morphology matches <sPattern> and none matches <sNegPattern>.
    // When <sNegPattern> is "*", every morphology has to match <sPattern> instead.
    // Returns false when the spellchecker yields no morphology at all.
    const sText = oToken["sValue"];
    let lMorph;
    if (nLeft === null) {
        lMorph = _oSpellChecker.getMorph(sText);
    }
    else {
        const sPart = (nRight === null) ? sText.slice(nLeft) : sText.slice(nLeft, nRight);
        lMorph = _oSpellChecker.getMorph(sPart);
        if (bMemorizeMorph) {
            // only morphologies computed from a slice are stored on the token
            oToken["lMorph"] = lMorph;
        }
    }
    if (lMorph.length === 0) {
        return false;
    }
    const isWanted = (sMorph) => sMorph.search(sPattern) !== -1;
    if (sNegPattern) {
        if (sNegPattern == "*") {
            return lMorph.every(isWanted);
        }
        const isRejected = (sMorph) => sMorph.search(sNegPattern) !== -1;
        if (lMorph.some(isRejected)) {
            return false;
        }
    }
    return lMorph.some(isWanted);
}
function g_merged_analyse (oToken1, oToken2, cMerger, sPattern, sNegPattern="", bSetMorph=true) {
    // Join the values of both tokens with <cMerger> and look the result up in the spellchecker
    // (morphologies stored on the tokens are not consulted).
    // Returns true when at least one morphology matches <sPattern> and none matches <sNegPattern>;
    // when <sNegPattern> is "*", every morphology has to match <sPattern>.
    // On success, and if <bSetMorph> is true, the morphologies are stored on <oToken1>.
    const sMerged = oToken1["sValue"] + cMerger + oToken2["sValue"];
    const lMorph = _oSpellChecker.getMorph(sMerged);
    if (lMorph.length === 0) {
        return false;
    }
    const hitsPattern = (sMorph) => sMorph.search(sPattern) !== -1;
    let bFound;
    if (sNegPattern && sNegPattern == "*") {
        bFound = lMorph.every(hitsPattern);
    }
    else {
        if (sNegPattern && lMorph.some((sMorph) => sMorph.search(sNegPattern) !== -1)) {
            // a forbidden morphology is present: nothing is stored on the token
            return false;
        }
        bFound = lMorph.some(hitsPattern);
    }
    if (bFound && bSetMorph) {
        oToken1["lMorph"] = lMorph;
    }
    return bFound;
}
function g_tag_before (oToken, dTags, sTag) {
    // Return true if <sTag> is a key of <dTags> and the index "i" of <oToken> is greater than
    // the first element of the entry stored for that tag (presumably the first position
    // where the tag was set -- confirm against the code filling <dTags>).
    return dTags.has(sTag) ? oToken["i"] > dTags.get(sTag)[0] : false;
}
function g_tag_after (oToken, dTags, sTag) {
    // Return true if <sTag> is a key of <dTags> and the index "i" of <oToken> is lower than
    // the second element of the entry stored for that tag (presumably the last position
    // where the tag was set -- confirm against the code filling <dTags>).
    return dTags.has(sTag) ? oToken["i"] < dTags.get(sTag)[1] : false;
}
function g_tag (oToken, sTag) {
    // Return true if <oToken> owns an "aTags" collection that contains <sTag>.
    if (!oToken.hasOwnProperty("aTags")) {
        return false;
    }
    return oToken["aTags"].has(sTag);
}
function g_space_between_tokens (oToken1, oToken2, nMin, nMax=null) {
let nSpace = oToken2["nStart"] - oToken1["nEnd"]
if (nSpace < nMin) {
return false;
}
if (nMax !== null && nSpace > nMax) {
return false;
}
return true;
|
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
|
dTokenPos.get(nPos)["lMorph"] = lMorph;
return true;
}
//// Disambiguation for graph rules
function g_select (dToken, sPattern, lDefault=null) {
    // Disambiguation: keep only the morphologies of <dToken> that match <sPattern>.
    // The morphologies are those stored on the token or, failing that, those given by the spellchecker.
    // <lDefault>, when truthy, replaces the morphologies if there are fewer than two of them
    // or if none matches. Always returns true.
    const lCandidates = dToken.hasOwnProperty("lMorph") ? dToken["lMorph"] : _oSpellChecker.getMorph(dToken["sValue"]);
    const nCandidates = lCandidates.length;
    if (nCandidates !== 0 && nCandidates !== 1) {
        const lKept = lCandidates.filter((sMorph) => sMorph.search(sPattern) !== -1);
        if (lKept.length === 0) {
            if (lDefault) {
                dToken["lMorph"] = lDefault;
            }
        }
        else if (lKept.length != nCandidates) {
            // nothing is written when every morphology matches
            dToken["lMorph"] = lKept;
        }
        return true;
    }
    // nothing to choose from: only the default, if any, is applied
    if (lDefault) {
        dToken["lMorph"] = lDefault;
    }
    return true;
}
function g_exclude (dToken, sPattern, lDefault=null) {
    // Disambiguation: drop the morphologies of <dToken> that match <sPattern>.
    // The morphologies are those stored on the token or, failing that, those given by the spellchecker.
    // <lDefault>, when truthy, replaces the morphologies if there are fewer than two of them
    // or if all of them match. Always returns true.
    const lCandidates = dToken.hasOwnProperty("lMorph") ? dToken["lMorph"] : _oSpellChecker.getMorph(dToken["sValue"]);
    const nCandidates = lCandidates.length;
    if (nCandidates !== 0 && nCandidates !== 1) {
        const lKept = lCandidates.filter((sMorph) => sMorph.search(sPattern) === -1);
        if (lKept.length === 0) {
            if (lDefault) {
                dToken["lMorph"] = lDefault;
            }
        }
        else if (lKept.length != nCandidates) {
            // nothing is written when no morphology is excluded
            dToken["lMorph"] = lKept;
        }
        return true;
    }
    // nothing to choose from: only the default, if any, is applied
    if (lDefault) {
        dToken["lMorph"] = lDefault;
    }
    return true;
}
function g_define (dToken, lMorph) {
    // Disambiguation: store <lMorph> as the morphologies of <dToken>, replacing any previous ones.
    // Always returns true.
    dToken.lMorph = lMorph;
    return true;
}
function g_define_from (dToken, nLeft=null, nRight=null) {
    // Disambiguation: set the morphologies of <dToken> to those the spellchecker gives
    // for its value, or for the slice [nLeft, nRight) of its value when <nLeft> is given
    // (the slice runs to the end of the value when <nRight> is null). Always returns true.
    const sFull = dToken["sValue"];
    let sText;
    if (nLeft === null) {
        sText = sFull;
    }
    else if (nRight === null) {
        sText = sFull.slice(nLeft);
    }
    else {
        sText = sFull.slice(nLeft, nRight);
    }
    dToken["lMorph"] = _oSpellChecker.getMorph(sText);
    return true;
}
//////// GRAMMAR CHECKER PLUGINS
${pluginsJS}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
|
dTokenPos.get(nPos)["lMorph"] = lMorph;
return true;
}
//// Disambiguation for graph rules
function g_select (oToken, sPattern, lDefault=null) {
    // Disambiguation: keep only the morphologies of <oToken> that match <sPattern>.
    // The morphologies are those stored on the token or, failing that, those given by the spellchecker.
    // <lDefault>, when truthy, replaces the morphologies if there are fewer than two of them
    // or if none matches. Always returns true.
    const lCandidates = oToken.hasOwnProperty("lMorph") ? oToken["lMorph"] : _oSpellChecker.getMorph(oToken["sValue"]);
    const nCandidates = lCandidates.length;
    if (nCandidates !== 0 && nCandidates !== 1) {
        const lKept = lCandidates.filter((sMorph) => sMorph.search(sPattern) !== -1);
        if (lKept.length === 0) {
            if (lDefault) {
                oToken["lMorph"] = lDefault;
            }
        }
        else if (lKept.length != nCandidates) {
            // nothing is written when every morphology matches
            oToken["lMorph"] = lKept;
        }
        return true;
    }
    // nothing to choose from: only the default, if any, is applied
    if (lDefault) {
        oToken["lMorph"] = lDefault;
    }
    return true;
}
function g_exclude (oToken, sPattern, lDefault=null) {
    // Disambiguation: drop the morphologies of <oToken> that match <sPattern>.
    // The morphologies are those stored on the token or, failing that, those given by the spellchecker.
    // <lDefault>, when truthy, replaces the morphologies if there are fewer than two of them
    // or if all of them match. Always returns true.
    const lCandidates = oToken.hasOwnProperty("lMorph") ? oToken["lMorph"] : _oSpellChecker.getMorph(oToken["sValue"]);
    const nCandidates = lCandidates.length;
    if (nCandidates !== 0 && nCandidates !== 1) {
        const lKept = lCandidates.filter((sMorph) => sMorph.search(sPattern) === -1);
        if (lKept.length === 0) {
            if (lDefault) {
                oToken["lMorph"] = lDefault;
            }
        }
        else if (lKept.length != nCandidates) {
            // nothing is written when no morphology is excluded
            oToken["lMorph"] = lKept;
        }
        return true;
    }
    // nothing to choose from: only the default, if any, is applied
    if (lDefault) {
        oToken["lMorph"] = lDefault;
    }
    return true;
}
function g_define (oToken, lMorph) {
    // Disambiguation: store <lMorph> as the morphologies of <oToken>, replacing any previous ones.
    // Always returns true.
    oToken.lMorph = lMorph;
    return true;
}
function g_define_from (oToken, nLeft=null, nRight=null) {
    // Disambiguation: set the morphologies of <oToken> to those the spellchecker gives
    // for its value, or for the slice [nLeft, nRight) of its value when <nLeft> is given
    // (the slice runs to the end of the value when <nRight> is null). Always returns true.
    const sFull = oToken["sValue"];
    let sText;
    if (nLeft === null) {
        sText = sFull;
    }
    else if (nRight === null) {
        sText = sFull.slice(nLeft);
    }
    else {
        sText = sFull.slice(nLeft, nRight);
    }
    oToken["lMorph"] = _oSpellChecker.getMorph(sText);
    return true;
}
//////// GRAMMAR CHECKER PLUGINS
${pluginsJS}
|