372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
|
}
}
update (sSentence, bDebug=false) {
// update <sSentence> and retokenize
this.sSentence = sSentence;
let lNewToken = Array.from(_oTokenizer.genTokens(sSentence, true));
for (let dToken of lNewToken) {
if (this.dTokenPos.gl_get(dToken["nStart"], {}).hasOwnProperty("lMorph")) {
dToken["lMorph"] = this.dTokenPos.get(dToken["nStart"])["lMorph"];
}
if (this.dTokenPos.gl_get(dToken["nStart"], {}).hasOwnProperty("aTags")) {
dToken["aTags"] = this.dTokenPos.get(dToken["nStart"])["aTags"];
}
}
this.lToken = lNewToken;
this.dTokenPos.clear();
for (let dToken of this.lToken) {
if (dToken["sType"] != "INFO") {
this.dTokenPos.set(dToken["nStart"], dToken);
}
}
if (bDebug) {
console.log("UPDATE:");
console.log(this.asString());
}
}
* _getNextPointers (dToken, dGraph, dPointer, bDebug=false) {
// Generator: for the node held by <dPointer>, yield one new pointer
// { "iNode1": ..., "dNode": ... } per arc of that node matched by <dToken>.
//   dToken    token object; this method reads "sValue", "sType" and, when present, "lMorph" and "aTags"
//   dGraph    graph container; arc targets are keys into it (dGraph[key] is the next node)
//   dPointer  current pointer: "dNode" (node to leave), "iNode1" (copied unchanged into every
//             yielded pointer; presumably the index of the first token of the match, confirm
//             against the caller) and optionally "bKeep"
//   bDebug    when true, every match is logged with console.log
// Arcs are tried in this order: literal value, case variants, <re_value>, <lemmas>, <re_morph>,
// <tags>, <meta>, then the "<>" jump arc.
// Any exception is caught and logged with console.error; the generator then simply ends.
try {
let dNode = dPointer["dNode"];
let iNode1 = dPointer["iNode1"];
// becomes true as soon as one arc matched; only used for the "bKeep" fallback near the end
let bTokenFound = false;
// token value
if (dNode.hasOwnProperty(dToken["sValue"])) {
if (bDebug) {
console.log(" MATCH: " + dToken["sValue"]);
}
yield { "iNode1": iNode1, "dNode": dGraph[dNode[dToken["sValue"]]] };
bTokenFound = true;
}
// case variants: a title-cased token is also tried in lower case
if (dToken["sValue"].slice(0,2).gl_isTitle()) { // we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout".
let sValue = dToken["sValue"].toLowerCase();
if (dNode.hasOwnProperty(sValue)) {
if (bDebug) {
console.log(" MATCH: " + sValue);
}
yield { "iNode1": iNode1, "dNode": dGraph[dNode[sValue]] };
bTokenFound = true;
}
}
// an all-uppercase token is tried in lower case, then capitalized
else if (dToken["sValue"].gl_isUpperCase()) {
let sValue = dToken["sValue"].toLowerCase();
if (dNode.hasOwnProperty(sValue)) {
if (bDebug) {
console.log(" MATCH: " + sValue);
}
yield { "iNode1": iNode1, "dNode": dGraph[dNode[sValue]] };
bTokenFound = true;
}
sValue = dToken["sValue"].gl_toCapitalize();
if (dNode.hasOwnProperty(sValue)) {
if (bDebug) {
console.log(" MATCH: " + sValue);
}
yield { "iNode1": iNode1, "dNode": dGraph[dNode[sValue]] };
bTokenFound = true;
}
}
// regex value arcs (not tried for tokens of type INFO, PUNC or SIGN)
if (dToken["sType"] != "INFO" && dToken["sType"] != "PUNC" && dToken["sType"] != "SIGN") {
if (dNode.hasOwnProperty("<re_value>")) {
// each key is a pattern, optionally followed by "¬" and an anti-pattern
for (let sRegex in dNode["<re_value>"]) {
if (!sRegex.includes("¬")) {
// no anti-pattern
if (dToken["sValue"].search(sRegex) !== -1) {
if (bDebug) {
console.log(" MATCH: ~" + sRegex);
}
yield { "iNode1": iNode1, "dNode": dGraph[dNode["<re_value>"][sRegex]] };
bTokenFound = true;
}
} else {
// there is an anti-pattern
let [sPattern, sNegPattern] = sRegex.split("¬", 2);
// the arc is rejected as soon as the anti-pattern matches
if (sNegPattern && dToken["sValue"].search(sNegPattern) !== -1) {
continue;
}
// an empty pattern matches any value that passed the anti-pattern test
if (!sPattern || dToken["sValue"].search(sPattern) !== -1) {
if (bDebug) {
console.log(" MATCH: ~" + sRegex);
}
yield { "iNode1": iNode1, "dNode": dGraph[dNode["<re_value>"][sRegex]] };
bTokenFound = true;
}
}
}
}
}
// analysable tokens (token type starts with "WORD")
if (dToken["sType"].slice(0,4) == "WORD") {
// token lemmas
if (dNode.hasOwnProperty("<lemmas>")) {
for (let sLemma of _oSpellChecker.getLemma(dToken["sValue"])) {
if (dNode["<lemmas>"].hasOwnProperty(sLemma)) {
if (bDebug) {
console.log(" MATCH: >" + sLemma);
}
yield { "iNode1": iNode1, "dNode": dGraph[dNode["<lemmas>"][sLemma]] };
bTokenFound = true;
}
}
}
// regex morph arcs
if (dNode.hasOwnProperty("<re_morph>")) {
// morphologies already stored on the token take precedence over the spell checker's
let lMorph = (dToken.hasOwnProperty("lMorph")) ? dToken["lMorph"] : _oSpellChecker.getMorph(dToken["sValue"]);
for (let sRegex in dNode["<re_morph>"]) {
if (!sRegex.includes("¬")) {
// no anti-pattern
if (lMorph.some(sMorph => (sMorph.search(sRegex) !== -1))) {
if (bDebug) {
console.log(" MATCH: @" + sRegex);
}
yield { "iNode1": iNode1, "dNode": dGraph[dNode["<re_morph>"][sRegex]] };
bTokenFound = true;
}
} else {
// there is an anti-pattern
let [sPattern, sNegPattern] = sRegex.split("¬", 2);
if (sNegPattern == "*") {
// all morphologies must match with <sPattern>
if (sPattern) {
// the length test prevents every() from succeeding on an empty list
if (lMorph.length > 0 && lMorph.every(sMorph => (sMorph.search(sPattern) !== -1))) {
if (bDebug) {
console.log(" MATCH: @" + sRegex);
}
yield { "iNode1": iNode1, "dNode": dGraph[dNode["<re_morph>"][sRegex]] };
bTokenFound = true;
}
}
} else {
// rejected if any morphology matches the anti-pattern
if (sNegPattern && lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) {
continue;
}
// an empty pattern is accepted without testing the morphologies
if (!sPattern || lMorph.some(sMorph => (sMorph.search(sPattern) !== -1))) {
if (bDebug) {
console.log(" MATCH: @" + sRegex);
}
yield { "iNode1": iNode1, "dNode": dGraph[dNode["<re_morph>"][sRegex]] };
bTokenFound = true;
}
}
}
}
}
}
// token tags
if (dToken.hasOwnProperty("aTags") && dNode.hasOwnProperty("<tags>")) {
for (let sTag of dToken["aTags"]) {
if (dNode["<tags>"].hasOwnProperty(sTag)) {
if (bDebug) {
console.log(" MATCH: /" + sTag);
}
yield { "iNode1": iNode1, "dNode": dGraph[dNode["<tags>"][sTag]] };
bTokenFound = true;
}
}
}
// meta arc (for token type)
if (dNode.hasOwnProperty("<meta>")) {
for (let sMeta in dNode["<meta>"]) {
// no regex here: "*" matches every type, otherwise the token type must equal <sMeta>
if (sMeta == "*" || dToken["sType"] == sMeta) {
if (bDebug) {
console.log(" MATCH: *" + sMeta);
}
yield { "iNode1": iNode1, "dNode": dGraph[dNode["<meta>"][sMeta]] };
bTokenFound = true;
}
else if (sMeta.includes("¬")) {
// negative meta: matches when the token type does not occur anywhere within <sMeta>
if (!sMeta.includes(dToken["sType"])) {
if (bDebug) {
console.log(" MATCH: *" + sMeta);
}
yield { "iNode1": iNode1, "dNode": dGraph[dNode["<meta>"][sMeta]] };
bTokenFound = true;
}
}
}
}
// a pointer created by a jump (flag "bKeep") is yielded again unchanged when nothing matched
if (!bTokenFound && dPointer.hasOwnProperty("bKeep")) {
yield dPointer;
}
// JUMP
// Warning! Recursion!
// the "<>" arc is followed for the same token; the new pointer carries "bKeep"
if (dNode.hasOwnProperty("<>")) {
let dPointer2 = { "iNode1": iNode1, "dNode": dGraph[dNode["<>"]], "bKeep": true };
yield* this._getNextPointers(dToken, dGraph, dPointer2, bDebug);
}
}
catch (e) {
console.error(e);
}
}
parseGraph (dGraph, sCountry="${country_default}", dOptions=null, bShowRuleId=false, bDebug=false, bContext=false) {
// parse graph with tokens from the text and execute actions encountered
let lPointer = [];
let bTagAndRewrite = false;
try {
for (let [iToken, dToken] of this.lToken.entries()) {
if (bDebug) {
console.log("TOKEN: " + dToken["sValue"]);
}
// check arcs for each existing pointer
let lNextPointer = [];
for (let dPointer of lPointer) {
lNextPointer.push(...this._getNextPointers(dToken, dGraph, dPointer, bDebug));
}
lPointer = lNextPointer;
// check arcs of first nodes
lPointer.push(...this._getNextPointers(dToken, dGraph, { "iNode1": iToken, "dNode": dGraph[0] }, bDebug));
// check if there is rules to check for each pointer
for (let dPointer of lPointer) {
if (dPointer["dNode"].hasOwnProperty("<rules>")) {
let bChange = this._executeActions(dGraph, dPointer["dNode"]["<rules>"], dPointer["iNode1"]-1, iToken, dOptions, sCountry, bShowRuleId, bDebug, bContext);
if (bChange) {
bTagAndRewrite = true;
}
}
}
}
} catch (e) {
console.error(e);
}
if (bTagAndRewrite) {
this.rewriteFromTags(bDebug);
}
if (bDebug) {
console.log(this.asString());
}
return this.sSentence;
}
_executeActions (dGraph, dNode, nTokenOffset, nLastToken, dOptions, sCountry, bShowRuleId, bDebug, bContext) {
// execute actions found in the DARG
let bChange = false;
for (let [sLineId, nextNodeKey] of Object.entries(dNode)) {
let bCondMemo = null;
for (let sRuleId of dGraph[nextNodeKey]) {
try {
if (bDebug) {
console.log(" >TRY: " + sRuleId + " " + sLineId);
}
let [sOption, sFuncCond, cActionType, sWhat, ...eAct] = gc_rules_graph.dRule[sRuleId];
// Suggestion [ option, condition, "-", replacement/suggestion/action, iTokenStart, iTokenEnd, cStartLimit, cEndLimit, bCaseSvty, nPriority, sMessage, sURL ]
// TextProcessor [ option, condition, "~", replacement/suggestion/action, iTokenStart, iTokenEnd, bCaseSvty ]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
|
}
}
update (sSentence, bDebug=false) {
// update <sSentence> and retokenize
this.sSentence = sSentence;
let lNewToken = Array.from(_oTokenizer.genTokens(sSentence, true));
for (let oToken of lNewToken) {
if (this.dTokenPos.gl_get(oToken["nStart"], {}).hasOwnProperty("lMorph")) {
oToken["lMorph"] = this.dTokenPos.get(oToken["nStart"])["lMorph"];
}
if (this.dTokenPos.gl_get(oToken["nStart"], {}).hasOwnProperty("aTags")) {
oToken["aTags"] = this.dTokenPos.get(oToken["nStart"])["aTags"];
}
}
this.lToken = lNewToken;
this.dTokenPos.clear();
for (let oToken of this.lToken) {
if (oToken["sType"] != "INFO") {
this.dTokenPos.set(oToken["nStart"], oToken);
}
}
if (bDebug) {
console.log("UPDATE:");
console.log(this.asString());
}
}
* _getNextPointers (oToken, oGraph, oPointer, bDebug=false) {
// Generator: for the node designated by <oPointer>, yield one new pointer
// { "iToken1": ..., "iNode": ... } per arc of that node matched by <oToken>.
//   oToken    token object; this method reads "sValue", "sType" and, when present, "lMorph" and "aTags"
//   oGraph    graph container; oGraph[key] is a node, and arcs store the key of their target node
//   oPointer  current pointer: "iNode" (key of the node to leave), "iToken1" (copied unchanged into
//             every yielded pointer; presumably the index of the first token of the match, confirm
//             against the caller) and optionally "bKeep"
//   bDebug    when true, every match is logged with console.log
// Arcs are tried in this order: literal value, case variants, <re_value>, <lemmas>, <morph>,
// <re_morph>, <tags>, <meta>, then the "<>" jump arc.
// Any exception is caught and logged with console.error; the generator then simply ends.
try {
let oNode = oGraph[oPointer["iNode"]];
let iToken1 = oPointer["iToken1"];
// becomes true as soon as one arc matched; only used for the "bKeep" fallback near the end
let bTokenFound = false;
// token value
if (oNode.hasOwnProperty(oToken["sValue"])) {
if (bDebug) {
console.log(" MATCH: " + oToken["sValue"]);
}
yield { "iToken1": iToken1, "iNode": oNode[oToken["sValue"]] };
bTokenFound = true;
}
// case variants: a title-cased token is also tried in lower case
if (oToken["sValue"].slice(0,2).gl_isTitle()) { // we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout".
let sValue = oToken["sValue"].toLowerCase();
if (oNode.hasOwnProperty(sValue)) {
if (bDebug) {
console.log(" MATCH: " + sValue);
}
yield { "iToken1": iToken1, "iNode": oNode[sValue] };
bTokenFound = true;
}
}
// an all-uppercase token is tried in lower case, then capitalized
else if (oToken["sValue"].gl_isUpperCase()) {
let sValue = oToken["sValue"].toLowerCase();
if (oNode.hasOwnProperty(sValue)) {
if (bDebug) {
console.log(" MATCH: " + sValue);
}
yield { "iToken1": iToken1, "iNode": oNode[sValue] };
bTokenFound = true;
}
sValue = oToken["sValue"].gl_toCapitalize();
if (oNode.hasOwnProperty(sValue)) {
if (bDebug) {
console.log(" MATCH: " + sValue);
}
yield { "iToken1": iToken1, "iNode": oNode[sValue] };
bTokenFound = true;
}
}
// regex value arcs (not tried for tokens of type INFO, PUNC or SIGN)
if (oToken["sType"] != "INFO" && oToken["sType"] != "PUNC" && oToken["sType"] != "SIGN") {
if (oNode.hasOwnProperty("<re_value>")) {
// each key is a pattern, optionally followed by "¬" and an anti-pattern
for (let sRegex in oNode["<re_value>"]) {
if (!sRegex.includes("¬")) {
// no anti-pattern
if (oToken["sValue"].search(sRegex) !== -1) {
if (bDebug) {
console.log(" MATCH: ~" + sRegex);
}
yield { "iToken1": iToken1, "iNode": oNode["<re_value>"][sRegex] };
bTokenFound = true;
}
} else {
// there is an anti-pattern
let [sPattern, sNegPattern] = sRegex.split("¬", 2);
// the arc is rejected as soon as the anti-pattern matches
if (sNegPattern && oToken["sValue"].search(sNegPattern) !== -1) {
continue;
}
// an empty pattern matches any value that passed the anti-pattern test
if (!sPattern || oToken["sValue"].search(sPattern) !== -1) {
if (bDebug) {
console.log(" MATCH: ~" + sRegex);
}
yield { "iToken1": iToken1, "iNode": oNode["<re_value>"][sRegex] };
bTokenFound = true;
}
}
}
}
}
// analysable tokens (token type starts with "WORD")
if (oToken["sType"].slice(0,4) == "WORD") {
// token lemmas
if (oNode.hasOwnProperty("<lemmas>")) {
for (let sLemma of _oSpellChecker.getLemma(oToken["sValue"])) {
if (oNode["<lemmas>"].hasOwnProperty(sLemma)) {
if (bDebug) {
console.log(" MATCH: >" + sLemma);
}
yield { "iToken1": iToken1, "iNode": oNode["<lemmas>"][sLemma] };
bTokenFound = true;
}
}
}
// morph arcs: keys are compared as plain substrings (includes), not as regexes
if (oNode.hasOwnProperty("<morph>")) {
// morphologies already stored on the token take precedence over the spell checker's
let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : _oSpellChecker.getMorph(oToken["sValue"]);
// a token without any morphology matches no <morph> arc
if (lMorph.length > 0) {
for (let sSearch in oNode["<morph>"]) {
if (!sSearch.includes("¬")) {
// no anti-pattern
if (lMorph.some(sMorph => (sMorph.includes(sSearch)))) {
if (bDebug) {
console.log(" MATCH: $" + sSearch);
}
yield { "iToken1": iToken1, "iNode": oNode["<morph>"][sSearch] };
bTokenFound = true;
}
} else {
// there is an anti-pattern
let [sPattern, sNegPattern] = sSearch.split("¬", 2);
if (sNegPattern == "*") {
// all morphologies must match with <sPattern>
if (sPattern) {
if (lMorph.every(sMorph => (sMorph.includes(sPattern)))) {
if (bDebug) {
console.log(" MATCH: $" + sSearch);
}
yield { "iToken1": iToken1, "iNode": oNode["<morph>"][sSearch] };
bTokenFound = true;
}
}
} else {
// rejected if any morphology contains the anti-pattern
if (sNegPattern && lMorph.some(sMorph => (sMorph.includes(sNegPattern)))) {
continue;
}
// an empty pattern is accepted without testing the morphologies
if (!sPattern || lMorph.some(sMorph => (sMorph.includes(sPattern)))) {
if (bDebug) {
console.log(" MATCH: $" + sSearch);
}
yield { "iToken1": iToken1, "iNode": oNode["<morph>"][sSearch] };
bTokenFound = true;
}
}
}
}
}
}
// regex morph arcs: same logic as <morph>, but keys are used as regexes (search)
if (oNode.hasOwnProperty("<re_morph>")) {
let lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : _oSpellChecker.getMorph(oToken["sValue"]);
// a token without any morphology matches no <re_morph> arc
if (lMorph.length > 0) {
for (let sRegex in oNode["<re_morph>"]) {
if (!sRegex.includes("¬")) {
// no anti-pattern
if (lMorph.some(sMorph => (sMorph.search(sRegex) !== -1))) {
if (bDebug) {
console.log(" MATCH: @" + sRegex);
}
yield { "iToken1": iToken1, "iNode": oNode["<re_morph>"][sRegex] };
bTokenFound = true;
}
} else {
// there is an anti-pattern
let [sPattern, sNegPattern] = sRegex.split("¬", 2);
if (sNegPattern == "*") {
// all morphologies must match with <sPattern>
if (sPattern) {
if (lMorph.every(sMorph => (sMorph.search(sPattern) !== -1))) {
if (bDebug) {
console.log(" MATCH: @" + sRegex);
}
yield { "iToken1": iToken1, "iNode": oNode["<re_morph>"][sRegex] };
bTokenFound = true;
}
}
} else {
// rejected if any morphology matches the anti-pattern
if (sNegPattern && lMorph.some(sMorph => (sMorph.search(sNegPattern) !== -1))) {
continue;
}
// an empty pattern is accepted without testing the morphologies
if (!sPattern || lMorph.some(sMorph => (sMorph.search(sPattern) !== -1))) {
if (bDebug) {
console.log(" MATCH: @" + sRegex);
}
yield { "iToken1": iToken1, "iNode": oNode["<re_morph>"][sRegex] };
bTokenFound = true;
}
}
}
}
}
}
}
// token tags
if (oToken.hasOwnProperty("aTags") && oNode.hasOwnProperty("<tags>")) {
for (let sTag of oToken["aTags"]) {
if (oNode["<tags>"].hasOwnProperty(sTag)) {
if (bDebug) {
console.log(" MATCH: /" + sTag);
}
yield { "iToken1": iToken1, "iNode": oNode["<tags>"][sTag] };
bTokenFound = true;
}
}
}
// meta arc (for token type)
if (oNode.hasOwnProperty("<meta>")) {
for (let sMeta in oNode["<meta>"]) {
// no regex here: "*" matches every type, otherwise the token type must equal <sMeta>
if (sMeta == "*" || oToken["sType"] == sMeta) {
if (bDebug) {
console.log(" MATCH: *" + sMeta);
}
yield { "iToken1": iToken1, "iNode": oNode["<meta>"][sMeta] };
bTokenFound = true;
}
else if (sMeta.includes("¬")) {
// negative meta: matches when the token type does not occur anywhere within <sMeta>
if (!sMeta.includes(oToken["sType"])) {
if (bDebug) {
console.log(" MATCH: *" + sMeta);
}
yield { "iToken1": iToken1, "iNode": oNode["<meta>"][sMeta] };
bTokenFound = true;
}
}
}
}
// a pointer created by a jump (flag "bKeep") is yielded again unchanged when nothing matched
if (!bTokenFound && oPointer.hasOwnProperty("bKeep")) {
yield oPointer;
}
// JUMP
// Warning! Recursion!
// the "<>" arc is followed for the same token; the new pointer carries "bKeep"
if (oNode.hasOwnProperty("<>")) {
let oPointer2 = { "iToken1": iToken1, "iNode": oNode["<>"], "bKeep": true };
yield* this._getNextPointers(oToken, oGraph, oPointer2, bDebug);
}
}
catch (e) {
console.error(e);
}
}
parseGraph (oGraph, sCountry="${country_default}", dOptions=null, bShowRuleId=false, bDebug=false, bContext=false) {
// parse graph with tokens from the text and execute actions encountered
let lPointer = [];
let bTagAndRewrite = false;
try {
for (let [iToken, oToken] of this.lToken.entries()) {
if (bDebug) {
console.log("TOKEN: " + oToken["sValue"]);
}
// check arcs for each existing pointer
let lNextPointer = [];
for (let oPointer of lPointer) {
lNextPointer.push(...this._getNextPointers(oToken, oGraph, oPointer, bDebug));
}
lPointer = lNextPointer;
// check arcs of first nodes
lPointer.push(...this._getNextPointers(oToken, oGraph, { "iToken1": iToken, "iNode": 0 }, bDebug));
// check if there is rules to check for each pointer
for (let oPointer of lPointer) {
if (oGraph[oPointer["iNode"]].hasOwnProperty("<rules>")) {
let bChange = this._executeActions(oGraph, oGraph[oPointer["iNode"]]["<rules>"], oPointer["iToken1"]-1, iToken, dOptions, sCountry, bShowRuleId, bDebug, bContext);
if (bChange) {
bTagAndRewrite = true;
}
}
}
}
} catch (e) {
console.error(e);
}
if (bTagAndRewrite) {
this.rewriteFromTags(bDebug);
}
if (bDebug) {
console.log(this.asString());
}
return this.sSentence;
}
_executeActions (oGraph, oNode, nTokenOffset, nLastToken, dOptions, sCountry, bShowRuleId, bDebug, bContext) {
// execute actions found in the DARG
let bChange = false;
for (let [sLineId, nextNodeKey] of Object.entries(oNode)) {
let bCondMemo = null;
for (let sRuleId of oGraph[nextNodeKey]) {
try {
if (bDebug) {
console.log(" >TRY: " + sRuleId + " " + sLineId);
}
let [sOption, sFuncCond, cActionType, sWhat, ...eAct] = gc_rules_graph.dRule[sRuleId];
// Suggestion [ option, condition, "-", replacement/suggestion/action, iTokenStart, iTokenEnd, cStartLimit, cEndLimit, bCaseSvty, nPriority, sMessage, sURL ]
// TextProcessor [ option, condition, "~", replacement/suggestion/action, iTokenStart, iTokenEnd, bCaseSvty ]
|
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
|
// search sPattern
return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1));
}
//// Analyse tokens for graph rules
function g_value (dToken, sValues, nLeft=null, nRight=null) {
    // Test whether the value of <dToken> (or a slice of it) is one of <sValues>.
    //   dToken   token object; only "sValue" is read
    //   sValues  candidate values, each one delimited by "|" on both sides (e.g. "|le|la|les|")
    //   nLeft    start index of the slice to test, or null to test the whole value
    //   nRight   end index of the slice, or null to slice up to the end of the value
    // Returns true if the value matches as is, or in lower case when the token is
    // title-cased, or capitalized / in lower case when the token is all uppercase.
    // The case tests are always made on the full token value, not on the slice.
    let sText = dToken["sValue"];
    if (nLeft !== null) {
        // slice(nLeft, null) would coerce null to 0 and always produce "",
        // so an absent right bound must be handled explicitly
        sText = (nRight !== null) ? sText.slice(nLeft, nRight) : sText.slice(nLeft);
    }
    let sValue = "|" + sText + "|";
    if (sValues.includes(sValue)) {
        return true;
    }
    if (dToken["sValue"].slice(0,2).gl_isTitle()) { // we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout".
        if (sValues.includes(sValue.toLowerCase())) {
            return true;
        }
    }
    else if (dToken["sValue"].gl_isUpperCase()) {
        // drop the leading "|" before capitalizing, then put it back
        sValue = "|"+sValue.slice(1).gl_toCapitalize();
        if (sValues.includes(sValue)) {
            return true;
        }
        sValue = sValue.toLowerCase();
        if (sValues.includes(sValue)) {
            return true;
        }
    }
    return false;
}
function g_morph (dToken, sPattern, sNegPattern="", nLeft=null, nRight=null, bMemorizeMorph=true) {
// analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies
let lMorph;
if (dToken.hasOwnProperty("lMorph")) {
lMorph = dToken["lMorph"];
}
else {
if (nLeft !== null) {
let sValue = (nRight !== null) ? dToken["sValue"].slice(nLeft, nRight) : dToken["sValue"].slice(nLeft);
lMorph = _oSpellChecker.getMorph(sValue);
if (bMemorizeMorph) {
dToken["lMorph"] = lMorph;
}
} else {
lMorph = _oSpellChecker.getMorph(dToken["sValue"]);
}
}
if (lMorph.length == 0) {
return false;
}
// check negative condition
if (sNegPattern) {
|
|
|
|
|
|
|
|
|
|
|
|
|
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
|
// search sPattern
return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1));
}
//// Analyse tokens for graph rules
function g_value (oToken, sValues, nLeft=null, nRight=null) {
    // Test whether the value of <oToken> (or a slice of it) is one of <sValues>.
    //   oToken   token object; only "sValue" is read
    //   sValues  candidate values, each one delimited by "|" on both sides (e.g. "|le|la|les|")
    //   nLeft    start index of the slice to test, or null to test the whole value
    //   nRight   end index of the slice, or null to slice up to the end of the value
    // Returns true if the value matches as is, or in lower case when the token is
    // title-cased, or capitalized / in lower case when the token is all uppercase.
    // The case tests are always made on the full token value, not on the slice.
    let sText = oToken["sValue"];
    if (nLeft !== null) {
        // slice(nLeft, null) would coerce null to 0 and always produce "",
        // so an absent right bound must be handled explicitly
        sText = (nRight !== null) ? sText.slice(nLeft, nRight) : sText.slice(nLeft);
    }
    let sValue = "|" + sText + "|";
    if (sValues.includes(sValue)) {
        return true;
    }
    if (oToken["sValue"].slice(0,2).gl_isTitle()) { // we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout".
        if (sValues.includes(sValue.toLowerCase())) {
            return true;
        }
    }
    else if (oToken["sValue"].gl_isUpperCase()) {
        // drop the leading "|" before capitalizing, then put it back
        sValue = "|"+sValue.slice(1).gl_toCapitalize();
        if (sValues.includes(sValue)) {
            return true;
        }
        sValue = sValue.toLowerCase();
        if (sValues.includes(sValue)) {
            return true;
        }
    }
    return false;
}
function g_morph (oToken, sPattern, sNegPattern="", nLeft=null, nRight=null, bMemorizeMorph=true) {
// analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies
let lMorph;
if (oToken.hasOwnProperty("lMorph")) {
lMorph = oToken["lMorph"];
}
else {
if (nLeft !== null) {
let sValue = (nRight !== null) ? oToken["sValue"].slice(nLeft, nRight) : oToken["sValue"].slice(nLeft);
lMorph = _oSpellChecker.getMorph(sValue);
if (bMemorizeMorph) {
oToken["lMorph"] = lMorph;
}
} else {
lMorph = _oSpellChecker.getMorph(oToken["sValue"]);
}
}
if (lMorph.length == 0) {
return false;
}
// check negative condition
if (sNegPattern) {
|
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
|
}
}
}
// search sPattern
return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1));
}
function g_analyse (dToken, sPattern, sNegPattern="", nLeft=null, nRight=null, bMemorizeMorph=true) {
    // Look up the morphologies of the token value (or of the slice nLeft..nRight)
    // in the spell checker, ignoring any "lMorph" already stored on the token.
    // Returns true if no morphology matches <sNegPattern> and at least one matches
    // <sPattern>; with <sNegPattern> equal to "*", every morphology must match <sPattern>.
    // Returns false when there is no morphology at all.
    // When a slice is analysed and <bMemorizeMorph> is true, the result is stored in dToken["lMorph"].
    const bSliced = (nLeft !== null);
    let sWord = dToken["sValue"];
    if (bSliced) {
        sWord = (nRight !== null) ? sWord.slice(nLeft, nRight) : sWord.slice(nLeft);
    }
    const lMorph = _oSpellChecker.getMorph(sWord);
    if (bSliced && bMemorizeMorph) {
        dToken["lMorph"] = lMorph;
    }
    if (lMorph.length == 0) {
        return false;
    }
    // build a predicate telling whether a morphology matches <sRegex>
    const hits = (sRegex) => (sMorph) => sMorph.search(sRegex) !== -1;
    if (sNegPattern) {
        if (sNegPattern == "*") {
            return lMorph.every(hits(sPattern));
        }
        if (lMorph.some(hits(sNegPattern))) {
            return false;
        }
    }
    return lMorph.some(hits(sPattern));
}
function g_merged_analyse (dToken1, dToken2, cMerger, sPattern, sNegPattern="", bSetMorph=true) {
    // Analyse the word made of both token values joined with <cMerger>
    // (morphologies stored on the tokens are not used).
    // Returns true if no morphology matches <sNegPattern> and at least one matches
    // <sPattern>; with <sNegPattern> equal to "*", every morphology must match <sPattern>.
    // On success, and if <bSetMorph> is true, the morphologies of the merged word
    // are stored in dToken1["lMorph"].
    const lMorph = _oSpellChecker.getMorph(dToken1["sValue"] + cMerger + dToken2["sValue"]);
    if (lMorph.length == 0) {
        return false;
    }
    const isWanted = (sMorph) => sMorph.search(sPattern) !== -1;
    let bResult;
    if (sNegPattern == "*") {
        // all morphologies must match <sPattern>
        bResult = lMorph.every(isWanted);
    } else {
        if (sNegPattern && lMorph.some((sMorph) => sMorph.search(sNegPattern) !== -1)) {
            return false;
        }
        bResult = lMorph.some(isWanted);
    }
    if (bResult && bSetMorph) {
        dToken1["lMorph"] = lMorph;
    }
    return bResult;
}
function g_tag_before (dToken, dTags, sTag) {
    // True if <sTag> is registered in <dTags> and the token index ("i") is greater
    // than the first element of the entry stored for that tag.
    return dTags.has(sTag) ? dToken["i"] > dTags.get(sTag)[0] : false;
}
function g_tag_after (dToken, dTags, sTag) {
    // True if <sTag> is registered in <dTags> and the token index ("i") is lower
    // than the second element of the entry stored for that tag.
    if (dTags.has(sTag)) {
        const nBound = dTags.get(sTag)[1];
        return dToken["i"] < nBound;
    }
    return false;
}
function g_tag (dToken, sTag) {
    // True if the token owns an "aTags" collection that contains <sTag>.
    if (!dToken.hasOwnProperty("aTags")) {
        return false;
    }
    return dToken["aTags"].has(sTag);
}
function g_space_between_tokens (dToken1, dToken2, nMin, nMax=null) {
let nSpace = dToken2["nStart"] - dToken1["nEnd"]
if (nSpace < nMin) {
return false;
}
if (nMax !== null && nSpace > nMax) {
return false;
}
return true;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
|
}
}
}
// search sPattern
return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1));
}
function g_analyse (oToken, sPattern, sNegPattern="", nLeft=null, nRight=null, bMemorizeMorph=true) {
    // Look up the morphologies of the token value (or of the slice nLeft..nRight)
    // in the spell checker, ignoring any "lMorph" already stored on the token.
    // Returns true if no morphology matches <sNegPattern> and at least one matches
    // <sPattern>; with <sNegPattern> equal to "*", every morphology must match <sPattern>.
    // Returns false when there is no morphology at all.
    // When a slice is analysed and <bMemorizeMorph> is true, the result is stored in oToken["lMorph"].
    const anyMatch = (lList, sRegex) => lList.some((sMorph) => sMorph.search(sRegex) !== -1);
    let lMorph;
    if (nLeft === null) {
        lMorph = _oSpellChecker.getMorph(oToken["sValue"]);
    } else {
        const sFull = oToken["sValue"];
        const sPart = (nRight === null) ? sFull.slice(nLeft) : sFull.slice(nLeft, nRight);
        lMorph = _oSpellChecker.getMorph(sPart);
        if (bMemorizeMorph) {
            oToken["lMorph"] = lMorph;
        }
    }
    if (lMorph.length == 0) {
        return false;
    }
    if (sNegPattern && sNegPattern == "*") {
        // all morphologies must match <sPattern>
        return lMorph.every((sMorph) => sMorph.search(sPattern) !== -1);
    }
    if (sNegPattern && anyMatch(lMorph, sNegPattern)) {
        return false;
    }
    return anyMatch(lMorph, sPattern);
}
function g_merged_analyse (oToken1, oToken2, cMerger, sPattern, sNegPattern="", bSetMorph=true) {
    // Analyse the word made of both token values joined with <cMerger>
    // (morphologies stored on the tokens are not used).
    // Returns true if no morphology matches <sNegPattern> and at least one matches
    // <sPattern>; with <sNegPattern> equal to "*", every morphology must match <sPattern>.
    // On success, and if <bSetMorph> is true, the morphologies of the merged word
    // are stored in oToken1["lMorph"].
    const sMerged = oToken1["sValue"] + cMerger + oToken2["sValue"];
    const lMorph = _oSpellChecker.getMorph(sMerged);
    if (lMorph.length == 0) {
        return false;
    }
    // store the morphologies on the first token when the test succeeded
    const conclude = (bResult) => {
        if (bResult && bSetMorph) {
            oToken1["lMorph"] = lMorph;
        }
        return bResult;
    };
    const matchesPattern = (sMorph) => sMorph.search(sPattern) !== -1;
    if (sNegPattern) {
        if (sNegPattern == "*") {
            return conclude(lMorph.every(matchesPattern));
        }
        const bRejected = lMorph.some((sMorph) => sMorph.search(sNegPattern) !== -1);
        if (bRejected) {
            return false;
        }
    }
    return conclude(lMorph.some(matchesPattern));
}
function g_tag_before (oToken, dTags, sTag) {
    // True if <sTag> is registered in <dTags> and the token index ("i") is greater
    // than the first element of the entry stored for that tag.
    if (dTags.has(sTag)) {
        const nBound = dTags.get(sTag)[0];
        return oToken["i"] > nBound;
    }
    return false;
}
function g_tag_after (oToken, dTags, sTag) {
    // True if <sTag> is registered in <dTags> and the token index ("i") is lower
    // than the second element of the entry stored for that tag.
    return dTags.has(sTag) ? oToken["i"] < dTags.get(sTag)[1] : false;
}
function g_tag (oToken, sTag) {
    // True if the token owns an "aTags" collection that contains <sTag>.
    if (oToken.hasOwnProperty("aTags")) {
        return oToken["aTags"].has(sTag);
    }
    return false;
}
function g_space_between_tokens (oToken1, oToken2, nMin, nMax=null) {
let nSpace = oToken2["nStart"] - oToken1["nEnd"]
if (nSpace < nMin) {
return false;
}
if (nMax !== null && nSpace > nMax) {
return false;
}
return true;
|
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
|
dTokenPos.get(nPos)["lMorph"] = lMorph;
return true;
}
//// Disambiguation for graph rules
function g_select (dToken, sPattern, lDefault=null) {
    // Disambiguation: keep only the morphologies of <dToken> that match <sPattern>.
    // Morphologies come from dToken["lMorph"] if present, otherwise from the spell checker.
    // <lDefault>, if given, is used when the token has fewer than two morphologies
    // or when nothing matches. Always returns true.
    const lMorph = dToken.hasOwnProperty("lMorph") ? dToken["lMorph"] : _oSpellChecker.getMorph(dToken["sValue"]);
    const useDefault = () => {
        if (lDefault) {
            dToken["lMorph"] = lDefault;
        }
    };
    if (lMorph.length < 2) {
        // nothing to choose from
        useDefault();
        return true;
    }
    const lKept = lMorph.filter((sMorph) => sMorph.search(sPattern) !== -1);
    if (lKept.length === 0) {
        useDefault();
    } else if (lKept.length != lMorph.length) {
        // the token is only modified if the selection removed something
        dToken["lMorph"] = lKept;
    }
    return true;
}
function g_exclude (dToken, sPattern, lDefault=null) {
    // Disambiguation: remove the morphologies of <dToken> that match <sPattern>.
    // Morphologies come from dToken["lMorph"] if present, otherwise from the spell checker.
    // <lDefault>, if given, is used when the token has fewer than two morphologies
    // or when every morphology would be removed. Always returns true.
    const lMorph = dToken.hasOwnProperty("lMorph") ? dToken["lMorph"] : _oSpellChecker.getMorph(dToken["sValue"]);
    const useDefault = () => {
        if (lDefault) {
            dToken["lMorph"] = lDefault;
        }
    };
    if (lMorph.length < 2) {
        // nothing to choose from
        useDefault();
        return true;
    }
    const lKept = lMorph.filter((sMorph) => sMorph.search(sPattern) === -1);
    if (lKept.length === 0) {
        useDefault();
    } else if (lKept.length != lMorph.length) {
        // the token is only modified if something was actually removed
        dToken["lMorph"] = lKept;
    }
    return true;
}
function g_define (dToken, lMorph) {
    // Disambiguation: replace the morphologies of <dToken> by <lMorph>. Always returns true.
    dToken.lMorph = lMorph;
    return true;
}
function g_define_from (dToken, nLeft=null, nRight=null) {
    // Disambiguation: set the morphologies of <dToken> to those the spell checker
    // gives for its value, or for the slice nLeft..nRight of its value when
    // <nLeft> is given (up to the end if <nRight> is null). Always returns true.
    const sFull = dToken["sValue"];
    let sPart;
    if (nLeft === null) {
        sPart = sFull;
    } else if (nRight === null) {
        sPart = sFull.slice(nLeft);
    } else {
        sPart = sFull.slice(nLeft, nRight);
    }
    dToken["lMorph"] = _oSpellChecker.getMorph(sPart);
    return true;
}
//////// GRAMMAR CHECKER PLUGINS
${pluginsJS}
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>
>
>
>
>
>
>
|
|
|
|
|
|
>
>
>
>
>
>
>
|
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
|
dTokenPos.get(nPos)["lMorph"] = lMorph;
return true;
}
//// Disambiguation for graph rules
function g_select (oToken, sPattern, lDefault=null) {
    // Disambiguation: keep only the morphologies of <oToken> that match <sPattern>.
    // Morphologies come from oToken["lMorph"] if present, otherwise from the spell checker.
    // <lDefault>, if given, is used when the token has fewer than two morphologies
    // or when nothing matches. Always returns true.
    let lMorph;
    if (oToken.hasOwnProperty("lMorph")) {
        lMorph = oToken["lMorph"];
    } else {
        lMorph = _oSpellChecker.getMorph(oToken["sValue"]);
    }
    // null means "leave the token as it is"
    let lResult = null;
    if (lMorph.length < 2) {
        lResult = lDefault || null;
    } else {
        const lKept = lMorph.filter((sMorph) => sMorph.search(sPattern) !== -1);
        if (lKept.length === 0) {
            lResult = lDefault || null;
        } else if (lKept.length != lMorph.length) {
            lResult = lKept;
        }
    }
    if (lResult !== null) {
        oToken["lMorph"] = lResult;
    }
    return true;
}
function g_exclude (oToken, sPattern, lDefault=null) {
    // Disambiguation: remove the morphologies of <oToken> that match <sPattern>.
    // Morphologies come from oToken["lMorph"] if present, otherwise from the spell checker.
    // <lDefault>, if given, is used when the token has fewer than two morphologies
    // or when every morphology would be removed. Always returns true.
    let lMorph;
    if (oToken.hasOwnProperty("lMorph")) {
        lMorph = oToken["lMorph"];
    } else {
        lMorph = _oSpellChecker.getMorph(oToken["sValue"]);
    }
    // null means "leave the token as it is"
    let lResult = null;
    if (lMorph.length < 2) {
        lResult = lDefault || null;
    } else {
        const lKept = lMorph.filter((sMorph) => sMorph.search(sPattern) === -1);
        if (lKept.length === 0) {
            lResult = lDefault || null;
        } else if (lKept.length != lMorph.length) {
            lResult = lKept;
        }
    }
    if (lResult !== null) {
        oToken["lMorph"] = lResult;
    }
    return true;
}
function g_add_morph (oToken, lNewMorph) {
    // Disambiguation: add the morphologies of <lNewMorph> to those of <oToken>.
    // The current morphologies are oToken["lMorph"] if present, otherwise those
    // given by the spell checker for the token value. Always returns true.
    const lMorph = (oToken.hasOwnProperty("lMorph")) ? oToken["lMorph"] : _oSpellChecker.getMorph(oToken["sValue"]);
    // Build a new array instead of pushing into <lMorph>: that array may belong to
    // the spell checker or be shared with other holders, and must not be altered.
    oToken["lMorph"] = [...lMorph, ...lNewMorph];
    return true;
}
function g_define (oToken, lMorph) {
    // Disambiguation: replace the morphologies of <oToken> by <lMorph>. Always returns true.
    oToken.lMorph = lMorph;
    return true;
}
function g_define_from (oToken, nLeft=null, nRight=null) {
    // Disambiguation: set the morphologies of <oToken> to those the spell checker
    // gives for its value, or for the slice nLeft..nRight of its value when
    // <nLeft> is given (up to the end if <nRight> is null). Always returns true.
    const sliceOf = (sText) => (nRight !== null) ? sText.slice(nLeft, nRight) : sText.slice(nLeft);
    const sFull = oToken["sValue"];
    const sPart = (nLeft !== null) ? sliceOf(sFull) : sFull;
    oToken["lMorph"] = _oSpellChecker.getMorph(sPart);
    return true;
}
function g_change_meta (oToken, sType) {
    // Disambiguation: set the type ("sType") of <oToken> to <sType>. Always returns true.
    oToken.sType = sType;
    return true;
}
//////// GRAMMAR CHECKER PLUGINS
${pluginsJS}
|