82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
|
sText = sText.replace(/‑/g, "-"); // nobreakdash
}
// parse sentence
for (let [iStart, iEnd] of this._getSentenceBoundaries(sText)) {
if (4 < (iEnd - iStart) < 2000) {
dDA.clear();
//echo(sText.slice(iStart, iEnd));
try {
[, errs] = this._proofread(sText.slice(iStart, iEnd), sAlt.slice(iStart, iEnd), iStart, false, dDA, dPriority, sCountry, bDebug, bContext);
dErrors.gl_update(errs);
}
catch (e) {
helpers.logerror(e);
}
|
|
|
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
sText = sText.replace(/‑/g, "-"); // nobreakdash
}
// parse sentence
for (let [iStart, iEnd] of this._getSentenceBoundaries(sText)) {
if (4 < (iEnd - iStart) < 2000) {
dDA.clear();
//helpers.echo(sText.slice(iStart, iEnd));
try {
[, errs] = this._proofread(sText.slice(iStart, iEnd), sAlt.slice(iStart, iEnd), iStart, false, dDA, dPriority, sCountry, bDebug, bContext);
dErrors.gl_update(errs);
}
catch (e) {
helpers.logerror(e);
}
|
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
|
for (let [sOption, lRuleGroup] of this._getRules(bParagraph)) {
if (!sOption || option(sOption)) {
for (let [zRegex, bUppercase, sLineId, sRuleId, nPriority, lActions, lGroups, lNegLookBefore] of lRuleGroup) {
if (!_aIgnoredRules.has(sRuleId)) {
while ((m = zRegex.gl_exec2(s, lGroups, lNegLookBefore)) !== null) {
bCondMemo = null;
/*if (bDebug) {
echo(">>>> Rule # " + sLineId + " - Text: " + s + " opt: "+ sOption);
}*/
for (let [sFuncCond, cActionType, sWhat, ...eAct] of lActions) {
// action in lActions: [ condition, action type, replacement/suggestion/action[, iGroup[, message, URL]] ]
try {
//echo(oEvalFunc[sFuncCond]);
bCondMemo = (!sFuncCond || oEvalFunc[sFuncCond](s, sx, m, dDA, sCountry, bCondMemo));
if (bCondMemo) {
switch (cActionType) {
case "-":
// grammar error
//echo("-> error detected in " + sLineId + "\nzRegex: " + zRegex.source);
nErrorStart = nOffset + m.start[eAct[0]];
if (!dErrs.has(nErrorStart) || nPriority > dPriority.get(nErrorStart)) {
dErrs.set(nErrorStart, this._createError(s, sx, sWhat, nOffset, m, eAct[0], sLineId, sRuleId, bUppercase, eAct[1], eAct[2], bIdRule, sOption, bContext));
dPriority.set(nErrorStart, nPriority);
}
break;
case "~":
// text processor
//echo("-> text processor by " + sLineId + "\nzRegex: " + zRegex.source);
s = this._rewrite(s, sWhat, eAct[0], m, bUppercase);
bChange = true;
if (bDebug) {
echo("~ " + s + " -- " + m[eAct[0]] + " # " + sLineId);
}
break;
case "=":
// disambiguation
//echo("-> disambiguation by " + sLineId + "\nzRegex: " + zRegex.source);
oEvalFunc[sWhat](s, m, dDA);
if (bDebug) {
echo("= " + m[0] + " # " + sLineId + "\nDA: " + dDA.gl_toString());
}
break;
case ">":
// we do nothing, this test is just a condition to apply all following actions
break;
default:
echo("# error: unknown action at " + sLineId);
}
} else {
if (cActionType == ">") {
break;
}
}
}
catch (e) {
echo(s);
echo("# line id: " + sLineId + "\n# rule id: " + sRuleId);
helpers.logerror(e);
}
}
}
}
}
}
|
|
|
|
|
|
|
|
|
|
|
|
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
|
for (let [sOption, lRuleGroup] of this._getRules(bParagraph)) {
if (!sOption || option(sOption)) {
for (let [zRegex, bUppercase, sLineId, sRuleId, nPriority, lActions, lGroups, lNegLookBefore] of lRuleGroup) {
if (!_aIgnoredRules.has(sRuleId)) {
while ((m = zRegex.gl_exec2(s, lGroups, lNegLookBefore)) !== null) {
bCondMemo = null;
/*if (bDebug) {
helpers.echo(">>>> Rule # " + sLineId + " - Text: " + s + " opt: "+ sOption);
}*/
for (let [sFuncCond, cActionType, sWhat, ...eAct] of lActions) {
// action in lActions: [ condition, action type, replacement/suggestion/action[, iGroup[, message, URL]] ]
try {
//helpers.echo(oEvalFunc[sFuncCond]);
bCondMemo = (!sFuncCond || oEvalFunc[sFuncCond](s, sx, m, dDA, sCountry, bCondMemo));
if (bCondMemo) {
switch (cActionType) {
case "-":
// grammar error
//helpers.echo("-> error detected in " + sLineId + "\nzRegex: " + zRegex.source);
nErrorStart = nOffset + m.start[eAct[0]];
if (!dErrs.has(nErrorStart) || nPriority > dPriority.get(nErrorStart)) {
dErrs.set(nErrorStart, this._createError(s, sx, sWhat, nOffset, m, eAct[0], sLineId, sRuleId, bUppercase, eAct[1], eAct[2], bIdRule, sOption, bContext));
dPriority.set(nErrorStart, nPriority);
}
break;
case "~":
// text processor
//helpers.echo("-> text processor by " + sLineId + "\nzRegex: " + zRegex.source);
s = this._rewrite(s, sWhat, eAct[0], m, bUppercase);
bChange = true;
if (bDebug) {
helpers.echo("~ " + s + " -- " + m[eAct[0]] + " # " + sLineId);
}
break;
case "=":
// disambiguation
//helpers.echo("-> disambiguation by " + sLineId + "\nzRegex: " + zRegex.source);
oEvalFunc[sWhat](s, m, dDA);
if (bDebug) {
helpers.echo("= " + m[0] + " # " + sLineId + "\nDA: " + dDA.gl_toString());
}
break;
case ">":
// we do nothing, this test is just a condition to apply all following actions
break;
default:
helpers.echo("# error: unknown action at " + sLineId);
}
} else {
if (cActionType == ">") {
break;
}
}
}
catch (e) {
helpers.echo(s);
helpers.echo("# line id: " + sLineId + "\n# rule id: " + sRuleId);
helpers.logerror(e);
}
}
}
}
}
}
|
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
|
// return true if option sOpt is active
return _dOptions.get(sOpt);
}
function displayInfo (dDA, aWord) {
// for debugging: info of word
if (!aWord) {
echo("> nothing to find");
return true;
}
if (!_dAnalyses.has(aWord[1]) && !_storeMorphFromFSA(aWord[1])) {
echo("> not in FSA");
return true;
}
if (dDA.has(aWord[0])) {
echo("DA: " + dDA.get(aWord[0]));
}
echo("FSA: " + _dAnalyses.get(aWord[1]));
return true;
}
function _storeMorphFromFSA (sWord) {
// retrieves morphologies list from _oDict -> _dAnalyses
//echo("register: "+sWord + " " + _oDict.getMorph(sWord).toString())
_dAnalyses.set(sWord, _oDict.getMorph(sWord));
return !!_dAnalyses.get(sWord);
}
function morph (dDA, aWord, sPattern, bStrict=true, bNoWord=false) {
// analyse a tuple (position, word), return true if sPattern in morphologies (disambiguation on)
if (!aWord) {
//echo("morph: noword, returns " + bNoWord);
return bNoWord;
}
//echo("aWord: "+aWord.toString());
if (!_dAnalyses.has(aWord[1]) && !_storeMorphFromFSA(aWord[1])) {
return false;
}
let lMorph = dDA.has(aWord[0]) ? dDA.get(aWord[0]) : _dAnalyses.get(aWord[1]);
//echo("lMorph: "+lMorph.toString());
if (lMorph.length === 0) {
return false;
}
//echo("***");
if (bStrict) {
return lMorph.every(s => (s.search(sPattern) !== -1));
}
return lMorph.some(s => (s.search(sPattern) !== -1));
}
function morphex (dDA, aWord, sPattern, sNegPattern, bNoWord=false) {
// analyse a tuple (position, word), returns true if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on)
if (!aWord) {
//echo("morph: noword, returns " + bNoWord);
return bNoWord;
}
//echo("aWord: "+aWord.toString());
if (!_dAnalyses.has(aWord[1]) && !_storeMorphFromFSA(aWord[1])) {
return false;
}
let lMorph = dDA.has(aWord[0]) ? dDA.get(aWord[0]) : _dAnalyses.get(aWord[1]);
//echo("lMorph: "+lMorph.toString());
if (lMorph.length === 0) {
return false;
}
//echo("***");
// check negative condition
if (lMorph.some(s => (s.search(sNegPattern) !== -1))) {
return false;
}
// search sPattern
return lMorph.some(s => (s.search(sPattern) !== -1));
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
|
// return true if option sOpt is active
return _dOptions.get(sOpt);
}
function displayInfo (dDA, aWord) {
// for debugging: info of word
if (!aWord) {
helpers.echo("> nothing to find");
return true;
}
if (!_dAnalyses.has(aWord[1]) && !_storeMorphFromFSA(aWord[1])) {
helpers.echo("> not in FSA");
return true;
}
if (dDA.has(aWord[0])) {
helpers.echo("DA: " + dDA.get(aWord[0]));
}
helpers.echo("FSA: " + _dAnalyses.get(aWord[1]));
return true;
}
function _storeMorphFromFSA (sWord) {
// retrieves morphologies list from _oDict -> _dAnalyses
//helpers.echo("register: "+sWord + " " + _oDict.getMorph(sWord).toString())
_dAnalyses.set(sWord, _oDict.getMorph(sWord));
return !!_dAnalyses.get(sWord);
}
function morph (dDA, aWord, sPattern, bStrict=true, bNoWord=false) {
// analyse a tuple (position, word), return true if sPattern in morphologies (disambiguation on)
if (!aWord) {
//helpers.echo("morph: noword, returns " + bNoWord);
return bNoWord;
}
//helpers.echo("aWord: "+aWord.toString());
if (!_dAnalyses.has(aWord[1]) && !_storeMorphFromFSA(aWord[1])) {
return false;
}
let lMorph = dDA.has(aWord[0]) ? dDA.get(aWord[0]) : _dAnalyses.get(aWord[1]);
//helpers.echo("lMorph: "+lMorph.toString());
if (lMorph.length === 0) {
return false;
}
//helpers.echo("***");
if (bStrict) {
return lMorph.every(s => (s.search(sPattern) !== -1));
}
return lMorph.some(s => (s.search(sPattern) !== -1));
}
function morphex (dDA, aWord, sPattern, sNegPattern, bNoWord=false) {
// analyse a tuple (position, word), returns true if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on)
if (!aWord) {
//helpers.echo("morph: noword, returns " + bNoWord);
return bNoWord;
}
//helpers.echo("aWord: "+aWord.toString());
if (!_dAnalyses.has(aWord[1]) && !_storeMorphFromFSA(aWord[1])) {
return false;
}
let lMorph = dDA.has(aWord[0]) ? dDA.get(aWord[0]) : _dAnalyses.get(aWord[1]);
//helpers.echo("lMorph: "+lMorph.toString());
if (lMorph.length === 0) {
return false;
}
//helpers.echo("***");
// check negative condition
if (lMorph.some(s => (s.search(sNegPattern) !== -1))) {
return false;
}
// search sPattern
return lMorph.some(s => (s.search(sPattern) !== -1));
}
|
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
|
return [iStart + _zNextWord.lastIndex - m[1].length, m[1]];
}
const _zPrevWord = new RegExp ("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_-]*) +$", "i");
function prevword1 (s, iEnd) {
// get previous word (optimization)
//echo("prev1, s:"+s);
//echo("prev1, s.slice(0, iEnd):"+s.slice(0, iEnd));
let m = _zPrevWord.exec(s.slice(0, iEnd));
//echo("prev1, m:"+m);
if (!m) {
return null;
}
//echo("prev1: " + m.index + " " + m[1]);
return [m.index, m[1]];
}
function look (s, zPattern, zNegPattern=null) {
// seek zPattern in s (before/after/fulltext), if antipattern zNegPattern not in s
try {
if (zNegPattern && zNegPattern.test(s)) {
|
|
|
|
|
|
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
|
return [iStart + _zNextWord.lastIndex - m[1].length, m[1]];
}
const _zPrevWord = new RegExp ("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯfi-st_-]*) +$", "i");
function prevword1 (s, iEnd) {
// get previous word (optimization)
//helpers.echo("prev1, s:"+s);
//helpers.echo("prev1, s.slice(0, iEnd):"+s.slice(0, iEnd));
let m = _zPrevWord.exec(s.slice(0, iEnd));
//helpers.echo("prev1, m:"+m);
if (!m) {
return null;
}
//helpers.echo("prev1: " + m.index + " " + m[1]);
return [m.index, m[1]];
}
function look (s, zPattern, zNegPattern=null) {
// seek zPattern in s (before/after/fulltext), if antipattern zNegPattern not in s
try {
if (zNegPattern && zNegPattern.test(s)) {
|