︙ | | | ︙ | |
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
|
// text processor
let nTokenStart = (eAct[0] > 0) ? nTokenOffset + eAct[0] : nLastToken + eAct[0];
let nTokenEnd = (eAct[1] > 0) ? nTokenOffset + eAct[1] : nLastToken + eAct[1];
this._tagAndPrepareTokenForRewriting(sWhat, nTokenStart, nTokenEnd, nTokenOffset, nLastToken, eAct[2], bDebug);
bChange = true;
if (bDebug) {
console.log(` TEXT_PROCESSOR: ${sRuleId} ${sLineId}`);
console.log(` ${this.lToken[nTokenStart]["sValue"]} : ${this.lToken[nTokenEnd]["sValue"]} > ${sWhat}`);
}
}
else if (cActionType == "=") {
// disambiguation
oEvalFunc[sWhat](this.lToken, nTokenOffset, nLastToken);
if (bDebug) {
console.log(` DISAMBIGUATOR: ${sRuleId} ${sLineId} (${sWhat}) ${this.lToken[nTokenOffset+1]["sValue"]}:${this.lToken[nLastToken]["sValue"]}`);
|
|
|
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
|
// text processor
let nTokenStart = (eAct[0] > 0) ? nTokenOffset + eAct[0] : nLastToken + eAct[0];
let nTokenEnd = (eAct[1] > 0) ? nTokenOffset + eAct[1] : nLastToken + eAct[1];
this._tagAndPrepareTokenForRewriting(sWhat, nTokenStart, nTokenEnd, nTokenOffset, nLastToken, eAct[2], bDebug);
bChange = true;
if (bDebug) {
console.log(` TEXT_PROCESSOR: ${sRuleId} ${sLineId}`);
console.log(` [${this.lToken[nTokenStart]["sValue"]}:${this.lToken[nTokenEnd]["sValue"]}] > ${sWhat}`);
}
}
else if (cActionType == "=") {
// disambiguation
oEvalFunc[sWhat](this.lToken, nTokenOffset, nLastToken);
if (bDebug) {
console.log(` DISAMBIGUATOR: ${sRuleId} ${sLineId} (${sWhat}) ${this.lToken[nTokenOffset+1]["sValue"]}:${this.lToken[nLastToken]["sValue"]}`);
|
︙ | | | ︙ | |
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
|
}
//console.log(sText+"\nstart: "+m.start[iGroup]+" end:"+m.end[iGroup]);
return sText.slice(0, m.start[iGroup]) + sNew + sText.slice(m.end[iGroup]);
}
_tagAndPrepareTokenForRewriting (sWhat, nTokenRewriteStart, nTokenRewriteEnd, nTokenOffset, nLastToken, bCaseSvty, bDebug) {
// text processor: rewrite tokens between <nTokenRewriteStart> and <nTokenRewriteEnd> position
if (bDebug) {
console.log(" START: ${nTokenRewriteStart} - END: ${nTokenRewriteEnd} ");
}
if (sWhat === "*") {
// purge text
if (nTokenRewriteEnd - nTokenRewriteStart == 0) {
this.lToken[nTokenRewriteStart]["bToRemove"] = true;
} else {
for (let i = nTokenRewriteStart; i <= nTokenRewriteEnd; i++) {
this.lToken[i]["bToRemove"] = true;
|
<
<
<
|
803
804
805
806
807
808
809
810
811
812
813
814
815
816
|
}
//console.log(sText+"\nstart: "+m.start[iGroup]+" end:"+m.end[iGroup]);
return sText.slice(0, m.start[iGroup]) + sNew + sText.slice(m.end[iGroup]);
}
_tagAndPrepareTokenForRewriting (sWhat, nTokenRewriteStart, nTokenRewriteEnd, nTokenOffset, nLastToken, bCaseSvty, bDebug) {
// text processor: rewrite tokens between <nTokenRewriteStart> and <nTokenRewriteEnd> position
if (sWhat === "*") {
// purge text
if (nTokenRewriteEnd - nTokenRewriteStart == 0) {
this.lToken[nTokenRewriteStart]["bToRemove"] = true;
} else {
for (let i = nTokenRewriteStart; i <= nTokenRewriteEnd; i++) {
this.lToken[i]["bToRemove"] = true;
|
︙ | | | ︙ | |
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
|
// rewrite the sentence, modify tokens, purge the token list
if (bDebug) {
console.log("REWRITE");
}
let lNewToken = [];
let nMergeUntil = 0;
let dTokenMerger = null;
for (let [iToken, dToken] in this.lToken.entries()) {
let bKeepToken = true;
if (dToken["sType"] != "INFO") {
if (nMergeUntil && iToken <= nMergeUntil) {
dTokenMerger["sValue"] += " ".repeat(dToken["nStart"] - dTokenMerger["nEnd"]) + dToken["sValue"];
dTokenMerger["nEnd"] = dToken["nEnd"];
if (bDebug) {
console.log(" MERGED TOKEN: " + dTokenMerger["sValue"]);
|
|
|
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
|
// rewrite the sentence, modify tokens, purge the token list
if (bDebug) {
console.log("REWRITE");
}
let lNewToken = [];
let nMergeUntil = 0;
let dTokenMerger = null;
for (let [iToken, dToken] of this.lToken.entries()) {
let bKeepToken = true;
if (dToken["sType"] != "INFO") {
if (nMergeUntil && iToken <= nMergeUntil) {
dTokenMerger["sValue"] += " ".repeat(dToken["nStart"] - dTokenMerger["nEnd"]) + dToken["sValue"];
dTokenMerger["nEnd"] = dToken["nEnd"];
if (bDebug) {
console.log(" MERGED TOKEN: " + dTokenMerger["sValue"]);
|
︙ | | | ︙ | |
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
|
}
// search sPattern
return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1));
}
function g_analyse (dToken, sPattern, sNegPattern="", nLeft=null, nRight=null, bMemorizeMorph=true) {
// analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies
if (nLeft !== null) {
lMorph = _oSpellChecker.getMorph(dToken["sValue"].slice(nLeft, nRight));
if (bMemorizeMorph) {
dToken["lMorph"] = lMorph;
}
} else {
lMorph = _oSpellChecker.getMorph(dToken["sValue"]);
|
>
|
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
|
}
// search sPattern
return lMorph.some(sMorph => (sMorph.search(sPattern) !== -1));
}
function g_analyse (dToken, sPattern, sNegPattern="", nLeft=null, nRight=null, bMemorizeMorph=true) {
// analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies
let lMorph;
if (nLeft !== null) {
lMorph = _oSpellChecker.getMorph(dToken["sValue"].slice(nLeft, nRight));
if (bMemorizeMorph) {
dToken["lMorph"] = lMorph;
}
} else {
lMorph = _oSpellChecker.getMorph(dToken["sValue"]);
|
︙ | | | ︙ | |
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
|
return bResult;
}
function g_tag_before (dToken, dTags, sTag) {
    // Return true when <sTag> is recorded in <dTags> and the token index dToken["i"]
    // is strictly greater than element 0 of the tag's entry; false otherwise.
    //   dToken: token object; only its "i" property is read here
    //   dTags:  Map keyed by tag name (queried with .has/.get); each value is indexable
    //           (presumably [first index, last index] of the tagged tokens; confirm where dTags is filled)
    //   sTag:   tag name to look up
    if (!dTags.has(sTag)) {
        // unknown tag: nothing can precede the token (and there is no entry to dereference)
        return false;
    }
    // a Map must be read with .get(); bracket access would look up an object property instead
    return dToken["i"] > dTags.get(sTag)[0];
}
function g_tag_after (dToken, dTags, sTag) {
    // Return true when <sTag> is recorded in <dTags> and the token index dToken["i"]
    // is strictly lower than element 1 of the tag's entry; false otherwise.
    //   dToken: token object; only its "i" property is read here
    //   dTags:  Map keyed by tag name (queried with .has/.get); each value is indexable
    //           (presumably [first index, last index] of the tagged tokens; confirm where dTags is filled)
    //   sTag:   tag name to look up
    if (!dTags.has(sTag)) {
        // unknown tag: nothing can follow the token (and there is no entry to dereference)
        return false;
    }
    // a Map must be read with .get(); bracket access would look up an object property instead
    return dToken["i"] < dTags.get(sTag)[1];
}
function g_tag (dToken, sTag) {
return dToken.hasOwnProperty("tags") && dToken["tags"].has(sTag);
|
|
|
|
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
|
return bResult;
}
function g_tag_before (dToken, dTags, sTag) {
    // Return true when <sTag> is recorded in <dTags> and the token index dToken["i"]
    // is strictly greater than element 0 of the tag's entry; false otherwise.
    //   dToken: token object; only its "i" property is read here
    //   dTags:  Map keyed by tag name (queried with .has/.get); each value is indexable
    //           (presumably [first index, last index] of the tagged tokens; confirm where dTags is filled)
    //   sTag:   tag name to look up
    if (!dTags.has(sTag)) {
        // unknown tag: nothing can precede the token (and there is no entry to dereference)
        return false;
    }
    return dToken["i"] > dTags.get(sTag)[0];
}
function g_tag_after (dToken, dTags, sTag) {
    // Return true when <sTag> is recorded in <dTags> and the token index dToken["i"]
    // is strictly lower than element 1 of the tag's entry; false otherwise.
    //   dToken: token object; only its "i" property is read here
    //   dTags:  Map keyed by tag name (queried with .has/.get); each value is indexable
    //           (presumably [first index, last index] of the tagged tokens; confirm where dTags is filled)
    //   sTag:   tag name to look up
    if (!dTags.has(sTag)) {
        // unknown tag: nothing can follow the token (and there is no entry to dereference)
        return false;
    }
    return dToken["i"] < dTags.get(sTag)[1];
}
function g_tag (dToken, sTag) {
return dToken.hasOwnProperty("tags") && dToken["tags"].has(sTag);
|
︙ | | | ︙ | |