1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
|
-
+
+
+
+
+
+
+
+
|
// JavaScript
/* jshint esversion:6, -W097 */
/* jslint esversion:6 */
/* global require, exports, console */
"use strict";
var text = {
_zEndOfSentence: new RegExp ('[.?!:;…][ .?!…»«“”"‘’)–—]+(?=[A-ZÉÈÎÔ])', "g"),
_zEndOfSentence: new RegExp ('[.?!:;…]+[ ]+[»”’]?(?=[«"“‘]?[A-ZÉÈÎÔ–—])', "g"),
getSentenceBoundaries: function* (sText) {
// generator: returns start and end of sentences found in <sText>
let iStart = 0;
let m;
while ((m = this._zEndOfSentence.exec(sText)) !== null) {
yield [iStart, this._zEndOfSentence.lastIndex];
iStart = this._zEndOfSentence.lastIndex;
}
yield [iStart, sText.length];
},
getSentence: function* (sText) {
// generator: returns sentences found in <sText>
for (let [iStart, iEnd] of this.getSentenceBoundaries(sText)) {
yield sText.slice(iStart, iEnd);
}
},
getParagraph: function* (sText, sSepParagraph = "\n") {
// generator: returns paragraphs of text
let iStart = 0;
let iEnd = 0;
sText = sText.replace("\r\n", "\n").replace("\r", "\n");
while ((iEnd = sText.indexOf(sSepParagraph, iStart)) !== -1) {
|