Overview
| Comment: | [core] sentence splitting: code clarification |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk | core |
| Files: | files | file ages | folders |
| SHA3-256: |
b52cb827b1c8ff0be6d7b814a3c4761e |
| User & Date: | olr on 2019-05-24 14:12:33 |
| Other Links: | manifest | tags |
Context
|
2019-05-24
| ||
| 14:43 | [build][core] update line identifier check-in: b4abcccefb user: olr tags: trunk, core, build | |
| 14:12 | [core] sentence splitting: code clarification check-in: b52cb827b1 user: olr tags: trunk, core | |
| 12:21 | [core] new regex for sentence splitting, generator of sentences check-in: 2777d8cef6 user: olr tags: trunk, core | |
Changes
Modified gc_core/js/text.js from [091c88a16c] to [b124979f5a].
1 2 3 4 5 6 7 8 9 10 11 |
// JavaScript
/* jshint esversion:6, -W097 */
/* jslint esversion:6 */
/* global require, exports, console */
"use strict";
var text = {
| | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
// JavaScript
/* jshint esversion:6, -W097 */
/* jslint esversion:6 */
/* global require, exports, console */
"use strict";
var text = {
// Sentence-end pattern used by getSentenceBoundaries():
//   [.?!:;…]+   a run of one or more sentence-ending punctuation marks,
//   [ ]+        followed by one or more spaces,
//   [»”’]?      and an optional closing quotation mark;
//   (?=...)     the lookahead then requires, without consuming it, an optional
//               opening quote / dash / space followed by a capital letter.
// The "g" flag makes exec() stateful through lastIndex: each call resumes where
// the previous match ended, which is what the exec() loop below relies on.
// NOTE(review): the capital-letter class lists only A-Z and É È Î Ô; other accented
// capitals (e.g. À, Ç, Ê) will not start a new sentence -- confirm this is intended.
_zEndOfSentence: new RegExp ('[.?!:;…]+[ ]+[»”’]?(?=[«"“‘–— ]?[A-ZÉÈÎÔ])', "g"),
getSentenceBoundaries: function* (sText) {
// generator: returns start and end of sentences found in <sText>
let iStart = 0;
let m;
while ((m = this._zEndOfSentence.exec(sText)) !== null) {
yield [iStart, this._zEndOfSentence.lastIndex];
|
| ︙ | ︙ |
Modified gc_core/py/text.py from [dd28033587] to [bbd48992a7].
1 2 3 4 5 6 7 8 9 10 11 | #!python3 """ Text tools """ import re import textwrap from itertools import chain | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
#!python3
"""
Text tools
"""
import re
import textwrap
from itertools import chain
# Sentence-end pattern, iterated with finditer() by getSentenceBoundaries():
#   [.?!:;…]+   a run of one or more sentence-ending punctuation marks,
#   [ ]+        followed by one or more spaces,
#   [»”’]?      and an optional closing quotation mark;
#   (?=...)     the lookahead then requires, without consuming it, an optional
#               opening quote / dash / space followed by a capital letter.
# A match therefore ends just before the first character of the next sentence.
# NOTE(review): the capital-letter class lists only A-Z and É È Î Ô; other accented
# capitals (e.g. À, Ç, Ê) will not start a new sentence -- confirm this is intended.
_zEndOfSentence = re.compile(r'[.?!:;…]+[ ]+[»”’]?(?=[«"“‘–— ]?[A-ZÉÈÎÔ])')
def getSentenceBoundaries (sText):
"generator: returns start and end of sentences found in <sText>"
iStart = 0
for m in _zEndOfSentence.finditer(sText):
yield (iStart, m.end())
iStart = m.end()
|
| ︙ | ︙ |