Overview
Comment: | [core] move getSentenceBoundaries from gc_engine to text module |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | core |
Files: | files | file ages | folders |
SHA3-256: | bf0a1bdd5dcdb4b2f65922bfa97012a4 |
User & Date: | olr on 2019-05-24 07:42:20 |
Other Links: | manifest | tags |
Context
2019-05-24
| ||
08:50 | [core] sentence splitting: code clarification check-in: 311ccab788 user: olr tags: trunk, core | |
07:42 | [core] move getSentenceBoundaries from gc_engine to text module check-in: bf0a1bdd5d user: olr tags: trunk, core | |
07:03 | [fr] faux positifs et ajustements check-in: 2f921877e7 user: olr tags: trunk, fr | |
Changes
Modified gc_core/js/lang_core/gc_engine.js from [842c50636b] to [0ae80cfb32].
︙ | |||
159 160 161 162 163 164 165 | 159 160 161 162 163 164 165 166 167 168 169 170 171 172 | - - - - - - - - - - - - - - | }, //// Parsing parse: function (sText, sCountry="${country_default}", bDebug=false, dOptions=null, bContext=false) { let oText = new TextParser(sText); return oText.parse(sCountry, bDebug, dOptions, bContext); |
︙ | |||
239 240 241 242 243 244 245 | 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 | - + | this.sText = this.sText.replace(/‑/g, "-"); // nobreakdash } if (this.sText.includes("@@")) { this.sText = this.sText.replace(/@@+/g, ""); } // parse sentence |
︙ |
Modified gc_core/js/text.js from [1251d5448b] to [dc3ef10e8d].
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 | + + + + + + + + + + + + + + + | // JavaScript /* jshint esversion:6, -W097 */ /* jslint esversion:6 */ /* global require, exports, console */ "use strict"; var text = { _zEndOfSentence: new RegExp ('([.?!:;…][ .?!…»«“”"‘’)–—]+(?=[A-ZÉÈÎÔ])|.$)', "g"), _zBeginOfParagraph: new RegExp ("^[- –—.,;?!…]*", "ig"), getSentenceBoundaries: function* (sText) { // generator: returns start and end of sentences found in <sText> let mBeginOfSentence = this._zBeginOfParagraph.exec(sText); let iStart = this._zBeginOfParagraph.lastIndex; let m; while ((m = this._zEndOfSentence.exec(sText)) !== null) { yield [iStart, this._zEndOfSentence.lastIndex]; iStart = this._zEndOfSentence.lastIndex; } }, getParagraph: function* (sText, sSepParagraph = "\n") { // generator: returns paragraphs of text let iStart = 0; let iEnd = 0; sText = sText.replace("\r\n", "\n").replace("\r", "\n"); while ((iEnd = sText.indexOf(sSepParagraph, iStart)) !== -1) { yield sText.slice(iStart, iEnd); |
︙ |
Modified gc_core/py/lang_core/gc_engine.py from [b546c7e179] to [c8470c3c39].
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 | + + + | """ Grammalecte Grammar checker engine """ import re import traceback #import unicodedata from itertools import chain from ..graphspell.spellchecker import SpellChecker from ..graphspell.echo import echo from .. import text from . import gc_options try: # LibreOffice / OpenOffice from com.sun.star.linguistic2 import SingleProofreadingError from com.sun.star.text.TextMarkupType import PROOFREADING from com.sun.star.beans import PropertyValue |
︙ | |||
186 187 188 189 190 191 192 | 189 190 191 192 193 194 195 196 197 198 199 200 201 202 | - - - - - - - - - - - | def resetOptions (): "set options to default values" global _dOptions _dOptions = getDefaultOptions() #### Parsing |
︙ | |||
262 263 264 265 266 267 268 | 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 | - + | sText = sText.replace("'", "’") if "‑" in sText: sText = sText.replace("‑", "-") # nobreakdash if "@@" in sText: sText = re.sub("@@+", "", sText) # parse sentences |
︙ |
Modified gc_core/py/text.py from [36660e74f0] to [81c9cf089f].
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 | + + + + + + + + + + + + | #!python3 """ Text tools """ import re import textwrap from itertools import chain _zEndOfSentence = re.compile(r'([.?!:;…]\W+(?=[A-ZÉÈÎÔ])|.$)') _zBeginOfParagraph = re.compile(r"^\W*") def getSentenceBoundaries (sText): "generator: returns start and end of sentences found in <sText>" iStart = _zBeginOfParagraph.match(sText).end() for m in _zEndOfSentence.finditer(sText): yield (iStart, m.end()) iStart = m.end() def getParagraph (sText): "generator: returns paragraphs of text" iStart = 0 sText = sText.replace("\r\n", "\n").replace("\r", "\n") iEnd = sText.find("\n", iStart) while iEnd != -1: |
︙ |