Overview
| Comment: | [core] new regex for paragraph splitting | 
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive | 
| Timelines: | family | ancestors | descendants | both | trunk | core | major_change | 
| Files: | files | file ages | folders | 
| SHA3-256: | 306fddb08274af6e8458422b69d107a3 | 
| User & Date: | olr on 2018-11-26 15:15:52 | 
| Other Links: | manifest | tags | 
Context
| 2018-11-26 | ||
| 16:20 | [fr] faux positif check-in: 8f4f50aba4 user: olr tags: trunk, fr | |
| 15:15 | [core] new regex for paragraph splitting check-in: 306fddb082 user: olr tags: trunk, core, major_change | |
| 14:53 | [fr] faux positifs check-in: bbc73efbc8 user: olr tags: trunk, fr | |
Changes
Modified gc_core/js/lang_core/gc_engine.js from [10324df4f2] to [9248952883].
| ︙ | ︙ | |||
156 157 158 159 160 161 162  | 
    //// Parsing
    /**
     * Entry point for parsing: wraps <sText> in a new TextParser and delegates
     * to its parse() method, forwarding the remaining arguments unchanged.
     *
     * @param sText     text handed to the TextParser constructor
     * @param sCountry  forwarded as-is; defaults to the "${country_default}" placeholder
     *                  (presumably substituted when the file is built — TODO confirm)
     * @param bDebug    forwarded as-is (default false)
     * @param dOptions  forwarded as-is (default null)
     * @param bContext  forwarded as-is (default false)
     * @returns whatever TextParser.parse() returns
     */
    parse: function (sText, sCountry="${country_default}", bDebug=false, dOptions=null, bContext=false) {
        let oText = new TextParser(sText);
        return oText.parse(sCountry, bDebug, dOptions, bContext);
    },
 | |  | 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170  | 
    //// Parsing
    /**
     * Entry point for parsing: wraps <sText> in a new TextParser and delegates
     * to its parse() method, forwarding the remaining arguments unchanged.
     *
     * @param sText     text handed to the TextParser constructor
     * @param sCountry  forwarded as-is; defaults to the "${country_default}" placeholder
     *                  (presumably substituted when the file is built — TODO confirm)
     * @param bDebug    forwarded as-is (default false)
     * @param dOptions  forwarded as-is (default null)
     * @param bContext  forwarded as-is (default false)
     * @returns whatever TextParser.parse() returns
     */
    parse: function (sText, sCountry="${country_default}", bDebug=false, dOptions=null, bContext=false) {
        let oText = new TextParser(sText);
        return oText.parse(sCountry, bDebug, dOptions, bContext);
    },
    // Sentence terminator: one of . ? ! : ; … followed by one or more of the listed
    // space / punctuation / closing-quote characters, or else any single non-newline
    // character at the very end of the text (`.$`). Compiled with the global flag.
    _zEndOfSentence: new RegExp ('([.?!:;…][   .?!… »”")]+|.$)', "g"),
    // Possibly empty run of dashes, spaces and punctuation anchored at the start of the text.
    // getSentenceBoundaries reads `lastIndex` right after exec(), which relies on the "g" flag.
    // NOTE(review): this RegExp object is shared and stateful; with "g", exec() starts at the
    // previous `lastIndex`, and `^` cannot match at an index > 0 — verify `lastIndex` is reset
    // between calls. The "i" flag looks redundant (no letters in the pattern).
    _zBeginOfParagraph: new RegExp ("^[-  –—.,;?!…]*", "ig"),
    // Possibly empty run of dashes, spaces and punctuation anchored at the end of the text.
    _zEndOfParagraph: new RegExp ("[-  .,;?!…–—]*$", "ig"),
    getSentenceBoundaries: function* (sText) {
        let mBeginOfSentence = this._zBeginOfParagraph.exec(sText);
        let iStart = this._zBeginOfParagraph.lastIndex;
        let m;
 | 
| ︙ | ︙ | 
Modified gc_core/py/lang_core/gc_engine.py from [20746fba5d] to [621c240c96].
| ︙ | ︙ | |||
187 188 189 190 191 192 193  | 
    "set options to default values"
    global _dOptions
    _dOptions = dict(gc_options.getOptions(_sAppContext))
#### Parsing
 | |  | 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201  | 
    "set options to default values"
    global _dOptions
    _dOptions = dict(gc_options.getOptions(_sAppContext))
#### Parsing
# Sentence terminator: one of . ? ! : ; … followed by one or more of the listed
# space / punctuation / closing-quote characters, or else a single character
# immediately before the end of the text (`.$`).
_zEndOfSentence = re.compile(r'([.?!:;…][   .?!… »”")]+|.$)')
# Possibly empty run of non-word characters at the start of the text.
_zBeginOfParagraph = re.compile(r"^\W*")
# Possibly empty run of non-word characters at the end of the text.
_zEndOfParagraph = re.compile(r"\W*$")
def _getSentenceBoundaries (sText):
    iStart = _zBeginOfParagraph.match(sText).end()
    for m in _zEndOfSentence.finditer(sText):
        yield (iStart, m.end())
 | 
| ︙ | ︙ |