Overview
Comment: | [core][graphspell][js] fix regex for \w substitution |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | core | graphspell |
Files: | files | file ages | folders |
SHA3-256: | e40149ad940e2fc32f79fa4cdef99e3b |
User & Date: | olr on 2019-05-22 07:59:29 |
Other Links: | manifest | tags |
Context
2019-05-22
| ||
08:24 | [graphspell][js] tokenizer: tag SEPARATOR -> PUNC check-in: 75bf92c9c2 user: olr tags: trunk, graphspell | |
07:59 | [core][graphspell][js] fix regex for \w substitution check-in: e40149ad94 user: olr tags: trunk, core, graphspell | |
07:11 | [fr] écriture épicène: ajustements check-in: 02d41c9147 user: olr tags: trunk, fr | |
Changes
Modified gc_core/js/lang_core/gc_engine.js from [5635a63c4b] to [842c50636b].
︙ | ︙ | |||
988 989 990 991 992 993 994 | //////// functions to get text outside pattern scope // warning: check compile_rules.py to understand how it works function nextword (s, iStart, n) { // get the nth word of the input string or empty string | | | | | | 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 | //////// functions to get text outside pattern scope // warning: check compile_rules.py to understand how it works function nextword (s, iStart, n) { // get the nth word of the input string or empty string let z = new RegExp("^(?: +[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+){" + (n-1).toString() + "} +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+)", "ig"); let m = z.exec(s.slice(iStart)); if (!m) { return null; } return [iStart + z.lastIndex - m[1].length, m[1]]; } function prevword (s, iEnd, n) { // get the (-)nth word of the input string or empty string let z = new RegExp("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+) +(?:[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ%_-]+ +){" + (n-1).toString() + "}$", "i"); let m = z.exec(s.slice(0, iEnd)); if (!m) { return null; } return [m.index, m[1]]; } function nextword1 (s, iStart) { // get next word (optimization) let _zNextWord = new RegExp ("^ +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_-]*)", "ig"); let m = _zNextWord.exec(s.slice(iStart)); if (!m) { return null; } return [iStart + _zNextWord.lastIndex - m[1].length, m[1]]; } const _zPrevWord = new RegExp ("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯff-stᴀ-ᶿ_-]*) +$", "i"); function prevword1 (s, iEnd) { // get previous word (optimization) let m = _zPrevWord.exec(s.slice(0, iEnd)); if (!m) { return null; } |
︙ | ︙ |
Modified graphspell-js/tokenizer.js from [4d861c8e0e] to [5b1f96af0f].
︙ | ︙ | |||
9 10 11 12 13 14 15 | const aTkzPatterns = { // All regexps must start with ^. "default": [ [/^[ \t]+/, 'SPACE'], | | | | | | | | | | | | | | | | 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 | const aTkzPatterns = { // All regexps must start with ^. "default": [ [/^[ \t]+/, 'SPACE'], [/^\/(?:~|bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_.()-]+)*/, 'FOLDERUNIX'], [/^[a-zA-Z]:\\(?:Program Files(?: \(x86\)|)|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st.()]+)(?:\\[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_.()-]+)*/, 'FOLDERWIN'], [/^[,.;:!?…«»“”‘’"(){}\[\]·–—¿¡]/, 'SEPARATOR'], [/^[A-Z][.][A-Z][.](?:[A-Z][.])*/, 'WORD_ACRONYM'], [/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_-]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_-]{2,}[@.])[a-zA-Z0-9][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_.\/?&!%=+*"'@$#-]+/, 'LINK'], [/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_-]+/, 'TAG'], [/^<[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st]+.*?>|<\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st]+ *>/, 'HTML'], [/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st]+\]/, 'PSEUDOHTML'], [/^&\w+;(?:\w+;|)/, 'HTMLENTITY'], [/^\d\d?[h:]\d\d\b/, 'HOUR'], [/^\d+(?:[.,]\d+|)/, 'NUM'], [/^[&%‰€$+±=*/<>⩾⩽#|×¥£§¢¬÷@-]/, 'SIGN'], [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-stᴀ-ᶿ_]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-stᴀ-ᶿ_]+)*/, 'WORD'] ], "fr": [ [/^[ \t]+/, 'SPACE'], [/^\/(?:~|bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_.()-]+)*/, 'FOLDERUNIX'], [/^[a-zA-Z]:\\(?:Program Files(?: \(x86\)|)|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st.()]+)(?:\\[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_.()-]+)*/, 'FOLDERWIN'], [/^[,.;:!?…«»“”‘’"(){}\[\]·–—¿¡]/, 'SEPARATOR'], [/^[A-Z][.][A-Z][.](?:[A-Z][.])*/, 'WORD_ACRONYM'], 
[/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_-]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_-]{2,}[@.])[a-zA-Z0-9][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_.\/?&!%=+*"'@$#-]+/, 'LINK'], [/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st_-]+/, 'TAG'], [/^<[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st]+.*?>|<\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st]+ *>/, 'HTML'], [/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st]+\]/, 'PSEUDOHTML'], [/^&\w+;(?:\w+;|)/, 'HTMLENTITY'], [/^(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`]/i, 'WORD_ELIDED'], [/^\d\d?[h:]\d\d\b/, 'HOUR'], [/^\d+(?:ers?\b|nds?\b|es?\b|des?\b|ièmes?\b|èmes?\b|emes?\b|ᵉʳˢ?|ⁿᵈˢ?|ᵉˢ?|ᵈᵉˢ?)/, 'WORD_ORDINAL'], [/^\d+(?:[.,]\d+|)/, 'NUM'], [/^[&%‰€$+±=*/<>⩾⩽#|×¥£§¢¬÷@-]/, 'SIGN'], [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-stᴀ-ᶿᵉʳˢⁿᵈ_]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-stᴀ-ᶿᵉʳˢⁿᵈ_]+)*/, 'WORD'] ] }; class Tokenizer { constructor (sLang) { |
︙ | ︙ |
Modified js_extension/string.js from [aea1fc20a2] to [642ec84082].
︙ | ︙ | |||
17 18 19 20 21 22 23 | } return nOccur; }; String.prototype.gl_isDigit = function () { return (this.search(/^[0-9⁰¹²³⁴⁵⁶⁷⁸⁹]+$/) !== -1); }; String.prototype.gl_isAlpha = function () { | | | | | 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 | } return nOccur; }; String.prototype.gl_isDigit = function () { return (this.search(/^[0-9⁰¹²³⁴⁵⁶⁷⁸⁹]+$/) !== -1); }; String.prototype.gl_isAlpha = function () { return (this.search(/^[a-zà-öA-Zø-ÿÀ-ÖØ-ßĀ-ʯff-stᴀ-ᶿ]+$/) !== -1); }; String.prototype.gl_isLowerCase = function () { return (this.search(/^[a-zà-öø-ÿff-st0-9-]+$/) !== -1); }; String.prototype.gl_isUpperCase = function () { return (this.search(/^[A-ZÀ-ÖØ-ߌ0-9-]+$/) !== -1); }; String.prototype.gl_isTitle = function () { return (this.search(/^[A-ZÀ-ÖØ-ߌ][a-zà-öø-ÿff-st'’-]+$/) !== -1); }; String.prototype.gl_toCapitalize = function () { return this.slice(0,1).toUpperCase() + this.slice(1).toLowerCase(); }; String.prototype.gl_expand = function (oMatch) { let sNew = this; for (let i = 0; i < oMatch.length ; i++) { |
︙ | ︙ |