Grammalecte  Check-in [11f1414b5b]

Overview
Comment:Ajout dans le tokenizer du ~ dans la detection des dossier linux, et distintion entre les deux types de dossier windows/linux avec le changement dans le lexicographe
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | Lexicographe
Files: files | file ages | folders
SHA3-256: 11f1414b5b27a55edc22150c5948efe903cccd8dab3a6c70be79af8ef0242dc4
User & Date: IllusionPerdu on 2017-11-02 10:58:57
Other Links: branch diff | manifest | tags
Context
2017-11-02
11:01
[fr] restructuration des données pour éviter la confusion avec le token <:> check-in: 685f9128f0 user: olr tags: fr, Lexicographe
10:58
Ajout dans le tokenizer du ~ dans la detection des dossier linux, et distintion entre les deux types de dossier windows/linux avec le changement dans le lexicographe check-in: 11f1414b5b user: IllusionPerdu tags: Lexicographe
10:47
[fr] lexicographe: gestion des locutions occasionnelles check-in: 34d80cabfa user: olr tags: fr, Lexicographe
Changes

Modified gc_core/js/tokenizer.js from [9bb6ea03fb] to [9d996e312d].

12
13
14
15
16
17
18
19
20


21
22
23
24
25
26
27
28
29
30
31
32
33
34
35


36
37
38
39
40
41
42
12
13
14
15
16
17
18


19
20
21
22
23
24
25
26
27
28
29
30
31
32
33


34
35
36
37
38
39
40
41
42







-
-
+
+













-
-
+
+









const aTkzPatterns = {
    // All regexps must start with ^.
    "default":
        [
            [/^[   \t]+/, 'SPACE'],
            [/^\/(?:bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.()-]+)*/, 'FOLDER'],
            [/^[a-zA-Z]:\\(?:Program Files(?: \(x86\)|)|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st.()]+)(?:\\[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.()-]+)*/, 'FOLDER'],
            [/^\/(?:~|bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.()-]+)*/, 'FOLDERLINUX'],
            [/^[a-zA-Z]:\\(?:Program Files(?: \(x86\)|)|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st.()]+)(?:\\[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.()-]+)*/, 'FOLDERWIN'],
            [/^[,.;:!?…«»“”‘’"(){}\[\]/·–—]+/, 'SEPARATOR'],
            [/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]{2,}[@.])[a-zA-Z0-9][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.\/?&!%=+*"'@$#-]+/, 'LINK'],
            [/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+/, 'TAG'],
            [/^<[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+.*?>|<\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+ *>/, 'HTML'],
            [/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+\]/, 'PSEUDOHTML'],
            [/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
            [/^\d\d?h\d\d\b/, 'HOUR'],
            [/^-?\d+(?:[.,]\d+|)/, 'NUM'],
            [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+)*/, 'WORD']
        ],
    "fr":
        [
            [/^[   \t]+/, 'SPACE'],
            [/^\/(?:bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.()-]+)*/, 'FOLDER'],
            [/^[a-zA-Z]:\\(?:Program Files(?: \(x86\)|)|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st.()]+)(?:\\[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.()-]+)*/, 'FOLDER'],
            [/^\/(?:~|bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.()-]+)*/, 'FOLDERLINUX'],
            [/^[a-zA-Z]:\\(?:Program Files(?: \(x86\)|)|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st.()]+)(?:\\[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.()-]+)*/, 'FOLDERWIN'],
            [/^[,.;:!?…«»“”‘’"(){}\[\]/·–—]+/, 'SEPARATOR'],
            [/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]{2,}[@.])[a-zA-Z0-9][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.\/?&!%=+*"'@$#-]+/, 'LINK'],
            [/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+/, 'TAG'],
            [/^<[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+.*?>|<\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+ *>/, 'HTML'],
            [/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+\]/, 'PSEUDOHTML'],
            [/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
            [/^(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`]/i, 'ELPFX'],
64
65
66
67
68
69
70
71

72
73
74

75
76
77
78
79
80
81
64
65
66
67
68
69
70

71
72
73

74
75
76
77
78
79
80
81







-
+


-
+







        while (sText) {
            let nCut = 1;
            for (let [zRegex, sType] of this.aRules) {
                try {
                    if ((m = zRegex.exec(sText)) !== null) {
                        if (sType == 'SEPARATOR') {
                            for (let c of m[0]) {
                                yield { "sType": sType, "sValue": c, "nStart": i, "nEnd": i + m[0].length }    
                                yield { "sType": sType, "sValue": c, "nStart": i, "nEnd": i + m[0].length }
                            }
                        } else {
                            yield { "sType": sType, "sValue": m[0], "nStart": i, "nEnd": i + m[0].length }    
                            yield { "sType": sType, "sValue": m[0], "nStart": i, "nEnd": i + m[0].length }
                        }
                        nCut = m[0].length;
                        break;
                    }
                }
                catch (e) {
                    helpers.logerror(e);

Modified gc_lang/fr/modules-js/lexicographe.js from [fc0a175535] to [df1f15e3d4].

265
266
267
268
269
270
271
272








273
274
275
276

277
278
279
280
281
282
283
265
266
267
268
269
270
271

272
273
274
275
276
277
278
279
280
281
282

283
284
285
286
287
288
289
290







-
+
+
+
+
+
+
+
+



-
+







                    let sTemp = oToken.sValue.replace("’", "").replace("'", "").replace("`", "").toLowerCase();
                    return {
                        sType: oToken.sType,
                        sValue: oToken.sValue,
                        aLabel: [_dElidedPrefix.gl_get(sTemp, "préfixe élidé inconnu")]
                    };
                    break;
                case 'FOLDER':
                case 'FOLDERLINUX':
                    return {
                        sType: oToken.sType,
                        sValue: oToken.sValue.slice(0, 40) + "…",
                        aLabel: ["dossier Linux/Unix"]
                    };
                    break;
                case 'FOLDERWIN':
                    return {
                        sType: oToken.sType,
                        sValue: oToken.sValue.slice(0, 40) + "…",
                        aLabel: ["dossier"]
                        aLabel: ["dossier Windows"]
                    };
                    break;
                case 'WORD':
                    if (oToken.sValue.gl_count("-") > 4) {
                        return {
                            sType: "COMPLEX",
                            sValue: oToken.sValue,