Grammalecte  Diff

Differences From Artifact [a6594366c3]:

To Artifact [02266194ef]:


1
2
3
4
5


6
7


8
9
10
11
12
13
14
15
// JavaScript
// Very simple tokenizer

"use strict";



const helpers = require("resource://grammalecte/helpers.js");



const aPatterns = {
    // All regexps must start with ^.
    "default":
        [
            [/^[   \t]+/, 'SPACE'],
            [/^[,.;:!?…«»“”‘’"(){}\[\]/·–—]+/, 'SEPARATOR'],
            [/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_]+[@.])[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.\/?&!%=+*"'@$#-]+/, 'LINK'],
            [/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+/, 'TAG'],





>
>
|
|
>
>
|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
// JavaScript
// Very simple tokenizer

"use strict";


if (typeof(exports) !== 'undefined') {
    var helpers = require("resource://grammalecte/helpers.js");
}


const aTkzPatterns = {
    // All regexps must start with ^.
    "default":
        [
            [/^[   \t]+/, 'SPACE'],
            [/^[,.;:!?…«»“”‘’"(){}\[\]/·–—]+/, 'SEPARATOR'],
            [/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_]+[@.])[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.\/?&!%=+*"'@$#-]+/, 'LINK'],
            [/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+/, 'TAG'],
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
}


class Tokenizer {

    constructor (sLang) {
        this.sLang = sLang;
        if (!aPatterns.hasOwnProperty(sLang)) {
            this.sLang = "default";
        }
        this.aRules = aPatterns[this.sLang];
    };

    * genTokens (sText) {
        let m;
        let i = 0;
        while (sText) {
            let nCut = 1;







|


|







42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
}


class Tokenizer {

    constructor (sLang) {
        this.sLang = sLang;
        if (!aTkzPatterns.hasOwnProperty(sLang)) {
            this.sLang = "default";
        }
        this.aRules = aTkzPatterns[this.sLang];
    };

    * genTokens (sText) {
        let m;
        let i = 0;
        while (sText) {
            let nCut = 1;