Index: graphspell-js/tokenizer.js ================================================================== --- graphspell-js/tokenizer.js +++ graphspell-js/tokenizer.js @@ -24,11 +24,11 @@ [/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+/, 'TAG'], [/^<[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+.*?>|<\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+ *>/, 'HTML'], [/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+\]/, 'PSEUDOHTML'], [/^&\w+;(?:\w+;|)/, 'HTMLENTITY'], [/^\d\d?h\d\d\b/, 'HOUR'], - [/^-?\d+(?:[.,]\d+|)/, 'NUM'], + [/^\d+(?:[.,]\d+|)/, 'NUM'], [/^[%‰+=*/<>⩾⩽-]/, 'SIGN'], [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+)*/, 'WORD'] ], "fr": [ @@ -43,11 +43,11 @@ [/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+\]/, 'PSEUDOHTML'], [/^&\w+;(?:\w+;|)/, 'HTMLENTITY'], [/^(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`]/i, 'WORD_ELIDED'], [/^\d\d?[hm]\d\d\b/, 'HOUR'], [/^\d+(?:ers?|nds?|es?|des?|ièmes?|èmes?|emes?|ᵉʳˢ?|ⁿᵈˢ?|ᵉˢ?|ᵈᵉˢ?)\b/, 'WORD_ORDINAL'], - [/^-?\d+(?:[.,]\d+|)/, 'NUM'], + [/^\d+(?:[.,]\d+|)/, 'NUM'], [/^[%‰+=*/<>⩾⩽-]/, 'SIGN'], [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+)*/, 'WORD'] ] }; Index: graphspell/tokenizer.py ================================================================== --- graphspell/tokenizer.py +++ graphspell/tokenizer.py @@ -15,11 +15,11 @@ r'(?P(?:https?://|www[.]|\w+[@.]\w\w+[@.])\w[\w./?&!%=+*"\'@$#-]+)', r'(?P[#@][\w-]+)', r'(?P<\w+.*?>|)', r'(?P\[/?\w+\])', r'(?P\d\d?h\d\d\b)', - r'(?P-?\d+(?:[.,]\d+))', + r'(?P\d+(?:[.,]\d+))', r'(?P[%‰+=*/<>⩾⩽-])', r"(?P\w+(?:[’'`-]\w+)*)" ), "fr": ( @@ -32,11 +32,11 @@ r'(?P<\w+.*?>|)', r'(?P\[/?\w+\])', r"(?P(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`])", r'(?P\d+(?:ers?|nds?|es?|des?|ièmes?|èmes?|emes?|ᵉʳˢ?|ⁿᵈˢ?|ᵉˢ?|ᵈᵉˢ?)\b)', r'(?P\d\d?h\d\d\b)', - r'(?P-?\d+(?:[.,]\d+|))', + r'(?P\d+(?:[.,]\d+|))', r'(?P[%‰+=*/<>⩾⩽-])', r"(?P\w+(?:[’'`-]\w+)*)" ) }