Index: graphspell-js/tokenizer.js
==================================================================
--- graphspell-js/tokenizer.js
+++ graphspell-js/tokenizer.js
@@ -39,11 +39,11 @@
[/^<[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st]+.*?>|<\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st]+ *>/, 'HTML'],
[/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-st]+\]/, 'PSEUDOHTML'],
[/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
[/^(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`]/i, 'WORD_ELIDED'],
[/^\d\d?[h:]\d\d(?:[m:]\d\ds?|)\b/, 'HOUR'],
- [/^\d+(?:ers?\b|nds?\b|es?\b|des?\b|ièmes?\b|èmes?\b|emes?\b|ᵉʳˢ?|ⁿᵈˢ?|ᵉˢ?|ᵈᵉˢ?)/, 'WORD_ORDINAL'],
+ [/^\d+(?:ers?\b|res?\b|è[rm]es?\b|i[èe][mr]es?\b|de?s?\b|nde?s?\b|ès?\b|es?\b|ᵉʳˢ?|ʳᵉˢ?|ᵈᵉ?ˢ?|ⁿᵈᵉ?ˢ?|ᵉˢ?)/, 'WORD_ORDINAL'],
[/^\d+(?:[.,]\d+|)/, 'NUM'],
[/^[&%‰€$+±=*/<>⩾⩽#|×¥£§¢¬÷@-]/, 'SIGN'],
[/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-stᴀ-ᶿᵉʳˢⁿᵈ_]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯff-stᴀ-ᶿᵉʳˢⁿᵈ_]+)*/, 'WORD']
]
};
Index: graphspell/tokenizer.py
==================================================================
--- graphspell/tokenizer.py
+++ graphspell/tokenizer.py
@@ -30,11 +30,11 @@
r'(?P(?:https?://|www[.]|\w+[@.]\w\w+[@.])\w[\w./?&!%=+*"\'@$#-]+)',
r'(?P[#@][\w-]+)',
r'(?P<\w+.*?>|\w+ *>)',
r'(?P\[/?\w+\])',
r"(?P(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`])",
- r'(?P\d+(?:ers?|nds?|es?|des?|ièmes?|èmes?|emes?|ᵉʳˢ?|ⁿᵈˢ?|ᵉˢ?|ᵈᵉˢ?)\b)',
+ r'(?P\d+(?:ers?|res?|è[rm]es?|i[èe][mr]es?|de?s?|nde?s?|ès?|es?|ᵉʳˢ?|ʳᵉˢ?|ᵈᵉ?ˢ?|ⁿᵈᵉ?ˢ?|ᵉˢ?)\b)',
r'(?P\d\d?[h:]\d\d(?:[m:]\d\ds?|)\b)',
r'(?P\d+(?:[.,]\d+|))',
r'(?P[&%‰€$+±=*/<>⩾⩽#|×¥£¢§¬÷@-])',
r"(?P\w+(?:[’'`-]\w+)*)"
)