Index: graphspell-js/tokenizer.js ================================================================== --- graphspell-js/tokenizer.js +++ graphspell-js/tokenizer.js @@ -21,11 +21,11 @@ [/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+\]/, 'PSEUDOHTML'], [/^&\w+;(?:\w+;|)/, 'HTMLENTITY'], [/^\d\d?h\d\d\b/, 'HOUR'], [/^\d+(?:[.,]\d+|)/, 'NUM'], [/^[%‰+=*/<>⩾⩽-]/, 'SIGN'], - [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+)*/, 'WORD'] + [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-stᴀ-ᶿ]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-stᴀ-ᶿ]+)*/, 'WORD'] ], "fr": [ [/^[   \t]+/, 'SPACE'], [/^\/(?:~|bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.()-]+)*/, 'FOLDERUNIX'], @@ -40,11 +40,11 @@ [/^(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`]/i, 'WORD_ELIDED'], [/^\d\d?[hm]\d\d\b/, 'HOUR'], [/^\d+(?:ers?|nds?|es?|des?|ièmes?|èmes?|emes?|ᵉʳˢ?|ⁿᵈˢ?|ᵉˢ?|ᵈᵉˢ?)\b/, 'WORD_ORDINAL'], [/^\d+(?:[.,]\d+|)/, 'NUM'], [/^[%‰+=*/<>⩾⩽-]/, 'SIGN'], - [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+)*/, 'WORD'] + [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-stᴀ-ᶿ]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-stᴀ-ᶿ]+)*/, 'WORD'] ] }; class Tokenizer {