Index: gc_lang/fr/webext/content_scripts/panel_lxg.css
==================================================================
--- gc_lang/fr/webext/content_scripts/panel_lxg.css
+++ gc_lang/fr/webext/content_scripts/panel_lxg.css
@@ -148,10 +148,14 @@
 }
 div.grammalecte_lxg_token_PUNC {
     background-color: hsla(210, 50%, 50%, 1);
 }
 div.grammalecte_lxg_token_SIGN {
+    background-color: hsla(210, 50%, 50%, 1);
+}
+div.grammalecte_lxg_token_SYMBOL,
+div.grammalecte_lxg_token_EMOJI {
     background-color: hsla(300, 50%, 50%, 1);
 }
 div.grammalecte_lxg_token_LINK {
     background-color: hsla(270, 50%, 50%, 1);
 }

Index: graphspell-js/lexgraph_fr.js
==================================================================
--- graphspell-js/lexgraph_fr.js
+++ graphspell-js/lexgraph_fr.js
@@ -449,10 +449,16 @@
         switch (oToken.sType) {
             case 'PUNC':
             case 'SIGN':
                 oToken["aLabels"] = [this.dValues.gl_get(oToken["sValue"], "signe de ponctuation divers")];
                 break;
+            case 'SYMBOL':
+                oToken["aLabels"] = ["symbole"];
+                break;
+            case 'EMOJI':
+                oToken["aLabels"] = ["émoji"];
+                break;
             case 'NUM':
                 oToken["aLabels"] = ["nombre"];
                 break;
             case 'LINK':
                 oToken["aLabels"] = ["hyperlien"];

Index: graphspell-js/tokenizer.js
==================================================================
--- graphspell-js/tokenizer.js
+++ graphspell-js/tokenizer.js
@@ -46,11 +46,13 @@
             [/^\d+(?:ers?\b|res?\b|è[rm]es?\b|i[èe][mr]es?\b|de?s?\b|nde?s?\b|ès?\b|es?\b|ᵉʳˢ?|ʳᵉˢ?|ᵈᵉ?ˢ?|ⁿᵈᵉ?ˢ?|ᵉˢ?)/, 'WORDORD'],
             [/^\d+(?:[.,]\d+|)/, 'NUM'],
             [/^[&%‰€$+±=*/<>⩾⩽#|×¥£§¢¬÷@-]/, 'SIGN'],
             [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬀ-ﬆᴀ-ᶿ\u0300-\u036fᵉʳˢⁿᵈ]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬀ-ﬆᴀ-ᶿ\u0300-\u036fᵉʳˢⁿᵈ]+)*/, 'WORD'],
             [/^_+/, 'UNDERSCORE'],
-            [/^\S/, 'OTHER'],
+            [/^[\u2600-\u26ff\u2700-\u27bf\u{1f650}-\u{1f67f}\u{1f700}-\u{1f77f}\u{1f780}-\u{1f7ff}\u{1f800}-\u{1f8ff}]/u, 'SYMBOL'],
+            [/^[\u{1f300}-\u{1f5ff}\u{1f600}-\u{1f64f}\u{1f680}-\u{1f6ff}\u{1f900}-\u{1f9ff}]+/u, "EMOJI"],
+            [/^\S/u, 'OTHER'],
         ]
     };


class Tokenizer {

Index: graphspell/lexgraph_fr.py
==================================================================
--- graphspell/lexgraph_fr.py
+++ graphspell/lexgraph_fr.py
@@ -441,10 +441,14 @@
 def setLabelsOnToken (dToken):
     # Token: .sType, .sValue, .nStart, .nEnd, .lMorph
     try:
         if dToken["sType"] == "PUNC" or dToken["sType"] == "SIGN":
             dToken["aLabels"] = [_dValues.get(dToken["sValue"], "signe de ponctuation divers")]
+        elif dToken["sType"] == 'SYMBOL':
+            dToken["aLabels"] = ["symbole"]
+        elif dToken["sType"] == 'EMOJI':
+            dToken["aLabels"] = ["émoji"]
         elif dToken["sType"] == 'NUM':
             dToken["aLabels"] = ["nombre"]
         elif dToken["sType"] == 'LINK':
             dToken["aLabels"] = ["hyperlien"]
         elif dToken["sType"] == 'TAG':

Index: graphspell/tokenizer.py
==================================================================
--- graphspell/tokenizer.py
+++ graphspell/tokenizer.py
@@ -40,10 +40,12 @@
         r'(?P<HOUR>\d\d?[h:]\d\d(?:[m:]\d\ds?|)\b)',
         r'(?P<NUM>\d+(?:[.,]\d+|))',
         r'(?P<SIGN>[&%‰€$+±=*/<>⩾⩽#|×¥£¢§¬÷@-])',
         r"(?P<WORD>(?:(?!_)[\w\u0300-\u036f])+(?:[’'`-](?:(?!_)[\w\u0300-\u036f])+)*)",       # with combining diacritics
         r"(?P<UNDERSCORE>_+)",
+        r"(?P<SYMBOL>[\u2600-\u26ff\u2700-\u27bf\U0001f650-\U0001f67f\U0001f700-\U0001f77f\U0001f780-\U0001f7ff\U0001f800-\U0001f8ff])",
+        r"(?P<EMOJI>[\U0001f300-\U0001f5ff\U0001f600-\U0001f64f\U0001f680-\U0001f6ff\U0001f900-\U0001f9ff]+)",
         r"(?P<OTHER>\S)"
     )
}