MM-55267 Add ability for server-side Markdown code to understand emojis (#25332)

* MM-55267 Add ability for server-side Markdown code to understand emojis * Remove unused regex
2025-02-25 18:55:24 -06:00 · 2023-11-13 14:38:05 -05:00 · 2023-11-13 14:38:05 -05:00 · 9397970644
commit 9397970644
parent 448d442a0b
5 changed files with 277 additions and 1 deletions
--- a/server/public/shared/markdown/emoji.go
+++ b/server/public/shared/markdown/emoji.go
@ -0,0 +1,42 @@
 // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
 // See LICENSE.txt for license information.
 package markdown
 import (
 	"regexp"
 )
 // Based off the mobile app's emoji parsing from https://github.com/mattermost/commonmark.js
 var (
 	// emojiRegex matches a named emoji at the very start of the input: an opening colon, a name made of one or
 	// more lowercase ASCII letters, digits, '_', '-' or '+' (capture group 1), and a closing colon. Because the
 	// closing colon is itself a non-word character, the trailing \B only holds when the colon is followed by
 	// another non-word character or the end of the input, so text such as ":emoji:isit" does not match.
 	emojiRegex = regexp.MustCompile(`^:([a-z0-9_\-+]+):\B`)
 )
 // parseEmoji attempts to parse a named emoji (eg. :taco:) starting at the current parser position. If an emoji is
 // found, it appends an *Emoji to p.inlines, advances p.position past the closing colon and returns true.
 // Otherwise, it returns false without modifying p.inlines or p.position.
 func (p *inlineParser) parseEmoji() bool {
 	// Only allow emojis after non-word characters. The check has to run for every position that has a preceding
 	// byte, including position 1; otherwise input such as "a:taco:" would bypass it and be parsed as an emoji.
 	if p.position > 0 && isWordByte(p.raw[p.position-1]) {
 		return false
 	}
 	remaining := p.raw[p.position:]
 	loc := emojiRegex.FindStringIndex(remaining)
 	if loc == nil {
 		return false
 	}
 	// Strip the surrounding colons to get the emoji name.
 	// Note that there may not be a system or custom emoji that exists with this name
 	p.inlines = append(p.inlines, &Emoji{
 		Name: remaining[loc[0]+1 : loc[1]-1],
 	})
 	// Consume the whole match, including both colons.
 	p.position += loc[1] - loc[0]
 	return true
 }
--- a/server/public/shared/markdown/emoji_test.go
+++ b/server/public/shared/markdown/emoji_test.go
@ -0,0 +1,203 @@
 // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
 // See LICENSE.txt for license information.
 package markdown
 import (
 	"testing"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
 // TestParseEmoji calls parseEmoji directly on a parser placed at a given byte offset and checks the returned
 // flag, the resulting parser position and, for successful parses, the name of the last inline that was appended.
 func TestParseEmoji(t *testing.T) {
 	type testCase struct {
 		Input             string
 		Position          int
 		ExpectedOk        bool
 		ExpectedPosition  int
 		ExpectedEmojiName string
 	}
 	testCases := map[string]testCase{
 		"just a colon":                           {Input: ":", Position: 0, ExpectedOk: false, ExpectedPosition: 0},
 		"no closing colon":                       {Input: ":emoji", Position: 0, ExpectedOk: false, ExpectedPosition: 0},
 		"no closing colon before whitespace":     {Input: ":emoji example", Position: 0, ExpectedOk: false, ExpectedPosition: 0},
 		"valid emoji":                            {Input: ":emoji:", Position: 0, ExpectedOk: true, ExpectedPosition: 7, ExpectedEmojiName: "emoji"},
 		"valid emoji with punctuation":           {Input: ":valid-emoji:", Position: 0, ExpectedOk: true, ExpectedPosition: 13, ExpectedEmojiName: "valid-emoji"},
 		"valid emoji with text before":           {Input: "this is an :emoji:", Position: 11, ExpectedOk: true, ExpectedPosition: 18, ExpectedEmojiName: "emoji"},
 		"invalid emoji with text before":         {Input: "this is not an :emoji", Position: 15, ExpectedOk: false, ExpectedPosition: 15},
 		"valid emoji with text after":            {Input: ":emoji: before some text", Position: 0, ExpectedOk: true, ExpectedPosition: 7, ExpectedEmojiName: "emoji"},
 		"valid emoji with text before and after": {Input: "this is an :emoji: in a sentence", Position: 11, ExpectedOk: true, ExpectedPosition: 18, ExpectedEmojiName: "emoji"},
 		"multiple emojis 1":                      {Input: ":multiple: :emojis:", Position: 0, ExpectedOk: true, ExpectedPosition: 10, ExpectedEmojiName: "multiple"},
 		"multiple emojis 2":                      {Input: ":multiple: :emojis:", Position: 11, ExpectedOk: true, ExpectedPosition: 19, ExpectedEmojiName: "emojis"},
 	}
 	for name, tc := range testCases {
 		t.Run(name, func(t *testing.T) {
 			// Place the parser directly on the byte under test.
 			parser := newInlineParser(tc.Input, []Range{}, []*ReferenceDefinition{})
 			parser.raw = tc.Input
 			parser.position = tc.Position
 			matched := parser.parseEmoji()
 			assert.Equal(t, tc.ExpectedOk, matched)
 			assert.Equal(t, tc.ExpectedPosition, parser.position)
 			if !tc.ExpectedOk {
 				return
 			}
 			// A successful parse must have appended an *Emoji as the last inline.
 			require.True(t, len(parser.inlines) > 0)
 			last := parser.inlines[len(parser.inlines)-1]
 			require.IsType(t, &Emoji{}, last)
 			assert.Equal(t, tc.ExpectedEmojiName, last.(*Emoji).Name)
 		})
 	}
 }
 // TestParseEmojiFull renders complete Markdown snippets through RenderHTML and compares the output with the
 // expected HTML, covering both inputs that should produce emoji spans and inputs that should stay plain text.
 func TestParseEmojiFull(t *testing.T) {
 	type testCase struct {
 		Markdown     string
 		ExpectedHTML string
 	}
 	// These tests are based on https://github.com/mattermost/commonmark.js/blob/master/test/mattermost.txt
 	testCases := map[string]testCase{
 		// Valid emojis
 		"emoji": {
 			Markdown:     "This is an :emoji:",
 			ExpectedHTML: `<p>This is an <span data-emoji-name="emoji" data-literal=":emoji:" /></p>`,
 		},
 		"emoji with underscore": {
 			Markdown:     "This is an :emo_ji:",
 			ExpectedHTML: `<p>This is an <span data-emoji-name="emo_ji" data-literal=":emo_ji:" /></p>`,
 		},
 		"emoji with hyphen": {
 			Markdown:     "This is an :emo-ji:",
 			ExpectedHTML: `<p>This is an <span data-emoji-name="emo-ji" data-literal=":emo-ji:" /></p>`,
 		},
 		"emoji with numbers": {
 			Markdown:     "This is an :emoji123:",
 			ExpectedHTML: `<p>This is an <span data-emoji-name="emoji123" data-literal=":emoji123:" /></p>`,
 		},
 		"emoji in brackets": {
 			Markdown:     "This is an (:emoji:)",
 			ExpectedHTML: `<p>This is an (<span data-emoji-name="emoji" data-literal=":emoji:" />)</p>`,
 		},
 		"two emojis without space between": {
 			Markdown:     "These are some :emoji1::emoji2:",
 			ExpectedHTML: `<p>These are some <span data-emoji-name="emoji1" data-literal=":emoji1:" /><span data-emoji-name="emoji2" data-literal=":emoji2:" /></p>`,
 		},
 		"two emojis separated by a slash": {
 			Markdown:     "These are some :emoji1:/:emoji2:",
 			ExpectedHTML: `<p>These are some <span data-emoji-name="emoji1" data-literal=":emoji1:" />/<span data-emoji-name="emoji2" data-literal=":emoji2:" /></p>`,
 		},
 		"+1 emoji": {
 			Markdown:     "This is an :+1:",
 			ExpectedHTML: `<p>This is an <span data-emoji-name="+1" data-literal=":+1:" /></p>`,
 		},
 		"-1 emoji": {
 			Markdown:     "This is an :-1:",
 			ExpectedHTML: `<p>This is an <span data-emoji-name="-1" data-literal=":-1:" /></p>`,
 		},
 		"emoji with surrounding words": {
 			Markdown:     "This is an :emoji: in a sentence.",
 			ExpectedHTML: `<p>This is an <span data-emoji-name="emoji" data-literal=":emoji:" /> in a sentence.</p>`,
 		},
 		// Invalid emojis
 		"incomplete emoji 1": {
 			Markdown:     "This is not an :emoji",
 			ExpectedHTML: `<p>This is not an :emoji</p>`,
 		},
 		"incomplete emoji 2": {
 			Markdown:     "This is not an emoji:",
 			ExpectedHTML: `<p>This is not an emoji:</p>`,
 		},
 		"invalid emoji with whitespace": {
 			Markdown:     "This is not an :emo ji:",
 			ExpectedHTML: `<p>This is not an :emo ji:</p>`,
 		},
 		"invalid emoji with other punctuation": {
 			Markdown:     "This is not an :emo'ji:",
 			ExpectedHTML: `<p>This is not an :emo'ji:</p>`,
 		},
 		"invalid emoji due to adjacent text 1": {
 			Markdown: "Thisisnotan:emoji:",
 			// This differs slightly from our commonmark.js implementation because it doesn't require :// when autolinking
 			ExpectedHTML: `<p>Thisisnotan:emoji:</p>`,
 		},
 		"invalid emoji due to adjacent text 2": {
 			Markdown: "This is not an :emoji:isit",
 			// This differs slightly from our commonmark.js implementation because it doesn't require :// when autolinking
 			ExpectedHTML: `<p>This is not an :emoji:isit</p>`,
 		},
 		"invalid emoji due to adjacent text 3": {
 			Markdown: "This is not an:emoji:isit",
 			// This differs slightly from our commonmark.js implementation because it doesn't require :// when autolinking
 			ExpectedHTML: `<p>This is not an:emoji:isit</p>`,
 		},
 	}
 	for name, tc := range testCases {
 		t.Run(name, func(t *testing.T) {
 			rendered := RenderHTML(tc.Markdown)
 			assert.Equal(t, tc.ExpectedHTML, rendered)
 		})
 	}
 }
--- a/server/public/shared/markdown/html.go
+++ b/server/public/shared/markdown/html.go
@ -162,6 +162,10 @@ func RenderInlineHTML(inline Inline) (result string) {
 			result += RenderInlineHTML(inline)
 		}
 		result += "</a>"
 	case *Emoji:
 		escapedName := htmlEscaper.Replace(v.Name)
 		result += fmt.Sprintf(`<span data-emoji-name="%s" data-literal=":%s:" />`, escapedName, escapedName)
 	default:
 		panic(fmt.Sprintf("missing case for type %T", v))
 	}
--- a/server/public/shared/markdown/inlines.go
+++ b/server/public/shared/markdown/inlines.go
@ -101,6 +101,12 @@ func (i *Autolink) Destination() string {
 	return destination
 }
 // Emoji is an inline node for a named emoji written as :name: in the source text. The parser does not verify
 // that a system or custom emoji with that name actually exists.
 type Emoji struct {
 	inlineBase
 	// Name is the emoji name without the surrounding colons (eg. "taco" for ":taco:").
 	Name string
 }
 type delimiterType int
 const (
@ -575,12 +581,24 @@ func (p *inlineParser) Parse() []Inline {
 			p.parseLinkOrImageDelimiter()
 		case ']':
 			p.lookForLinkOrImage()
-		case 'w', 'W', ':':
+		case 'w', 'W':
 			matched := p.parseAutolink(c)
 			if !matched {
 				p.parseText()
 			}
 		case ':':
 			matched := p.parseAutolink(c)
 			if matched {
 				continue
 			}
 			matched = p.parseEmoji()
 			if matched {
 				continue
 			}
 			p.parseText()
 		default:
 			p.parseText()
 		}
--- a/server/public/shared/markdown/markdown.go
+++ b/server/public/shared/markdown/markdown.go
@ -55,6 +55,15 @@ func isAlphanumericByte(c byte) bool {
 	return isAlphanumeric(rune(c))
 }
 // isWord reports whether c counts as a word character: either an underscore or anything accepted by
 // isAlphanumeric. It is intended to mirror the \w regexp character class.
 func isWord(c rune) bool {
 	if c == '_' {
 		return true
 	}
 	return isAlphanumeric(c)
 }
 // isWordByte is the byte-based variant of isWord: it widens c to a rune and applies the same check.
 func isWordByte(c byte) bool {
 	r := rune(c)
 	return isWord(r)
 }
 func nextNonWhitespace(markdown string, position int) int {
 	for offset, c := range []byte(markdown[position:]) {
 		if !isWhitespaceByte(c) {