mirror of
https://github.com/mattermost/mattermost.git
synced 2025-02-25 18:55:24 -06:00
MM-55267 Add ability for server-side Markdown code to understand emojis (#25332)
* MM-55267 Add ability for server-side Markdown code to understand emojis * Remove unused regex
This commit is contained in:
parent
448d442a0b
commit
9397970644
42
server/public/shared/markdown/emoji.go
Normal file
42
server/public/shared/markdown/emoji.go
Normal file
@ -0,0 +1,42 @@
|
||||
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
||||
// See LICENSE.txt for license information.
|
||||
|
||||
package markdown
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
)
|
||||
|
||||
// emojiRegex matches a named emoji (eg. :taco:) anchored at the start of the input: an opening colon, one or more
// of the characters a-z, 0-9, '_', '-' or '+', and a closing colon. The trailing \B rejects a closing colon that is
// immediately followed by a word character, so ":emoji:isit" does not match.
//
// Based off the mobile app's emoji parsing from https://github.com/mattermost/commonmark.js
var emojiRegex = regexp.MustCompile(`^:([a-z0-9_\-+]+):\B`)
|
||||
|
||||
// parseEmoji attempts to parse a named emoji (eg. :taco:) starting at the current parser position. If an emoji is
|
||||
// found, it adds that to p.inlines and returns true. Otherwise, it returns false.
|
||||
func (p *inlineParser) parseEmoji() bool {
|
||||
// Only allow emojis after non-word characters
|
||||
if p.position > 1 {
|
||||
prevChar := p.raw[p.position-1]
|
||||
|
||||
if isWordByte(prevChar) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
remaining := p.raw[p.position:]
|
||||
|
||||
loc := emojiRegex.FindStringIndex(remaining)
|
||||
if loc == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
// Note that there may not be a system or custom emoji that exists with this name
|
||||
p.inlines = append(p.inlines, &Emoji{
|
||||
Name: remaining[loc[0]+1 : loc[1]-1],
|
||||
})
|
||||
p.position += loc[1] - loc[0]
|
||||
|
||||
return true
|
||||
}
|
203
server/public/shared/markdown/emoji_test.go
Normal file
203
server/public/shared/markdown/emoji_test.go
Normal file
@ -0,0 +1,203 @@
|
||||
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
||||
// See LICENSE.txt for license information.
|
||||
|
||||
package markdown
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestParseEmoji(t *testing.T) {
|
||||
for name, tc := range map[string]struct {
|
||||
Input string
|
||||
Position int
|
||||
ExpectedOk bool
|
||||
ExpectedPosition int
|
||||
ExpectedEmojiName string
|
||||
}{
|
||||
"just a colon": {
|
||||
Input: ":",
|
||||
Position: 0,
|
||||
ExpectedOk: false,
|
||||
ExpectedPosition: 0,
|
||||
},
|
||||
"no closing colon": {
|
||||
Input: ":emoji",
|
||||
Position: 0,
|
||||
ExpectedOk: false,
|
||||
ExpectedPosition: 0,
|
||||
},
|
||||
"no closing colon before whitespace": {
|
||||
Input: ":emoji example",
|
||||
Position: 0,
|
||||
ExpectedOk: false,
|
||||
ExpectedPosition: 0,
|
||||
},
|
||||
"valid emoji": {
|
||||
Input: ":emoji:",
|
||||
Position: 0,
|
||||
ExpectedOk: true,
|
||||
ExpectedPosition: 7,
|
||||
ExpectedEmojiName: "emoji",
|
||||
},
|
||||
"valid emoji with punctuation": {
|
||||
Input: ":valid-emoji:",
|
||||
Position: 0,
|
||||
ExpectedOk: true,
|
||||
ExpectedPosition: 13,
|
||||
ExpectedEmojiName: "valid-emoji",
|
||||
},
|
||||
"valid emoji with text before": {
|
||||
Input: "this is an :emoji:",
|
||||
Position: 11,
|
||||
ExpectedOk: true,
|
||||
ExpectedPosition: 18,
|
||||
ExpectedEmojiName: "emoji",
|
||||
},
|
||||
"invalid emoji with text before": {
|
||||
Input: "this is not an :emoji",
|
||||
Position: 15,
|
||||
ExpectedOk: false,
|
||||
ExpectedPosition: 15,
|
||||
},
|
||||
"valid emoji with text after": {
|
||||
Input: ":emoji: before some text",
|
||||
Position: 0,
|
||||
ExpectedOk: true,
|
||||
ExpectedPosition: 7,
|
||||
ExpectedEmojiName: "emoji",
|
||||
},
|
||||
"valid emoji with text before and after": {
|
||||
Input: "this is an :emoji: in a sentence",
|
||||
Position: 11,
|
||||
ExpectedOk: true,
|
||||
ExpectedPosition: 18,
|
||||
ExpectedEmojiName: "emoji",
|
||||
},
|
||||
"multiple emojis 1": {
|
||||
Input: ":multiple: :emojis:",
|
||||
Position: 0,
|
||||
ExpectedOk: true,
|
||||
ExpectedPosition: 10,
|
||||
ExpectedEmojiName: "multiple",
|
||||
},
|
||||
"multiple emojis 2": {
|
||||
Input: ":multiple: :emojis:",
|
||||
Position: 11,
|
||||
ExpectedOk: true,
|
||||
ExpectedPosition: 19,
|
||||
ExpectedEmojiName: "emojis",
|
||||
},
|
||||
} {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
p := newInlineParser(tc.Input, []Range{}, []*ReferenceDefinition{})
|
||||
p.raw = tc.Input
|
||||
p.position = tc.Position
|
||||
|
||||
ok := p.parseEmoji()
|
||||
|
||||
assert.Equal(t, tc.ExpectedOk, ok)
|
||||
assert.Equal(t, tc.ExpectedPosition, p.position)
|
||||
if tc.ExpectedOk {
|
||||
require.True(t, len(p.inlines) > 0)
|
||||
require.IsType(t, &Emoji{}, p.inlines[len(p.inlines)-1])
|
||||
assert.Equal(t, tc.ExpectedEmojiName, p.inlines[len(p.inlines)-1].(*Emoji).Name)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEmojiFull(t *testing.T) {
|
||||
// These tests are based on https://github.com/mattermost/commonmark.js/blob/master/test/mattermost.txt
|
||||
|
||||
for name, tc := range map[string]struct {
|
||||
Markdown string
|
||||
ExpectedHTML string
|
||||
}{
|
||||
// Valid emojis
|
||||
|
||||
"emoji": {
|
||||
Markdown: "This is an :emoji:",
|
||||
ExpectedHTML: `<p>This is an <span data-emoji-name="emoji" data-literal=":emoji:" /></p>`,
|
||||
},
|
||||
"emoji with underscore": {
|
||||
Markdown: "This is an :emo_ji:",
|
||||
ExpectedHTML: `<p>This is an <span data-emoji-name="emo_ji" data-literal=":emo_ji:" /></p>`,
|
||||
},
|
||||
"emoji with hyphen": {
|
||||
Markdown: "This is an :emo-ji:",
|
||||
ExpectedHTML: `<p>This is an <span data-emoji-name="emo-ji" data-literal=":emo-ji:" /></p>`,
|
||||
},
|
||||
"emoji with numbers": {
|
||||
Markdown: "This is an :emoji123:",
|
||||
ExpectedHTML: `<p>This is an <span data-emoji-name="emoji123" data-literal=":emoji123:" /></p>`,
|
||||
},
|
||||
"emoji in brackets": {
|
||||
Markdown: "This is an (:emoji:)",
|
||||
ExpectedHTML: `<p>This is an (<span data-emoji-name="emoji" data-literal=":emoji:" />)</p>`,
|
||||
},
|
||||
"two emojis without space between": {
|
||||
Markdown: "These are some :emoji1::emoji2:",
|
||||
ExpectedHTML: `<p>These are some <span data-emoji-name="emoji1" data-literal=":emoji1:" /><span data-emoji-name="emoji2" data-literal=":emoji2:" /></p>`,
|
||||
},
|
||||
"two emojis separated by a slash": {
|
||||
Markdown: "These are some :emoji1:/:emoji2:",
|
||||
ExpectedHTML: `<p>These are some <span data-emoji-name="emoji1" data-literal=":emoji1:" />/<span data-emoji-name="emoji2" data-literal=":emoji2:" /></p>`,
|
||||
},
|
||||
"+1 emoji": {
|
||||
Markdown: "This is an :+1:",
|
||||
ExpectedHTML: `<p>This is an <span data-emoji-name="+1" data-literal=":+1:" /></p>`,
|
||||
},
|
||||
"-1 emoji": {
|
||||
Markdown: "This is an :-1:",
|
||||
ExpectedHTML: `<p>This is an <span data-emoji-name="-1" data-literal=":-1:" /></p>`,
|
||||
},
|
||||
"emoji with surrounding words": {
|
||||
Markdown: "This is an :emoji: in a sentence.",
|
||||
ExpectedHTML: `<p>This is an <span data-emoji-name="emoji" data-literal=":emoji:" /> in a sentence.</p>`,
|
||||
},
|
||||
|
||||
// Invalid emojis
|
||||
|
||||
"incomplete emoji 1": {
|
||||
Markdown: "This is not an :emoji",
|
||||
ExpectedHTML: `<p>This is not an :emoji</p>`,
|
||||
},
|
||||
"incomplete emoji 2": {
|
||||
Markdown: "This is not an emoji:",
|
||||
ExpectedHTML: `<p>This is not an emoji:</p>`,
|
||||
},
|
||||
"invalid emoji with whitespace": {
|
||||
Markdown: "This is not an :emo ji:",
|
||||
ExpectedHTML: `<p>This is not an :emo ji:</p>`,
|
||||
},
|
||||
"invalid emoji with other punctuation": {
|
||||
Markdown: "This is not an :emo'ji:",
|
||||
ExpectedHTML: `<p>This is not an :emo'ji:</p>`,
|
||||
},
|
||||
"invalid emoji due to adjacent text 1": {
|
||||
Markdown: "Thisisnotan:emoji:",
|
||||
// This differs slightly from our commonmark.js implementation because it doesn't require :// when autolinking
|
||||
ExpectedHTML: `<p>Thisisnotan:emoji:</p>`,
|
||||
},
|
||||
"invalid emoji due to adjacent text 2": {
|
||||
Markdown: "This is not an :emoji:isit",
|
||||
// This differs slightly from our commonmark.js implementation because it doesn't require :// when autolinking
|
||||
ExpectedHTML: `<p>This is not an :emoji:isit</p>`,
|
||||
},
|
||||
"invalid emoji due to adjacent text 3": {
|
||||
Markdown: "This is not an:emoji:isit",
|
||||
// This differs slightly from our commonmark.js implementation because it doesn't require :// when autolinking
|
||||
ExpectedHTML: `<p>This is not an:emoji:isit</p>`,
|
||||
},
|
||||
} {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
actual := RenderHTML(tc.Markdown)
|
||||
|
||||
assert.Equal(t, tc.ExpectedHTML, actual)
|
||||
})
|
||||
}
|
||||
}
|
@ -162,6 +162,10 @@ func RenderInlineHTML(inline Inline) (result string) {
|
||||
result += RenderInlineHTML(inline)
|
||||
}
|
||||
result += "</a>"
|
||||
case *Emoji:
|
||||
escapedName := htmlEscaper.Replace(v.Name)
|
||||
result += fmt.Sprintf(`<span data-emoji-name="%s" data-literal=":%s:" />`, escapedName, escapedName)
|
||||
|
||||
default:
|
||||
panic(fmt.Sprintf("missing case for type %T", v))
|
||||
}
|
||||
|
@ -101,6 +101,12 @@ func (i *Autolink) Destination() string {
|
||||
return destination
|
||||
}
|
||||
|
||||
type Emoji struct {
|
||||
inlineBase
|
||||
|
||||
Name string
|
||||
}
|
||||
|
||||
type delimiterType int
|
||||
|
||||
const (
|
||||
@ -575,12 +581,24 @@ func (p *inlineParser) Parse() []Inline {
|
||||
p.parseLinkOrImageDelimiter()
|
||||
case ']':
|
||||
p.lookForLinkOrImage()
|
||||
case 'w', 'W', ':':
|
||||
case 'w', 'W':
|
||||
matched := p.parseAutolink(c)
|
||||
|
||||
if !matched {
|
||||
p.parseText()
|
||||
}
|
||||
case ':':
|
||||
matched := p.parseAutolink(c)
|
||||
if matched {
|
||||
continue
|
||||
}
|
||||
|
||||
matched = p.parseEmoji()
|
||||
if matched {
|
||||
continue
|
||||
}
|
||||
|
||||
p.parseText()
|
||||
default:
|
||||
p.parseText()
|
||||
}
|
||||
|
@ -55,6 +55,15 @@ func isAlphanumericByte(c byte) bool {
|
||||
return isAlphanumeric(rune(c))
|
||||
}
|
||||
|
||||
// isWord returns true if c matches the \w regexp character class
|
||||
func isWord(c rune) bool {
|
||||
return isAlphanumeric(c) || c == '_'
|
||||
}
|
||||
|
||||
func isWordByte(c byte) bool {
|
||||
return isWord(rune(c))
|
||||
}
|
||||
|
||||
func nextNonWhitespace(markdown string, position int) int {
|
||||
for offset, c := range []byte(markdown[position:]) {
|
||||
if !isWhitespaceByte(c) {
|
||||
|
Loading…
Reference in New Issue
Block a user