mirror of
https://github.com/mattermost/mattermost.git
synced 2025-02-25 18:55:24 -06:00
MM-55267 Add ability for server-side Markdown code to understand emojis (#25332)
* MM-55267 Add ability for server-side Markdown code to understand emojis * Remove unused regex
This commit is contained in:
parent
448d442a0b
commit
9397970644
42
server/public/shared/markdown/emoji.go
Normal file
42
server/public/shared/markdown/emoji.go
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
||||||
|
// See LICENSE.txt for license information.
|
||||||
|
|
||||||
|
package markdown
|
||||||
|
|
||||||
|
import (
|
||||||
|
"regexp"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Based on the mobile app's emoji parsing from https://github.com/mattermost/commonmark.js
|
||||||
|
|
||||||
|
// emojiRegex matches a named emoji such as :taco: anchored at the start of the input. The name may contain
// lowercase ASCII letters, digits, underscores, hyphens and plus signs, and the closing colon must not be
// immediately followed by a word character.
var emojiRegex = regexp.MustCompile(`^:([a-z0-9_\-+]+):\B`)
|
||||||
|
|
||||||
|
// parseEmoji attempts to parse a named emoji (eg. :taco:) starting at the current parser position. If an emoji is
|
||||||
|
// found, it adds that to p.inlines and returns true. Otherwise, it returns false.
|
||||||
|
func (p *inlineParser) parseEmoji() bool {
|
||||||
|
// Only allow emojis after non-word characters
|
||||||
|
if p.position > 1 {
|
||||||
|
prevChar := p.raw[p.position-1]
|
||||||
|
|
||||||
|
if isWordByte(prevChar) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
remaining := p.raw[p.position:]
|
||||||
|
|
||||||
|
loc := emojiRegex.FindStringIndex(remaining)
|
||||||
|
if loc == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Note that there may not be a system or custom emoji that exists with this name
|
||||||
|
p.inlines = append(p.inlines, &Emoji{
|
||||||
|
Name: remaining[loc[0]+1 : loc[1]-1],
|
||||||
|
})
|
||||||
|
p.position += loc[1] - loc[0]
|
||||||
|
|
||||||
|
return true
|
||||||
|
}
|
203
server/public/shared/markdown/emoji_test.go
Normal file
203
server/public/shared/markdown/emoji_test.go
Normal file
@ -0,0 +1,203 @@
|
|||||||
|
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
||||||
|
// See LICENSE.txt for license information.
|
||||||
|
|
||||||
|
package markdown
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestParseEmoji(t *testing.T) {
|
||||||
|
for name, tc := range map[string]struct {
|
||||||
|
Input string
|
||||||
|
Position int
|
||||||
|
ExpectedOk bool
|
||||||
|
ExpectedPosition int
|
||||||
|
ExpectedEmojiName string
|
||||||
|
}{
|
||||||
|
"just a colon": {
|
||||||
|
Input: ":",
|
||||||
|
Position: 0,
|
||||||
|
ExpectedOk: false,
|
||||||
|
ExpectedPosition: 0,
|
||||||
|
},
|
||||||
|
"no closing colon": {
|
||||||
|
Input: ":emoji",
|
||||||
|
Position: 0,
|
||||||
|
ExpectedOk: false,
|
||||||
|
ExpectedPosition: 0,
|
||||||
|
},
|
||||||
|
"no closing colon before whitespace": {
|
||||||
|
Input: ":emoji example",
|
||||||
|
Position: 0,
|
||||||
|
ExpectedOk: false,
|
||||||
|
ExpectedPosition: 0,
|
||||||
|
},
|
||||||
|
"valid emoji": {
|
||||||
|
Input: ":emoji:",
|
||||||
|
Position: 0,
|
||||||
|
ExpectedOk: true,
|
||||||
|
ExpectedPosition: 7,
|
||||||
|
ExpectedEmojiName: "emoji",
|
||||||
|
},
|
||||||
|
"valid emoji with punctuation": {
|
||||||
|
Input: ":valid-emoji:",
|
||||||
|
Position: 0,
|
||||||
|
ExpectedOk: true,
|
||||||
|
ExpectedPosition: 13,
|
||||||
|
ExpectedEmojiName: "valid-emoji",
|
||||||
|
},
|
||||||
|
"valid emoji with text before": {
|
||||||
|
Input: "this is an :emoji:",
|
||||||
|
Position: 11,
|
||||||
|
ExpectedOk: true,
|
||||||
|
ExpectedPosition: 18,
|
||||||
|
ExpectedEmojiName: "emoji",
|
||||||
|
},
|
||||||
|
"invalid emoji with text before": {
|
||||||
|
Input: "this is not an :emoji",
|
||||||
|
Position: 15,
|
||||||
|
ExpectedOk: false,
|
||||||
|
ExpectedPosition: 15,
|
||||||
|
},
|
||||||
|
"valid emoji with text after": {
|
||||||
|
Input: ":emoji: before some text",
|
||||||
|
Position: 0,
|
||||||
|
ExpectedOk: true,
|
||||||
|
ExpectedPosition: 7,
|
||||||
|
ExpectedEmojiName: "emoji",
|
||||||
|
},
|
||||||
|
"valid emoji with text before and after": {
|
||||||
|
Input: "this is an :emoji: in a sentence",
|
||||||
|
Position: 11,
|
||||||
|
ExpectedOk: true,
|
||||||
|
ExpectedPosition: 18,
|
||||||
|
ExpectedEmojiName: "emoji",
|
||||||
|
},
|
||||||
|
"multiple emojis 1": {
|
||||||
|
Input: ":multiple: :emojis:",
|
||||||
|
Position: 0,
|
||||||
|
ExpectedOk: true,
|
||||||
|
ExpectedPosition: 10,
|
||||||
|
ExpectedEmojiName: "multiple",
|
||||||
|
},
|
||||||
|
"multiple emojis 2": {
|
||||||
|
Input: ":multiple: :emojis:",
|
||||||
|
Position: 11,
|
||||||
|
ExpectedOk: true,
|
||||||
|
ExpectedPosition: 19,
|
||||||
|
ExpectedEmojiName: "emojis",
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
t.Run(name, func(t *testing.T) {
|
||||||
|
p := newInlineParser(tc.Input, []Range{}, []*ReferenceDefinition{})
|
||||||
|
p.raw = tc.Input
|
||||||
|
p.position = tc.Position
|
||||||
|
|
||||||
|
ok := p.parseEmoji()
|
||||||
|
|
||||||
|
assert.Equal(t, tc.ExpectedOk, ok)
|
||||||
|
assert.Equal(t, tc.ExpectedPosition, p.position)
|
||||||
|
if tc.ExpectedOk {
|
||||||
|
require.True(t, len(p.inlines) > 0)
|
||||||
|
require.IsType(t, &Emoji{}, p.inlines[len(p.inlines)-1])
|
||||||
|
assert.Equal(t, tc.ExpectedEmojiName, p.inlines[len(p.inlines)-1].(*Emoji).Name)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseEmojiFull(t *testing.T) {
|
||||||
|
// These tests are based on https://github.com/mattermost/commonmark.js/blob/master/test/mattermost.txt
|
||||||
|
|
||||||
|
for name, tc := range map[string]struct {
|
||||||
|
Markdown string
|
||||||
|
ExpectedHTML string
|
||||||
|
}{
|
||||||
|
// Valid emojis
|
||||||
|
|
||||||
|
"emoji": {
|
||||||
|
Markdown: "This is an :emoji:",
|
||||||
|
ExpectedHTML: `<p>This is an <span data-emoji-name="emoji" data-literal=":emoji:" /></p>`,
|
||||||
|
},
|
||||||
|
"emoji with underscore": {
|
||||||
|
Markdown: "This is an :emo_ji:",
|
||||||
|
ExpectedHTML: `<p>This is an <span data-emoji-name="emo_ji" data-literal=":emo_ji:" /></p>`,
|
||||||
|
},
|
||||||
|
"emoji with hyphen": {
|
||||||
|
Markdown: "This is an :emo-ji:",
|
||||||
|
ExpectedHTML: `<p>This is an <span data-emoji-name="emo-ji" data-literal=":emo-ji:" /></p>`,
|
||||||
|
},
|
||||||
|
"emoji with numbers": {
|
||||||
|
Markdown: "This is an :emoji123:",
|
||||||
|
ExpectedHTML: `<p>This is an <span data-emoji-name="emoji123" data-literal=":emoji123:" /></p>`,
|
||||||
|
},
|
||||||
|
"emoji in brackets": {
|
||||||
|
Markdown: "This is an (:emoji:)",
|
||||||
|
ExpectedHTML: `<p>This is an (<span data-emoji-name="emoji" data-literal=":emoji:" />)</p>`,
|
||||||
|
},
|
||||||
|
"two emojis without space between": {
|
||||||
|
Markdown: "These are some :emoji1::emoji2:",
|
||||||
|
ExpectedHTML: `<p>These are some <span data-emoji-name="emoji1" data-literal=":emoji1:" /><span data-emoji-name="emoji2" data-literal=":emoji2:" /></p>`,
|
||||||
|
},
|
||||||
|
"two emojis separated by a slash": {
|
||||||
|
Markdown: "These are some :emoji1:/:emoji2:",
|
||||||
|
ExpectedHTML: `<p>These are some <span data-emoji-name="emoji1" data-literal=":emoji1:" />/<span data-emoji-name="emoji2" data-literal=":emoji2:" /></p>`,
|
||||||
|
},
|
||||||
|
"+1 emoji": {
|
||||||
|
Markdown: "This is an :+1:",
|
||||||
|
ExpectedHTML: `<p>This is an <span data-emoji-name="+1" data-literal=":+1:" /></p>`,
|
||||||
|
},
|
||||||
|
"-1 emoji": {
|
||||||
|
Markdown: "This is an :-1:",
|
||||||
|
ExpectedHTML: `<p>This is an <span data-emoji-name="-1" data-literal=":-1:" /></p>`,
|
||||||
|
},
|
||||||
|
"emoji with surrounding words": {
|
||||||
|
Markdown: "This is an :emoji: in a sentence.",
|
||||||
|
ExpectedHTML: `<p>This is an <span data-emoji-name="emoji" data-literal=":emoji:" /> in a sentence.</p>`,
|
||||||
|
},
|
||||||
|
|
||||||
|
// Invalid emojis
|
||||||
|
|
||||||
|
"incomplete emoji 1": {
|
||||||
|
Markdown: "This is not an :emoji",
|
||||||
|
ExpectedHTML: `<p>This is not an :emoji</p>`,
|
||||||
|
},
|
||||||
|
"incomplete emoji 2": {
|
||||||
|
Markdown: "This is not an emoji:",
|
||||||
|
ExpectedHTML: `<p>This is not an emoji:</p>`,
|
||||||
|
},
|
||||||
|
"invalid emoji with whitespace": {
|
||||||
|
Markdown: "This is not an :emo ji:",
|
||||||
|
ExpectedHTML: `<p>This is not an :emo ji:</p>`,
|
||||||
|
},
|
||||||
|
"invalid emoji with other punctuation": {
|
||||||
|
Markdown: "This is not an :emo'ji:",
|
||||||
|
ExpectedHTML: `<p>This is not an :emo'ji:</p>`,
|
||||||
|
},
|
||||||
|
"invalid emoji due to adjacent text 1": {
|
||||||
|
Markdown: "Thisisnotan:emoji:",
|
||||||
|
// This differs slightly from our commonmark.js implementation because it doesn't require :// when autolinking
|
||||||
|
ExpectedHTML: `<p>Thisisnotan:emoji:</p>`,
|
||||||
|
},
|
||||||
|
"invalid emoji due to adjacent text 2": {
|
||||||
|
Markdown: "This is not an :emoji:isit",
|
||||||
|
// This differs slightly from our commonmark.js implementation because it doesn't require :// when autolinking
|
||||||
|
ExpectedHTML: `<p>This is not an :emoji:isit</p>`,
|
||||||
|
},
|
||||||
|
"invalid emoji due to adjacent text 3": {
|
||||||
|
Markdown: "This is not an:emoji:isit",
|
||||||
|
// This differs slightly from our commonmark.js implementation because it doesn't require :// when autolinking
|
||||||
|
ExpectedHTML: `<p>This is not an:emoji:isit</p>`,
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
t.Run(name, func(t *testing.T) {
|
||||||
|
actual := RenderHTML(tc.Markdown)
|
||||||
|
|
||||||
|
assert.Equal(t, tc.ExpectedHTML, actual)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
@ -162,6 +162,10 @@ func RenderInlineHTML(inline Inline) (result string) {
|
|||||||
result += RenderInlineHTML(inline)
|
result += RenderInlineHTML(inline)
|
||||||
}
|
}
|
||||||
result += "</a>"
|
result += "</a>"
|
||||||
|
case *Emoji:
|
||||||
|
escapedName := htmlEscaper.Replace(v.Name)
|
||||||
|
result += fmt.Sprintf(`<span data-emoji-name="%s" data-literal=":%s:" />`, escapedName, escapedName)
|
||||||
|
|
||||||
default:
|
default:
|
||||||
panic(fmt.Sprintf("missing case for type %T", v))
|
panic(fmt.Sprintf("missing case for type %T", v))
|
||||||
}
|
}
|
||||||
|
@ -101,6 +101,12 @@ func (i *Autolink) Destination() string {
|
|||||||
return destination
|
return destination
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type Emoji struct {
|
||||||
|
inlineBase
|
||||||
|
|
||||||
|
Name string
|
||||||
|
}
|
||||||
|
|
||||||
type delimiterType int
|
type delimiterType int
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@ -575,12 +581,24 @@ func (p *inlineParser) Parse() []Inline {
|
|||||||
p.parseLinkOrImageDelimiter()
|
p.parseLinkOrImageDelimiter()
|
||||||
case ']':
|
case ']':
|
||||||
p.lookForLinkOrImage()
|
p.lookForLinkOrImage()
|
||||||
case 'w', 'W', ':':
|
case 'w', 'W':
|
||||||
matched := p.parseAutolink(c)
|
matched := p.parseAutolink(c)
|
||||||
|
|
||||||
if !matched {
|
if !matched {
|
||||||
p.parseText()
|
p.parseText()
|
||||||
}
|
}
|
||||||
|
case ':':
|
||||||
|
matched := p.parseAutolink(c)
|
||||||
|
if matched {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
matched = p.parseEmoji()
|
||||||
|
if matched {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
p.parseText()
|
||||||
default:
|
default:
|
||||||
p.parseText()
|
p.parseText()
|
||||||
}
|
}
|
||||||
|
@ -55,6 +55,15 @@ func isAlphanumericByte(c byte) bool {
|
|||||||
return isAlphanumeric(rune(c))
|
return isAlphanumeric(rune(c))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isWord returns true if c matches the \w regexp character class
|
||||||
|
func isWord(c rune) bool {
|
||||||
|
return isAlphanumeric(c) || c == '_'
|
||||||
|
}
|
||||||
|
|
||||||
|
func isWordByte(c byte) bool {
|
||||||
|
return isWord(rune(c))
|
||||||
|
}
|
||||||
|
|
||||||
func nextNonWhitespace(markdown string, position int) int {
|
func nextNonWhitespace(markdown string, position int) int {
|
||||||
for offset, c := range []byte(markdown[position:]) {
|
for offset, c := range []byte(markdown[position:]) {
|
||||||
if !isWhitespaceByte(c) {
|
if !isWhitespaceByte(c) {
|
||||||
|
Loading…
Reference in New Issue
Block a user