PLT-3915/PLT-5550 Improve handling of Markdown while parsing mentions (#6091)

* PLT-3915 Removed ability to mention users in code blocks

* PLT-3915 Added simple check for potential code blocks before using regexes

* PLT-5550 Improve splitting when parsing mentions to ignore markdown characters
This commit is contained in:
Harrison Healey
2017-04-26 11:00:38 -04:00
committed by Corey Hulen
parent 3e73adceb5
commit 8e6141152b
2 changed files with 209 additions and 1 deletions

View File

@@ -12,9 +12,11 @@ import (
"net/http"
"net/url"
"path/filepath"
"regexp"
"sort"
"strings"
"time"
"unicode"
l4g "github.com/alecthomas/log4go"
"github.com/mattermost/platform/einterfaces"
@@ -660,7 +662,12 @@ func GetExplicitMentions(message string, keywords map[string][]string) (map[stri
}
}
for _, word := range strings.Fields(message) {
message = removeCodeFromMessage(message)
for _, word := range strings.FieldsFunc(message, func(c rune) bool {
// Split on whitespace (as strings.Fields normally does) or on Markdown characters
return unicode.IsSpace(c) || c == '*' || c == '~'
}) {
isMention := false
if word == "@here" {
@@ -726,6 +733,27 @@ func GetExplicitMentions(message string, keywords map[string][]string) (map[stri
return mentioned, potentialOthersMentioned, hereMentioned, channelMentioned, allMentioned
}
var (
	// codeBlockPattern matches a fenced code block: an opening line made of
	// optional indentation, ``` and an optional language tag, followed lazily
	// by at least one character of any kind (newlines included), up to either
	// a closing line holding only optional indentation and ```, or the end of
	// the text when the fence is never closed.
	codeBlockPattern = regexp.MustCompile("(?m)^[^\\S\n]*\\`\\`\\`.*$[\\s\\S]+?(^[^\\S\n]*\\`\\`\\`$|\\z)")

	// inlineCodePattern matches a backquote-delimited span. The span is either
	// non-empty text on a single line, or text running over several lines in
	// which every line strictly between the first and the last contains at
	// least one non-whitespace character.
	inlineCodePattern = regexp.MustCompile("(?m)\\`(?:.+?|.*?\n(.*?\\S.*?\n)*.*?)\\`")
)

// removeCodeFromMessage returns message with its Markdown code stripped out.
// Fenced code blocks are deleted outright (the newlines around them are not
// part of the match and stay in place), and each inline code span is then
// replaced by a single space.
func removeCodeFromMessage(message string) string {
	// Both kinds of code need at least one backquote, so text without any
	// comes back untouched and no regex is run.
	if !strings.Contains(message, "`") {
		return message
	}

	stripped := message
	if strings.Contains(stripped, "```") {
		stripped = codeBlockPattern.ReplaceAllString(stripped, "")
	}

	// Dropping the fenced blocks may have removed every remaining backquote.
	if !strings.Contains(stripped, "`") {
		return stripped
	}

	// A space is substituted (rather than nothing) so that text such as
	// "user`code`name" does not collapse into "username".
	return inlineCodePattern.ReplaceAllString(stripped, " ")
}
// Given a map of user IDs to profiles, returns a list of mention
// keywords for all users in the channel.
func GetMentionKeywordsInChannel(profiles map[string]*model.User) map[string][]string {

View File

@@ -39,6 +39,7 @@ func TestSendNotifications(t *testing.T) {
func TestGetExplicitMentions(t *testing.T) {
id1 := model.NewId()
id2 := model.NewId()
id3 := model.NewId()
// not mentioning anybody
message := "this is a message"
@@ -113,6 +114,51 @@ func TestGetExplicitMentions(t *testing.T) {
if mentions, potential, _, _, _ := GetExplicitMentions(message, keywords); len(mentions) != 1 || !mentions[id1] || len(potential) != 1 {
t.Fatal("should've mentioned user and have a potential not in channel")
}
// words in inline code shouldn't trigger mentions
message = "`this shouldn't mention @channel at all`"
keywords = map[string][]string{}
if mentions, _, _, _, _ := GetExplicitMentions(message, keywords); len(mentions) != 0 {
t.Fatal("@channel in inline code shouldn't cause a mention")
}
// words in code blocks shouldn't trigger mentions
message = "```\nthis shouldn't mention @channel at all\n```"
keywords = map[string][]string{}
if mentions, _, _, _, _ := GetExplicitMentions(message, keywords); len(mentions) != 0 {
t.Fatal("@channel in code block shouldn't cause a mention")
}
// Markdown-formatted text that isn't code should trigger mentions
message = "*@aaa @bbb @ccc*"
keywords = map[string][]string{"@aaa": {id1}, "@bbb": {id2}, "@ccc": {id3}}
if mentions, _, _, _, _ := GetExplicitMentions(message, keywords); len(mentions) != 3 || !mentions[id1] || !mentions[id2] || !mentions[id3] {
t.Fatal("should've mentioned all 3 users", mentions)
}
message = "**@aaa @bbb @ccc**"
keywords = map[string][]string{"@aaa": {id1}, "@bbb": {id2}, "@ccc": {id3}}
if mentions, _, _, _, _ := GetExplicitMentions(message, keywords); len(mentions) != 3 || !mentions[id1] || !mentions[id2] || !mentions[id3] {
t.Fatal("should've mentioned all 3 users")
}
message = "~~@aaa @bbb @ccc~~"
keywords = map[string][]string{"@aaa": {id1}, "@bbb": {id2}, "@ccc": {id3}}
if mentions, _, _, _, _ := GetExplicitMentions(message, keywords); len(mentions) != 3 || !mentions[id1] || !mentions[id2] || !mentions[id3] {
t.Fatal("should've mentioned all 3 users")
}
message = "### @aaa"
keywords = map[string][]string{"@aaa": {id1}, "@bbb": {id2}, "@ccc": {id3}}
if mentions, _, _, _, _ := GetExplicitMentions(message, keywords); len(mentions) != 1 || !mentions[id1] || mentions[id2] || mentions[id3] {
t.Fatal("should've only mentioned aaa")
}
message = "> @aaa"
keywords = map[string][]string{"@aaa": {id1}, "@bbb": {id2}, "@ccc": {id3}}
if mentions, _, _, _, _ := GetExplicitMentions(message, keywords); len(mentions) != 1 || !mentions[id1] || mentions[id2] || mentions[id3] {
t.Fatal("should've only mentioned aaa")
}
}
func TestGetExplicitMentionsAtHere(t *testing.T) {
@@ -177,6 +223,140 @@ func TestGetExplicitMentionsAtHere(t *testing.T) {
}
}
// TestRemoveCodeFromMessage checks that removeCodeFromMessage deletes fenced
// code blocks, replaces inline code spans with a single space, and leaves
// text that only looks like code untouched.
//
// Expected values for inline code are written as three concatenated pieces so
// that the exact spacing is unambiguous: the text before the span (including
// its trailing space), the single space substituted for the span, and the text
// after the span (including its leading space).
func TestRemoveCodeFromMessage(t *testing.T) {
	testCases := []struct {
		input    string
		expected string
	}{
		// No code at all.
		{
			input:    "this is regular text",
			expected: "this is regular text",
		},

		// Fenced code blocks are removed; the surrounding newlines remain.
		{
			input:    "this is text with\n```\na code block\n```\nin it",
			expected: "this is text with\n\nin it",
		},
		{
			input:    "this is text with\n```javascript\na JS code block\n```\nin it",
			expected: "this is text with\n\nin it",
		},
		{
			input:    "this is text with\n```java script?\na JS code block\n```\nin it",
			expected: "this is text with\n\nin it",
		},
		{
			input:    "this is text with an empty\n```\n\n\n\n```\nin it",
			expected: "this is text with an empty\n\nin it",
		},
		{
			input:    "this is text with\n```\ntwo\n```\ncode\n```\nblocks\n```\nin it",
			expected: "this is text with\n\ncode\n\nin it",
		},
		{
			input:    "this is text with indented\n ```\ncode\n ```\nin it",
			expected: "this is text with indented\n\nin it",
		},
		{
			// An unclosed fence swallows everything up to the end of the text.
			input:    "this is text ending with\n```\nan unfinished code block",
			expected: "this is text ending with\n",
		},

		// Inline code is replaced by exactly one space.
		{
			input:    "this is `code` in a sentence",
			expected: "this is " + " " + " in a sentence",
		},
		{
			input:    "this is `two` things of `code` in a sentence",
			expected: "this is " + " " + " things of " + " " + " in a sentence",
		},
		{
			input:    "this is `code with spaces` in a sentence",
			expected: "this is " + " " + " in a sentence",
		},
		{
			input:    "this is `code\nacross multiple` lines",
			expected: "this is " + " " + " lines",
		},
		{
			input:    "this is `code\non\nmany\ndifferent` lines",
			expected: "this is " + " " + " lines",
		},
		{
			input:    "this is `\ncode on its own line\n` across multiple lines",
			expected: "this is " + " " + " across multiple lines",
		},
		{
			input:    "this is `\n some more code \n` across multiple lines",
			expected: "this is " + " " + " across multiple lines",
		},
		{
			input:    "this is `\ncode` on its own line",
			expected: "this is " + " " + " on its own line",
		},
		{
			input:    "this is `code\n` on its own line",
			expected: "this is " + " " + " on its own line",
		},
		{
			input:    "this is *italics mixed with `code in a way that has the code` take precedence*",
			expected: "this is *italics mixed with " + " " + " take precedence*",
		},
		{
			// The span sits inside a word, so only the substituted space remains.
			input:    "this is code within a wo` `rd for some reason",
			expected: "this is code within a wo" + " " + "rd for some reason",
		},

		// Backquotes that do not form valid inline code are left alone.
		{
			input:    "this is `not\n\ncode` because it has a blank line",
			expected: "this is `not\n\ncode` because it has a blank line",
		},
		{
			input:    "this is `not\n \ncode` because it has line with only whitespace",
			expected: "this is `not\n \ncode` because it has line with only whitespace",
		},
		{
			input:    "this is just `` two backquotes",
			expected: "this is just `` two backquotes",
		},
	}

	for _, tc := range testCases {
		// Errorf rather than Fatalf so that one failing case does not hide the rest.
		if actual := removeCodeFromMessage(tc.input); actual != tc.expected {
			t.Errorf("received incorrect output for input %q\n\nGot:\n%v\n\nExpected:\n%v\n", tc.input, actual, tc.expected)
		}
	}
}
func TestGetMentionKeywords(t *testing.T) {
Setup()
// user with username or custom mentions enabled