mirror of
https://github.com/mattermost/mattermost.git
synced 2025-02-25 18:55:24 -06:00
Truncate data from OpenGraph metadata (#10532)
* Truncate strings from OpenGraph metada * remove unwanted opengraph fields, limit to 1 image * test helper functions * Add truncating test * fix typo * change test into not comparing for the pointer value * truncate only once * only shorten fields that were already present * set maximum of 5 images * fix original tests * fix test * limit to 5 images * fix typo * place functions below types, simplify function commentaries * Rewrite how to empty opengraph's structure
This commit is contained in:
committed by
Jesús Espino
parent
88202a76d9
commit
ad69002f9e
@@ -379,6 +379,7 @@ func (a *App) getLinkMetadata(requestURL string, timestamp int64, isNewPost bool
|
||||
// Parse the data
|
||||
og, image, err = a.parseLinkMetadata(requestURL, body, contentType)
|
||||
}
|
||||
og = model.TruncateOpenGraph(og) // remove unwanted length of texts
|
||||
|
||||
// Write back to cache and database, even if there was an error and the results are nil
|
||||
cacheLinkMetadata(requestURL, timestamp, og, image)
|
||||
|
||||
@@ -230,7 +230,7 @@ func TestPreparePostForClient(t *testing.T) {
|
||||
|
||||
clientPost := th.App.PreparePostForClient(post, false, false)
|
||||
|
||||
t.Run("pupulates emojis", func(t *testing.T) {
|
||||
t.Run("populates emojis", func(t *testing.T) {
|
||||
assert.ElementsMatch(t, []*model.Emoji{emoji1, emoji2, emoji3, emoji4}, clientPost.Metadata.Emojis, "should've populated post.Emojis")
|
||||
})
|
||||
|
||||
@@ -331,26 +331,18 @@ func TestPreparePostForClient(t *testing.T) {
|
||||
require.Nil(t, err)
|
||||
|
||||
clientPost := th.App.PreparePostForClient(post, false, false)
|
||||
firstEmbed := clientPost.Metadata.Embeds[0]
|
||||
ogData := firstEmbed.Data.(*opengraph.OpenGraph)
|
||||
|
||||
t.Run("populates embeds", func(t *testing.T) {
|
||||
assert.ElementsMatch(t, []*model.PostEmbed{
|
||||
{
|
||||
Type: model.POST_EMBED_OPENGRAPH,
|
||||
URL: "https://github.com/hmhealey/test-files",
|
||||
Data: &opengraph.OpenGraph{
|
||||
Description: "Contribute to hmhealey/test-files development by creating an account on GitHub.",
|
||||
SiteName: "GitHub",
|
||||
Title: "hmhealey/test-files",
|
||||
Type: "object",
|
||||
URL: "https://github.com/hmhealey/test-files",
|
||||
Images: []*opengraph.Image{
|
||||
{
|
||||
URL: "https://avatars1.githubusercontent.com/u/3277310?s=400&v=4",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}, clientPost.Metadata.Embeds)
|
||||
assert.Equal(t, firstEmbed.Type, model.POST_EMBED_OPENGRAPH)
|
||||
assert.Equal(t, firstEmbed.URL, "https://github.com/hmhealey/test-files")
|
||||
assert.Equal(t, ogData.Description, "Contribute to hmhealey/test-files development by creating an account on GitHub.")
|
||||
assert.Equal(t, ogData.SiteName, "GitHub")
|
||||
assert.Equal(t, ogData.Title, "hmhealey/test-files")
|
||||
assert.Equal(t, ogData.Type, "object")
|
||||
assert.Equal(t, ogData.URL, "https://github.com/hmhealey/test-files")
|
||||
assert.Equal(t, ogData.Images[0].URL, "https://avatars1.githubusercontent.com/u/3277310?s=400&v=4")
|
||||
})
|
||||
|
||||
t.Run("populates image dimensions", func(t *testing.T) {
|
||||
|
||||
@@ -6,9 +6,11 @@ package model
|
||||
import (
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"net/http"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/dyatlov/go-opengraph/opengraph"
|
||||
)
|
||||
@@ -17,6 +19,7 @@ const (
|
||||
LINK_METADATA_TYPE_IMAGE LinkMetadataType = "image"
|
||||
LINK_METADATA_TYPE_NONE LinkMetadataType = "none"
|
||||
LINK_METADATA_TYPE_OPENGRAPH LinkMetadataType = "opengraph"
|
||||
MAX_IMAGES int = 5
|
||||
)
|
||||
|
||||
type LinkMetadataType string
|
||||
@@ -38,6 +41,50 @@ type LinkMetadata struct {
|
||||
Data interface{}
|
||||
}
|
||||
|
||||
// truncateText ensure string is 300 chars, truncate and add ellipsis
|
||||
// if it was bigger.
|
||||
func truncateText(original string) string {
|
||||
if utf8.RuneCountInString(original) > 300 {
|
||||
return fmt.Sprintf("%.300s[...]", original)
|
||||
}
|
||||
return original
|
||||
}
|
||||
|
||||
func firstNImages(images []*opengraph.Image, maxImages int) []*opengraph.Image {
|
||||
if maxImages < 0 { // dont break stuff, if it's weird, go for sane defaults
|
||||
maxImages = MAX_IMAGES
|
||||
}
|
||||
numImages := len(images)
|
||||
if numImages > maxImages {
|
||||
subImages := make([]*opengraph.Image, maxImages)
|
||||
subImages = images[0:maxImages]
|
||||
return subImages
|
||||
}
|
||||
return images
|
||||
}
|
||||
|
||||
// TruncateOpenGraph ensure OpenGraph metadata doesn't grow too big by
|
||||
// shortening strings, trimming fields and reducing the number of
|
||||
// images.
|
||||
func TruncateOpenGraph(ogdata *opengraph.OpenGraph) *opengraph.OpenGraph {
|
||||
if ogdata != nil {
|
||||
empty := &opengraph.OpenGraph{}
|
||||
ogdata.Title = truncateText(ogdata.Title)
|
||||
ogdata.Description = truncateText(ogdata.Description)
|
||||
ogdata.SiteName = truncateText(ogdata.SiteName)
|
||||
ogdata.Article = empty.Article
|
||||
ogdata.Book = empty.Book
|
||||
ogdata.Profile = empty.Profile
|
||||
ogdata.Determiner = empty.Determiner
|
||||
ogdata.Locale = empty.Locale
|
||||
ogdata.LocalesAlternate = empty.LocalesAlternate
|
||||
ogdata.Images = firstNImages(ogdata.Images, MAX_IMAGES)
|
||||
ogdata.Audios = empty.Audios
|
||||
ogdata.Videos = empty.Videos
|
||||
}
|
||||
return ogdata
|
||||
}
|
||||
|
||||
func (o *LinkMetadata) PreSave() {
|
||||
o.Hash = GenerateLinkMetadataHash(o.URL, o.Timestamp)
|
||||
}
|
||||
|
||||
@@ -5,13 +5,28 @@ package model
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/dyatlov/go-opengraph/opengraph"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
const BigText = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vivamus maximus faucibus ex, vitae placerat neque feugiat ac. Nam tempus libero quis pellentesque feugiat. Cras tristique diam vel condimentum viverra. Proin molestie posuere leo. Nam pulvinar, ex quis tristique cursus, turpis ante commodo elit, a dapibus est ipsum id eros. Mauris tortor dolor, posuere ac velit vitae, faucibus viverra fusce."
|
||||
|
||||
func sampleImage(imageName string) *opengraph.Image {
|
||||
return &opengraph.Image{
|
||||
URL: fmt.Sprintf("http://example.com/%s", imageName),
|
||||
SecureURL: fmt.Sprintf("https://example.com/%s", imageName),
|
||||
Type: "png",
|
||||
Width: 32,
|
||||
Height: 32,
|
||||
}
|
||||
}
|
||||
|
||||
func TestLinkMetadataIsValid(t *testing.T) {
|
||||
for _, test := range []struct {
|
||||
Name string
|
||||
@@ -225,3 +240,87 @@ func TestLinkMetadataDeserializeDataToConcreteType(t *testing.T) {
|
||||
func TestFloorToNearestHour(t *testing.T) {
|
||||
assert.True(t, isRoundedToNearestHour(FloorToNearestHour(1546346096000)))
|
||||
}
|
||||
|
||||
func TestTruncateText(t *testing.T) {
|
||||
t.Run("Shouldn't affect strings smaller than 300 characters", func(t *testing.T) {
|
||||
assert.Equal(t, utf8.RuneCountInString(truncateText("abc")), 3, "should be 3")
|
||||
})
|
||||
t.Run("Shouldn't affect empty strings", func(t *testing.T) {
|
||||
assert.Equal(t, utf8.RuneCountInString(truncateText("")), 0, "should be empty")
|
||||
})
|
||||
t.Run("Truncates string to 300 + 5", func(t *testing.T) {
|
||||
assert.Equal(t, utf8.RuneCountInString(truncateText(BigText)), 305, "should be 300 chars + 5")
|
||||
})
|
||||
t.Run("Truncated text ends in elipsis", func(t *testing.T) {
|
||||
assert.True(t, strings.HasSuffix(truncateText(BigText), "[...]"))
|
||||
})
|
||||
}
|
||||
|
||||
func TestFirstNImages(t *testing.T) {
|
||||
t.Run("when empty, return an empty one", func(t *testing.T) {
|
||||
empty := make([]*opengraph.Image, 0)
|
||||
assert.Exactly(t, firstNImages(empty, 1), empty, "Should be the same element")
|
||||
})
|
||||
t.Run("when it contains one element, return the same array", func(t *testing.T) {
|
||||
one := []*opengraph.Image{sampleImage("image.png")}
|
||||
assert.Exactly(t, firstNImages(one, 1), one, "Should be the same element")
|
||||
})
|
||||
t.Run("when it contains more than one element and asking for only one, return the first one", func(t *testing.T) {
|
||||
two := []*opengraph.Image{sampleImage("image.png"), sampleImage("notme.png")}
|
||||
assert.True(t, strings.HasSuffix(firstNImages(two, 1)[0].URL, "image.png"), "Should be the image element")
|
||||
})
|
||||
t.Run("when it contains less than asked, return the original", func(t *testing.T) {
|
||||
two := []*opengraph.Image{sampleImage("image.png"), sampleImage("notme.png")}
|
||||
assert.Equal(t, two, firstNImages(two, 10), "should be the same pointer")
|
||||
})
|
||||
|
||||
t.Run("asking for negative images", func(t *testing.T) {
|
||||
six := []*opengraph.Image{
|
||||
sampleImage("image.png"),
|
||||
sampleImage("another.png"),
|
||||
sampleImage("yetanother.jpg"),
|
||||
sampleImage("metoo.gif"),
|
||||
sampleImage("fifth.ico"),
|
||||
sampleImage("notme.tiff"),
|
||||
}
|
||||
assert.Len(t, firstNImages(six, -10), MAX_IMAGES, "On negative, go for defaults")
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
func TestTruncateOpenGraph(t *testing.T) {
|
||||
og := opengraph.OpenGraph{
|
||||
Type: "something",
|
||||
URL: "http://myawesomesite.com",
|
||||
Title: BigText,
|
||||
Description: BigText,
|
||||
Determiner: BigText,
|
||||
SiteName: BigText,
|
||||
Locale: "[EN-en]",
|
||||
LocalesAlternate: []string{"[EN-ca]", "[ES-es]"},
|
||||
Images: []*opengraph.Image{
|
||||
sampleImage("image.png"),
|
||||
sampleImage("another.png"),
|
||||
sampleImage("yetanother.jpg"),
|
||||
sampleImage("metoo.gif"),
|
||||
sampleImage("fifth.ico"),
|
||||
sampleImage("notme.tiff")},
|
||||
Audios: []*opengraph.Audio{&opengraph.Audio{}},
|
||||
Videos: []*opengraph.Video{&opengraph.Video{}},
|
||||
Article: &opengraph.Article{},
|
||||
Book: &opengraph.Book{},
|
||||
Profile: &opengraph.Profile{},
|
||||
}
|
||||
result := TruncateOpenGraph(&og)
|
||||
assert.Nil(t, result.Article, "No article stored")
|
||||
assert.Nil(t, result.Book, "No book stored")
|
||||
assert.Nil(t, result.Profile, "No profile stored")
|
||||
assert.Len(t, result.Images, 5, "Only the first 5 images")
|
||||
assert.Len(t, result.Audios, 0, "No audios stored")
|
||||
assert.Len(t, result.Videos, 0, "No videos stored")
|
||||
assert.Len(t, result.LocalesAlternate, 0, "No alternate locales stored")
|
||||
assert.Equal(t, result.Determiner, "", "No determiner stored")
|
||||
assert.Equal(t, utf8.RuneCountInString(result.Title), 305, "Title text is truncated")
|
||||
assert.Equal(t, utf8.RuneCountInString(result.Description), 305, "Description text is truncated")
|
||||
assert.Equal(t, utf8.RuneCountInString(result.SiteName), 305, "SiteName text is truncated")
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user