diff --git a/app/post_metadata.go b/app/post_metadata.go index c10f07c522..729afae99a 100644 --- a/app/post_metadata.go +++ b/app/post_metadata.go @@ -379,6 +379,7 @@ func (a *App) getLinkMetadata(requestURL string, timestamp int64, isNewPost bool // Parse the data og, image, err = a.parseLinkMetadata(requestURL, body, contentType) } + og = model.TruncateOpenGraph(og) // remove unwanted length of texts // Write back to cache and database, even if there was an error and the results are nil cacheLinkMetadata(requestURL, timestamp, og, image) diff --git a/app/post_metadata_test.go b/app/post_metadata_test.go index 71554e4016..f37b9b72af 100644 --- a/app/post_metadata_test.go +++ b/app/post_metadata_test.go @@ -230,7 +230,7 @@ func TestPreparePostForClient(t *testing.T) { clientPost := th.App.PreparePostForClient(post, false, false) - t.Run("pupulates emojis", func(t *testing.T) { + t.Run("populates emojis", func(t *testing.T) { assert.ElementsMatch(t, []*model.Emoji{emoji1, emoji2, emoji3, emoji4}, clientPost.Metadata.Emojis, "should've populated post.Emojis") }) @@ -331,26 +331,18 @@ func TestPreparePostForClient(t *testing.T) { require.Nil(t, err) clientPost := th.App.PreparePostForClient(post, false, false) + firstEmbed := clientPost.Metadata.Embeds[0] + ogData := firstEmbed.Data.(*opengraph.OpenGraph) t.Run("populates embeds", func(t *testing.T) { - assert.ElementsMatch(t, []*model.PostEmbed{ - { - Type: model.POST_EMBED_OPENGRAPH, - URL: "https://github.com/hmhealey/test-files", - Data: &opengraph.OpenGraph{ - Description: "Contribute to hmhealey/test-files development by creating an account on GitHub.", - SiteName: "GitHub", - Title: "hmhealey/test-files", - Type: "object", - URL: "https://github.com/hmhealey/test-files", - Images: []*opengraph.Image{ - { - URL: "https://avatars1.githubusercontent.com/u/3277310?s=400&v=4", - }, - }, - }, - }, - }, clientPost.Metadata.Embeds) + assert.Equal(t, firstEmbed.Type, model.POST_EMBED_OPENGRAPH) + assert.Equal(t, firstEmbed.URL, "https://github.com/hmhealey/test-files") + assert.Equal(t, ogData.Description, "Contribute to hmhealey/test-files development by creating an account on GitHub.") + assert.Equal(t, ogData.SiteName, "GitHub") + assert.Equal(t, ogData.Title, "hmhealey/test-files") + assert.Equal(t, ogData.Type, "object") + assert.Equal(t, ogData.URL, "https://github.com/hmhealey/test-files") + assert.Equal(t, ogData.Images[0].URL, "https://avatars1.githubusercontent.com/u/3277310?s=400&v=4") }) t.Run("populates image dimensions", func(t *testing.T) { diff --git a/model/link_metadata.go b/model/link_metadata.go index 0488289295..72f0178eb4 100644 --- a/model/link_metadata.go +++ b/model/link_metadata.go @@ -6,9 +6,11 @@ package model import ( "encoding/binary" "encoding/json" + "fmt" "hash/fnv" "net/http" "time" + "unicode/utf8" "github.com/dyatlov/go-opengraph/opengraph" ) @@ -17,6 +19,7 @@ const ( LINK_METADATA_TYPE_IMAGE LinkMetadataType = "image" LINK_METADATA_TYPE_NONE LinkMetadataType = "none" LINK_METADATA_TYPE_OPENGRAPH LinkMetadataType = "opengraph" + MAX_IMAGES int = 5 ) type LinkMetadataType string @@ -38,6 +41,50 @@ type LinkMetadata struct { Data interface{} } +// truncateText ensure string is 300 chars, truncate and add ellipsis +// if it was bigger. +func truncateText(original string) string { + if utf8.RuneCountInString(original) > 300 { + return fmt.Sprintf("%.300s[...]", original) + } + return original +} + +func firstNImages(images []*opengraph.Image, maxImages int) []*opengraph.Image { + if maxImages < 0 { // dont break stuff, if it's weird, go for sane defaults + maxImages = MAX_IMAGES + } + numImages := len(images) + if numImages > maxImages { + subImages := make([]*opengraph.Image, maxImages) + subImages = images[0:maxImages] + return subImages + } + return images +} + +// TruncateOpenGraph ensure OpenGraph metadata doesn't grow too big by +// shortening strings, trimming fields and reducing the number of +// images. +func TruncateOpenGraph(ogdata *opengraph.OpenGraph) *opengraph.OpenGraph { + if ogdata != nil { + empty := &opengraph.OpenGraph{} + ogdata.Title = truncateText(ogdata.Title) + ogdata.Description = truncateText(ogdata.Description) + ogdata.SiteName = truncateText(ogdata.SiteName) + ogdata.Article = empty.Article + ogdata.Book = empty.Book + ogdata.Profile = empty.Profile + ogdata.Determiner = empty.Determiner + ogdata.Locale = empty.Locale + ogdata.LocalesAlternate = empty.LocalesAlternate + ogdata.Images = firstNImages(ogdata.Images, MAX_IMAGES) + ogdata.Audios = empty.Audios + ogdata.Videos = empty.Videos + } + return ogdata +} + func (o *LinkMetadata) PreSave() { o.Hash = GenerateLinkMetadataHash(o.URL, o.Timestamp) } diff --git a/model/link_metadata_test.go b/model/link_metadata_test.go index 1d9e095c83..a3f63c506e 100644 --- a/model/link_metadata_test.go +++ b/model/link_metadata_test.go @@ -5,13 +5,28 @@ package model import ( "encoding/json" + "fmt" + "strings" "testing" + "unicode/utf8" "github.com/dyatlov/go-opengraph/opengraph" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) +const BigText = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vivamus maximus faucibus ex, vitae placerat neque feugiat ac. Nam tempus libero quis pellentesque feugiat. Cras tristique diam vel condimentum viverra. Proin molestie posuere leo. Nam pulvinar, ex quis tristique cursus, turpis ante commodo elit, a dapibus est ipsum id eros. Mauris tortor dolor, posuere ac velit vitae, faucibus viverra fusce." + +func sampleImage(imageName string) *opengraph.Image { + return &opengraph.Image{ + URL: fmt.Sprintf("http://example.com/%s", imageName), + SecureURL: fmt.Sprintf("https://example.com/%s", imageName), + Type: "png", + Width: 32, + Height: 32, + } +} + func TestLinkMetadataIsValid(t *testing.T) { for _, test := range []struct { Name string @@ -225,3 +240,87 @@ func TestLinkMetadataDeserializeDataToConcreteType(t *testing.T) { func TestFloorToNearestHour(t *testing.T) { assert.True(t, isRoundedToNearestHour(FloorToNearestHour(1546346096000))) } + +func TestTruncateText(t *testing.T) { + t.Run("Shouldn't affect strings smaller than 300 characters", func(t *testing.T) { + assert.Equal(t, utf8.RuneCountInString(truncateText("abc")), 3, "should be 3") + }) + t.Run("Shouldn't affect empty strings", func(t *testing.T) { + assert.Equal(t, utf8.RuneCountInString(truncateText("")), 0, "should be empty") + }) + t.Run("Truncates string to 300 + 5", func(t *testing.T) { + assert.Equal(t, utf8.RuneCountInString(truncateText(BigText)), 305, "should be 300 chars + 5") + }) + t.Run("Truncated text ends in elipsis", func(t *testing.T) { + assert.True(t, strings.HasSuffix(truncateText(BigText), "[...]")) + }) +} + +func TestFirstNImages(t *testing.T) { + t.Run("when empty, return an empty one", func(t *testing.T) { + empty := make([]*opengraph.Image, 0) + assert.Exactly(t, firstNImages(empty, 1), empty, "Should be the same element") + }) + t.Run("when it contains one element, return the same array", func(t *testing.T) { + one := []*opengraph.Image{sampleImage("image.png")} + assert.Exactly(t, firstNImages(one, 1), one, "Should be the same element") + }) + t.Run("when it contains more than one element and asking for only one, return the first one", func(t *testing.T) { + two := []*opengraph.Image{sampleImage("image.png"), sampleImage("notme.png")} + assert.True(t, strings.HasSuffix(firstNImages(two, 1)[0].URL, "image.png"), "Should be the image element") + }) + t.Run("when it contains less than asked, return the original", func(t *testing.T) { + two := []*opengraph.Image{sampleImage("image.png"), sampleImage("notme.png")} + assert.Equal(t, two, firstNImages(two, 10), "should be the same pointer") + }) + + t.Run("asking for negative images", func(t *testing.T) { + six := []*opengraph.Image{ + sampleImage("image.png"), + sampleImage("another.png"), + sampleImage("yetanother.jpg"), + sampleImage("metoo.gif"), + sampleImage("fifth.ico"), + sampleImage("notme.tiff"), + } + assert.Len(t, firstNImages(six, -10), MAX_IMAGES, "On negative, go for defaults") + }) + +} + +func TestTruncateOpenGraph(t *testing.T) { + og := opengraph.OpenGraph{ + Type: "something", + URL: "http://myawesomesite.com", + Title: BigText, + Description: BigText, + Determiner: BigText, + SiteName: BigText, + Locale: "[EN-en]", + LocalesAlternate: []string{"[EN-ca]", "[ES-es]"}, + Images: []*opengraph.Image{ + sampleImage("image.png"), + sampleImage("another.png"), + sampleImage("yetanother.jpg"), + sampleImage("metoo.gif"), + sampleImage("fifth.ico"), + sampleImage("notme.tiff")}, + Audios: []*opengraph.Audio{&opengraph.Audio{}}, + Videos: []*opengraph.Video{&opengraph.Video{}}, + Article: &opengraph.Article{}, + Book: &opengraph.Book{}, + Profile: &opengraph.Profile{}, + } + result := TruncateOpenGraph(&og) + assert.Nil(t, result.Article, "No article stored") + assert.Nil(t, result.Book, "No book stored") + assert.Nil(t, result.Profile, "No profile stored") + assert.Len(t, result.Images, 5, "Only the first 5 images") + assert.Len(t, result.Audios, 0, "No audios stored") + assert.Len(t, result.Videos, 0, "No videos stored") + assert.Len(t, result.LocalesAlternate, 0, "No alternate locales stored") + assert.Equal(t, result.Determiner, "", "No determiner stored") + assert.Equal(t, utf8.RuneCountInString(result.Title), 305, "Title text is truncated") + assert.Equal(t, utf8.RuneCountInString(result.Description), 305, "Description text is truncated") + assert.Equal(t, utf8.RuneCountInString(result.SiteName), 305, "SiteName text is truncated") +}