Truncate data from OpenGraph metadata (#10532)

* Truncate strings from OpenGraph metada

* remove unwanted opengraph fields, limit to 1 image

* test helper functions

* Add truncating test

* fix typo

* change test into not comparing for the pointer value

* truncate only once

* only shorten fields that were already present

* set maximum of 5 images

* fix original tests

* fix test

* limit to 5 images

* fix typo

* place functions below types, simplify function commentaries

* Rewrite how to empty opengraph's structure
This commit is contained in:
Guillermo Vayá
2019-06-11 17:39:35 +02:00
committed by Jesús Espino
parent 88202a76d9
commit ad69002f9e
4 changed files with 158 additions and 19 deletions

View File

@@ -379,6 +379,7 @@ func (a *App) getLinkMetadata(requestURL string, timestamp int64, isNewPost bool
// Parse the data
og, image, err = a.parseLinkMetadata(requestURL, body, contentType)
}
og = model.TruncateOpenGraph(og) // remove unwanted length of texts
// Write back to cache and database, even if there was an error and the results are nil
cacheLinkMetadata(requestURL, timestamp, og, image)

View File

@@ -230,7 +230,7 @@ func TestPreparePostForClient(t *testing.T) {
clientPost := th.App.PreparePostForClient(post, false, false)
t.Run("pupulates emojis", func(t *testing.T) {
t.Run("populates emojis", func(t *testing.T) {
assert.ElementsMatch(t, []*model.Emoji{emoji1, emoji2, emoji3, emoji4}, clientPost.Metadata.Emojis, "should've populated post.Emojis")
})
@@ -331,26 +331,18 @@ func TestPreparePostForClient(t *testing.T) {
require.Nil(t, err)
clientPost := th.App.PreparePostForClient(post, false, false)
firstEmbed := clientPost.Metadata.Embeds[0]
ogData := firstEmbed.Data.(*opengraph.OpenGraph)
t.Run("populates embeds", func(t *testing.T) {
assert.ElementsMatch(t, []*model.PostEmbed{
{
Type: model.POST_EMBED_OPENGRAPH,
URL: "https://github.com/hmhealey/test-files",
Data: &opengraph.OpenGraph{
Description: "Contribute to hmhealey/test-files development by creating an account on GitHub.",
SiteName: "GitHub",
Title: "hmhealey/test-files",
Type: "object",
URL: "https://github.com/hmhealey/test-files",
Images: []*opengraph.Image{
{
URL: "https://avatars1.githubusercontent.com/u/3277310?s=400&v=4",
},
},
},
},
}, clientPost.Metadata.Embeds)
assert.Equal(t, firstEmbed.Type, model.POST_EMBED_OPENGRAPH)
assert.Equal(t, firstEmbed.URL, "https://github.com/hmhealey/test-files")
assert.Equal(t, ogData.Description, "Contribute to hmhealey/test-files development by creating an account on GitHub.")
assert.Equal(t, ogData.SiteName, "GitHub")
assert.Equal(t, ogData.Title, "hmhealey/test-files")
assert.Equal(t, ogData.Type, "object")
assert.Equal(t, ogData.URL, "https://github.com/hmhealey/test-files")
assert.Equal(t, ogData.Images[0].URL, "https://avatars1.githubusercontent.com/u/3277310?s=400&v=4")
})
t.Run("populates image dimensions", func(t *testing.T) {

View File

@@ -6,9 +6,11 @@ package model
import (
"encoding/binary"
"encoding/json"
"fmt"
"hash/fnv"
"net/http"
"time"
"unicode/utf8"
"github.com/dyatlov/go-opengraph/opengraph"
)
@@ -17,6 +19,7 @@ const (
LINK_METADATA_TYPE_IMAGE LinkMetadataType = "image"
LINK_METADATA_TYPE_NONE LinkMetadataType = "none"
LINK_METADATA_TYPE_OPENGRAPH LinkMetadataType = "opengraph"
MAX_IMAGES int = 5
)
type LinkMetadataType string
@@ -38,6 +41,50 @@ type LinkMetadata struct {
Data interface{}
}
// truncateText ensure string is 300 chars, truncate and add ellipsis
// if it was bigger.
func truncateText(original string) string {
if utf8.RuneCountInString(original) > 300 {
return fmt.Sprintf("%.300s[...]", original)
}
return original
}
func firstNImages(images []*opengraph.Image, maxImages int) []*opengraph.Image {
if maxImages < 0 { // dont break stuff, if it's weird, go for sane defaults
maxImages = MAX_IMAGES
}
numImages := len(images)
if numImages > maxImages {
subImages := make([]*opengraph.Image, maxImages)
subImages = images[0:maxImages]
return subImages
}
return images
}
// TruncateOpenGraph ensure OpenGraph metadata doesn't grow too big by
// shortening strings, trimming fields and reducing the number of
// images.
func TruncateOpenGraph(ogdata *opengraph.OpenGraph) *opengraph.OpenGraph {
if ogdata != nil {
empty := &opengraph.OpenGraph{}
ogdata.Title = truncateText(ogdata.Title)
ogdata.Description = truncateText(ogdata.Description)
ogdata.SiteName = truncateText(ogdata.SiteName)
ogdata.Article = empty.Article
ogdata.Book = empty.Book
ogdata.Profile = empty.Profile
ogdata.Determiner = empty.Determiner
ogdata.Locale = empty.Locale
ogdata.LocalesAlternate = empty.LocalesAlternate
ogdata.Images = firstNImages(ogdata.Images, MAX_IMAGES)
ogdata.Audios = empty.Audios
ogdata.Videos = empty.Videos
}
return ogdata
}
func (o *LinkMetadata) PreSave() {
o.Hash = GenerateLinkMetadataHash(o.URL, o.Timestamp)
}

View File

@@ -5,13 +5,28 @@ package model
import (
"encoding/json"
"fmt"
"strings"
"testing"
"unicode/utf8"
"github.com/dyatlov/go-opengraph/opengraph"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
const BigText = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vivamus maximus faucibus ex, vitae placerat neque feugiat ac. Nam tempus libero quis pellentesque feugiat. Cras tristique diam vel condimentum viverra. Proin molestie posuere leo. Nam pulvinar, ex quis tristique cursus, turpis ante commodo elit, a dapibus est ipsum id eros. Mauris tortor dolor, posuere ac velit vitae, faucibus viverra fusce."
func sampleImage(imageName string) *opengraph.Image {
return &opengraph.Image{
URL: fmt.Sprintf("http://example.com/%s", imageName),
SecureURL: fmt.Sprintf("https://example.com/%s", imageName),
Type: "png",
Width: 32,
Height: 32,
}
}
func TestLinkMetadataIsValid(t *testing.T) {
for _, test := range []struct {
Name string
@@ -225,3 +240,87 @@ func TestLinkMetadataDeserializeDataToConcreteType(t *testing.T) {
func TestFloorToNearestHour(t *testing.T) {
assert.True(t, isRoundedToNearestHour(FloorToNearestHour(1546346096000)))
}
func TestTruncateText(t *testing.T) {
t.Run("Shouldn't affect strings smaller than 300 characters", func(t *testing.T) {
assert.Equal(t, utf8.RuneCountInString(truncateText("abc")), 3, "should be 3")
})
t.Run("Shouldn't affect empty strings", func(t *testing.T) {
assert.Equal(t, utf8.RuneCountInString(truncateText("")), 0, "should be empty")
})
t.Run("Truncates string to 300 + 5", func(t *testing.T) {
assert.Equal(t, utf8.RuneCountInString(truncateText(BigText)), 305, "should be 300 chars + 5")
})
t.Run("Truncated text ends in elipsis", func(t *testing.T) {
assert.True(t, strings.HasSuffix(truncateText(BigText), "[...]"))
})
}
func TestFirstNImages(t *testing.T) {
t.Run("when empty, return an empty one", func(t *testing.T) {
empty := make([]*opengraph.Image, 0)
assert.Exactly(t, firstNImages(empty, 1), empty, "Should be the same element")
})
t.Run("when it contains one element, return the same array", func(t *testing.T) {
one := []*opengraph.Image{sampleImage("image.png")}
assert.Exactly(t, firstNImages(one, 1), one, "Should be the same element")
})
t.Run("when it contains more than one element and asking for only one, return the first one", func(t *testing.T) {
two := []*opengraph.Image{sampleImage("image.png"), sampleImage("notme.png")}
assert.True(t, strings.HasSuffix(firstNImages(two, 1)[0].URL, "image.png"), "Should be the image element")
})
t.Run("when it contains less than asked, return the original", func(t *testing.T) {
two := []*opengraph.Image{sampleImage("image.png"), sampleImage("notme.png")}
assert.Equal(t, two, firstNImages(two, 10), "should be the same pointer")
})
t.Run("asking for negative images", func(t *testing.T) {
six := []*opengraph.Image{
sampleImage("image.png"),
sampleImage("another.png"),
sampleImage("yetanother.jpg"),
sampleImage("metoo.gif"),
sampleImage("fifth.ico"),
sampleImage("notme.tiff"),
}
assert.Len(t, firstNImages(six, -10), MAX_IMAGES, "On negative, go for defaults")
})
}
func TestTruncateOpenGraph(t *testing.T) {
og := opengraph.OpenGraph{
Type: "something",
URL: "http://myawesomesite.com",
Title: BigText,
Description: BigText,
Determiner: BigText,
SiteName: BigText,
Locale: "[EN-en]",
LocalesAlternate: []string{"[EN-ca]", "[ES-es]"},
Images: []*opengraph.Image{
sampleImage("image.png"),
sampleImage("another.png"),
sampleImage("yetanother.jpg"),
sampleImage("metoo.gif"),
sampleImage("fifth.ico"),
sampleImage("notme.tiff")},
Audios: []*opengraph.Audio{&opengraph.Audio{}},
Videos: []*opengraph.Video{&opengraph.Video{}},
Article: &opengraph.Article{},
Book: &opengraph.Book{},
Profile: &opengraph.Profile{},
}
result := TruncateOpenGraph(&og)
assert.Nil(t, result.Article, "No article stored")
assert.Nil(t, result.Book, "No book stored")
assert.Nil(t, result.Profile, "No profile stored")
assert.Len(t, result.Images, 5, "Only the first 5 images")
assert.Len(t, result.Audios, 0, "No audios stored")
assert.Len(t, result.Videos, 0, "No videos stored")
assert.Len(t, result.LocalesAlternate, 0, "No alternate locales stored")
assert.Equal(t, result.Determiner, "", "No determiner stored")
assert.Equal(t, utf8.RuneCountInString(result.Title), 305, "Title text is truncated")
assert.Equal(t, utf8.RuneCountInString(result.Description), 305, "Description text is truncated")
assert.Equal(t, utf8.RuneCountInString(result.SiteName), 305, "SiteName text is truncated")
}