MM-10856: deduplicate posts with the same pending post id (#10006)

* MM-10856: deduplicate posts with the same pending post id

Leverage a fixed-size cache with a window of 30 seconds to deduplicate posts received by a single app server. Clients that duplicate the same pending post id will see one request potentially delayed until the first finishes, after which the same payload should be returned by both. Duplicate posts outside the 30-second window will not be deduplicated.

Note that the cache is not synchronized between app servers. In an HA cluster consisting of more than one app server, sticky load balancing (e.g. hashing by IP or Session ID) is required to route the users to the same app instance for multiple requests.

Other options considered for this feature:
* adding a column to the `Posts` table: rejected as being too heavyweight
* maintaining a `PendingPostIds` table: similarly rejected for the database impact
* using the pending post id as the post id and relying on the unique constraints on the Post table: rejected for being difficult to show that it's safe to use a client-provided value as the row identifier

* utils/lru: simplify to ttl internally and for new methods

* move seenPendingPostIdsCache to App.Server

* just fail concurrent post requests, vs. trying to wait

* add debug log when create post is deduplicated
This commit is contained in:
Jesse Hallam
2018-12-17 18:16:57 -05:00
committed by Christopher Speller
parent ef4e37fb6b
commit 4286456077
8 changed files with 377 additions and 42 deletions

View File

@@ -32,7 +32,7 @@ func compileGo(t *testing.T, sourceCode, outputPath string) {
cmd.Dir = dir
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
require.NoError(t, cmd.Run())
require.NoError(t, cmd.Run(), "failed to compile go")
}
func SetAppEnvironmentWithPlugins(t *testing.T, pluginCode []string, app *App, apiFunc func(*model.Manifest) plugin.API) (func(), []string, []error) {

View File

@@ -11,6 +11,7 @@ import (
"fmt"
"net/http"
"strings"
"time"
"github.com/mattermost/mattermost-server/mlog"
"github.com/mattermost/mattermost-server/model"
@@ -19,6 +20,11 @@ import (
"github.com/mattermost/mattermost-server/utils"
)
// Sizing and expiry for the cache used to deduplicate create-post requests
// that share a pending post id.
const (
// PENDING_POST_IDS_CACHE_SIZE caps how many pending post ids are tracked at once.
PENDING_POST_IDS_CACHE_SIZE = 25000
// PENDING_POST_IDS_CACHE_TTL is the window within which a duplicated pending
// post id is deduplicated instead of creating a second post.
PENDING_POST_IDS_CACHE_TTL = 30 * time.Second
)
func (a *App) CreatePostAsUser(post *model.Post, clearPushNotifications bool) (*model.Post, *model.AppError) {
// Check that channel has not been deleted
result := <-a.Srv.Store.Channel().Get(post.ChannelId, true)
@@ -89,7 +95,69 @@ func (a *App) CreatePostMissingChannel(post *model.Post, triggerWebhooks bool) (
return a.CreatePost(post, channel, triggerWebhooks)
}
func (a *App) CreatePost(post *model.Post, channel *model.Channel, triggerWebhooks bool) (*model.Post, *model.AppError) {
// deduplicateCreatePost attempts to make posting idempotent within a caching window.
//
// When the client supplies a pending post id, it is recorded in a TTL cache and later
// mapped to the actual post id once creation succeeds. A repeated request inside the
// window either returns the already-created post, or fails fast if the first request
// is still in flight. A nil, nil return means creation should proceed normally.
func (a *App) deduplicateCreatePost(post *model.Post) (foundPost *model.Post, err *model.AppError) {
	// Without a client-provided pending post id there is nothing to key on, so fall
	// through to normal post creation.
	if post.PendingPostId == "" {
		return nil, nil
	}

	const unknownPostId = ""

	// Atomically look up the pending post id, inserting a placeholder when it has
	// never been seen before.
	value, loaded := a.Srv.seenPendingPostIdsCache.GetOrAdd(post.PendingPostId, unknownPostId, PENDING_POST_IDS_CACHE_TTL)
	if !loaded {
		// This request was the first to claim the pending post id; create normally.
		return nil, nil
	}

	switch postId := value.(string); postId {
	case unknownPostId:
		// Another request claimed the pending post id but hasn't finished creating the
		// post yet. Rather than coordinating a wait for it (which complicates the happy
		// path), reject this request outright.
		return nil, model.NewAppError("deduplicateCreatePost", "api.post.deduplicate_create_post.pending", nil, "", http.StatusInternalServerError)
	default:
		// The other request completed: echo back the post it created, making the API
		// call feel idempotent to the client.
		actualPost, appErr := a.GetSinglePost(postId)
		if appErr != nil {
			return nil, model.NewAppError("deduplicateCreatePost", "api.post.deduplicate_create_post.failed_to_get", nil, appErr.Error(), http.StatusInternalServerError)
		}

		mlog.Debug("Deduplicated create post", mlog.String("post_id", actualPost.Id), mlog.String("pending_post_id", post.PendingPostId))
		return actualPost, nil
	}
}
func (a *App) CreatePost(post *model.Post, channel *model.Channel, triggerWebhooks bool) (savedPost *model.Post, err *model.AppError) {
if foundPost, err := a.deduplicateCreatePost(post); err != nil {
return nil, err
} else if foundPost != nil {
return foundPost, nil
}
// If we get this far, we've recorded the client-provided pending post id to the cache.
// Remove it if we fail below, allowing a proper retry by the client.
defer func() {
if post.PendingPostId == "" {
return
}
if err != nil {
a.Srv.seenPendingPostIdsCache.Remove(post.PendingPostId)
return
}
a.Srv.seenPendingPostIdsCache.AddWithExpiresInSecs(post.PendingPostId, savedPost.Id, int64(PENDING_POST_IDS_CACHE_TTL.Seconds()))
}()
post.SanitizeProps()
var pchan store.StoreChannel
@@ -179,6 +247,10 @@ func (a *App) CreatePost(post *model.Post, channel *model.Channel, triggerWebhoo
}
rpost := result.Data.(*model.Post)
// Update the mapping from pending post id to the actual post id, for any clients that
// might be duplicating requests.
a.Srv.seenPendingPostIdsCache.AddWithExpiresInSecs(post.PendingPostId, rpost.Id, int64(PENDING_POST_IDS_CACHE_TTL.Seconds()))
if pluginsEnvironment := a.GetPluginsEnvironment(); pluginsEnvironment != nil {
a.Srv.Go(func() {
pluginContext := a.PluginContext()
@@ -220,7 +292,7 @@ func (a *App) CreatePost(post *model.Post, channel *model.Channel, triggerWebhoo
rpost = a.PreparePostForClient(rpost)
if err := a.handlePostEvents(rpost, user, channel, triggerWebhooks, parentPostList); err != nil {
return nil, err
mlog.Error("Failed to handle post events", mlog.Err(err))
}
return rpost, nil

View File

@@ -6,6 +6,7 @@ package app
import (
"fmt"
"net/http"
"sync"
"sync/atomic"
"testing"
"time"
@@ -18,6 +19,174 @@ import (
"github.com/mattermost/mattermost-server/store/storetest"
)
// TestCreatePostDeduplicate exercises the pending-post-id deduplication window in
// CreatePostAsUser: idempotent duplicates, cache cleanup on rejection, concurrent
// duplicate rejection, and expiry of the deduplication window.
func TestCreatePostDeduplicate(t *testing.T) {
th := Setup().InitBasic()
defer th.TearDown()
// A second request with the same pending post id should return the post created
// by the first request, not create a new one.
t.Run("duplicate create post is idempotent", func(t *testing.T) {
pendingPostId := model.NewId()
post, err := th.App.CreatePostAsUser(&model.Post{
UserId: th.BasicUser.Id,
ChannelId: th.BasicChannel.Id,
Message: "message",
PendingPostId: pendingPostId,
}, false)
require.Nil(t, err)
require.Equal(t, "message", post.Message)
duplicatePost, err := th.App.CreatePostAsUser(&model.Post{
UserId: th.BasicUser.Id,
ChannelId: th.BasicChannel.Id,
Message: "message",
PendingPostId: pendingPostId,
}, false)
require.Nil(t, err)
require.Equal(t, post.Id, duplicatePost.Id, "should have returned previously created post id")
require.Equal(t, "message", duplicatePost.Message)
})
// If the first attempt fails (here: a plugin rejects it), the pending post id must be
// removed from the cache so the client's retry is treated as a fresh create.
t.Run("post rejected by plugin leaves cache ready for non-deduplicated try", func(t *testing.T) {
setupPluginApiTest(t, `
package main

import (
	"github.com/mattermost/mattermost-server/plugin"
	"github.com/mattermost/mattermost-server/model"
)

type MyPlugin struct {
	plugin.MattermostPlugin
	allow bool
}

func (p *MyPlugin) MessageWillBePosted(c *plugin.Context, post *model.Post) (*model.Post, string) {
	if !p.allow {
		p.allow = true
		return nil, "rejected"
	}

	return nil, ""
}

func main() {
	plugin.ClientMain(&MyPlugin{})
}
`, `{"id": "testrejectfirstpost", "backend": {"executable": "backend.exe"}}`, "testrejectfirstpost", th.App)
pendingPostId := model.NewId()
post, err := th.App.CreatePostAsUser(&model.Post{
UserId: th.BasicUser.Id,
ChannelId: th.BasicChannel.Id,
Message: "message",
PendingPostId: pendingPostId,
}, false)
require.NotNil(t, err)
require.Equal(t, "Post rejected by plugin. rejected", err.Id)
require.Nil(t, post)
// The retry with the same pending post id should now succeed.
duplicatePost, err := th.App.CreatePostAsUser(&model.Post{
UserId: th.BasicUser.Id,
ChannelId: th.BasicChannel.Id,
Message: "message",
PendingPostId: pendingPostId,
}, false)
require.Nil(t, err)
require.Equal(t, "message", duplicatePost.Message)
})
// While the first request is still in flight, a concurrent duplicate should be
// rejected with the "pending" error rather than blocking or double-posting.
t.Run("slow posting after cache entry blocks duplicate request", func(t *testing.T) {
setupPluginApiTest(t, `
package main

import (
	"github.com/mattermost/mattermost-server/plugin"
	"github.com/mattermost/mattermost-server/model"
	"time"
)

type MyPlugin struct {
	plugin.MattermostPlugin
	instant bool
}

func (p *MyPlugin) MessageWillBePosted(c *plugin.Context, post *model.Post) (*model.Post, string) {
	if !p.instant {
		p.instant = true
		time.Sleep(3 * time.Second)
	}

	return nil, ""
}

func main() {
	plugin.ClientMain(&MyPlugin{})
}
`, `{"id": "testdelayfirstpost", "backend": {"executable": "backend.exe"}}`, "testdelayfirstpost", th.App)
var post *model.Post
pendingPostId := model.NewId()
wg := sync.WaitGroup{}
// Launch a goroutine to make the first CreatePost call that will get delayed
// by the plugin above.
wg.Add(1)
go func() {
defer wg.Done()
var err error
post, err = th.App.CreatePostAsUser(&model.Post{
UserId: th.BasicUser.Id,
ChannelId: th.BasicChannel.Id,
Message: "plugin delayed",
PendingPostId: pendingPostId,
}, false)
require.Nil(t, err)
require.Equal(t, post.Message, "plugin delayed")
}()
// Give the goroutine above a chance to start and get delayed by the plugin.
time.Sleep(2 * time.Second)
// Try creating a duplicate post
duplicatePost, err := th.App.CreatePostAsUser(&model.Post{
UserId: th.BasicUser.Id,
ChannelId: th.BasicChannel.Id,
Message: "plugin delayed",
PendingPostId: pendingPostId,
}, false)
require.NotNil(t, err)
require.Equal(t, "api.post.deduplicate_create_post.pending", err.Id)
require.Nil(t, duplicatePost)
// Wait for the first CreatePost to finish to ensure assertions are made.
wg.Wait()
})
// Once the TTL window lapses, the same pending post id creates a brand-new post.
t.Run("duplicate create post after cache expires is not idempotent", func(t *testing.T) {
pendingPostId := model.NewId()
post, err := th.App.CreatePostAsUser(&model.Post{
UserId: th.BasicUser.Id,
ChannelId: th.BasicChannel.Id,
Message: "message",
PendingPostId: pendingPostId,
}, false)
require.Nil(t, err)
require.Equal(t, "message", post.Message)
time.Sleep(PENDING_POST_IDS_CACHE_TTL)
duplicatePost, err := th.App.CreatePostAsUser(&model.Post{
UserId: th.BasicUser.Id,
ChannelId: th.BasicChannel.Id,
Message: "message",
PendingPostId: pendingPostId,
}, false)
require.Nil(t, err)
require.NotEqual(t, post.Id, duplicatePost.Id, "should have created new post id")
require.Equal(t, "message", duplicatePost.Message)
})
}
func TestUpdatePostEditAt(t *testing.T) {
th := Setup().InitBasic()
defer th.TearDown()

View File

@@ -91,6 +91,7 @@ type Server struct {
htmlTemplateWatcher *utils.HTMLTemplateWatcher
sessionCache *utils.Cache
seenPendingPostIdsCache *utils.Cache
configListenerId string
licenseListenerId string
logListenerId string
@@ -128,13 +129,14 @@ func NewServer(options ...Option) (*Server, error) {
rootRouter := mux.NewRouter()
s := &Server{
goroutineExitSignal: make(chan struct{}, 1),
RootRouter: rootRouter,
configFile: "config.json",
configListeners: make(map[string]func(*model.Config, *model.Config)),
licenseListeners: map[string]func(){},
sessionCache: utils.NewLru(model.SESSION_CACHE_SIZE),
clientConfig: make(map[string]string),
goroutineExitSignal: make(chan struct{}, 1),
RootRouter: rootRouter,
configFile: "config.json",
configListeners: make(map[string]func(*model.Config, *model.Config)),
licenseListeners: map[string]func(){},
sessionCache: utils.NewLru(model.SESSION_CACHE_SIZE),
seenPendingPostIdsCache: utils.NewLru(PENDING_POST_IDS_CACHE_SIZE),
clientConfig: make(map[string]string),
}
for _, option := range options {
option(s)

View File

@@ -1448,6 +1448,14 @@
"id": "api.post.create_webhook_post.creating.app_error",
"translation": "Error creating post"
},
{
"id": "api.post.deduplicate_create_post.failed_to_get",
"translation": "Failed to fetch original post after deduplicating a client repeating the same request."
},
{
"id": "api.post.deduplicate_create_post.pending",
"translation": "Rejected post since another client is making the same request."
},
{
"id": "api.post.disabled_all",
"translation": "@all has been disabled because the channel has more than {{.Users}} users."

View File

@@ -33,6 +33,18 @@ type LocalCacheSupplier struct {
cluster einterfaces.ClusterInterface
}
// ObjectCache abstracts the cache operations the local cache supplier relies
// on (satisfied by utils.Cache).
type ObjectCache interface {
// AddWithExpiresInSecs stores key/value, expiring after expireAtSecs seconds.
AddWithExpiresInSecs(key, value interface{}, expireAtSecs int64)
// AddWithDefaultExpires stores key/value using the cache's default expiry.
AddWithDefaultExpires(key, value interface{})
// Purge removes all entries.
Purge()
// Get fetches the value for key; ok reports whether it was found.
Get(key interface{}) (value interface{}, ok bool)
// Remove deletes the entry for key, if any.
Remove(key interface{})
// Len returns the number of entries currently in the cache.
Len() int
// Name identifies this cache instance among others in the system.
Name() string
// GetInvalidateClusterEvent returns the cluster event used to invalidate
// this cache across the cluster.
GetInvalidateClusterEvent() string
}
func NewLocalCacheSupplier(metrics einterfaces.MetricsInterface, cluster einterfaces.ClusterInterface) *LocalCacheSupplier {
supplier := &LocalCacheSupplier{
reactionCache: utils.NewLruWithParams(REACTION_CACHE_SIZE, "Reaction", REACTION_CACHE_SEC, model.CLUSTER_EVENT_INVALIDATE_CACHE_FOR_REACTIONS),
@@ -58,7 +70,7 @@ func (s *LocalCacheSupplier) Next() LayeredStoreSupplier {
return s.next
}
func (s *LocalCacheSupplier) doStandardReadCache(ctx context.Context, cache utils.ObjectCache, key string, hints ...LayeredStoreHint) *LayeredStoreSupplierResult {
func (s *LocalCacheSupplier) doStandardReadCache(ctx context.Context, cache ObjectCache, key string, hints ...LayeredStoreHint) *LayeredStoreSupplierResult {
if hintsContains(hints, LSH_NO_CACHE) {
if s.metrics != nil {
s.metrics.IncrementMemCacheMissCounter(cache.Name())
@@ -82,13 +94,13 @@ func (s *LocalCacheSupplier) doStandardReadCache(ctx context.Context, cache util
return nil
}
func (s *LocalCacheSupplier) doStandardAddToCache(ctx context.Context, cache utils.ObjectCache, key string, result *LayeredStoreSupplierResult, hints ...LayeredStoreHint) {
func (s *LocalCacheSupplier) doStandardAddToCache(ctx context.Context, cache ObjectCache, key string, result *LayeredStoreSupplierResult, hints ...LayeredStoreHint) {
if result.Err == nil && result.Data != nil {
cache.AddWithDefaultExpires(key, result.Data)
}
}
func (s *LocalCacheSupplier) doInvalidateCacheCluster(cache utils.ObjectCache, key string) {
func (s *LocalCacheSupplier) doInvalidateCacheCluster(cache ObjectCache, key string) {
cache.Remove(key)
if s.cluster != nil {
msg := &model.ClusterMessage{
@@ -100,7 +112,7 @@ func (s *LocalCacheSupplier) doInvalidateCacheCluster(cache utils.ObjectCache, k
}
}
func (s *LocalCacheSupplier) doClearCacheCluster(cache utils.ObjectCache) {
func (s *LocalCacheSupplier) doClearCacheCluster(cache ObjectCache) {
cache.Purge()
if s.cluster != nil {
msg := &model.ClusterMessage{

View File

@@ -13,18 +13,6 @@ import (
"time"
)
// Caching Interface
type ObjectCache interface {
AddWithExpiresInSecs(key, value interface{}, expireAtSecs int64)
AddWithDefaultExpires(key, value interface{})
Purge()
Get(key interface{}) (value interface{}, ok bool)
Remove(key interface{})
Len() int
Name() string
GetInvalidateClusterEvent() string
}
// Cache is a thread-safe fixed size LRU cache.
type Cache struct {
size int
@@ -38,15 +26,15 @@ type Cache struct {
len int
}
// entry is used to hold a value in the evictList
// entry is used to hold a value in the evictList.
type entry struct {
key interface{}
value interface{}
expireAtSecs int64
generation int64
key interface{}
value interface{}
expires time.Time
generation int64
}
// New creates an LRU of the given size
// New creates an LRU of the given size.
func NewLru(size int) *Cache {
return &Cache{
size: size,
@@ -55,6 +43,7 @@ func NewLru(size int) *Cache {
}
}
// New creates an LRU with the given parameters.
func NewLruWithParams(size int, name string, defaultExpiry int64, invalidateClusterEvent string) *Cache {
lru := NewLru(size)
lru.name = name
@@ -63,7 +52,7 @@ func NewLruWithParams(size int, name string, defaultExpiry int64, invalidateClus
return lru
}
// Purge is used to completely clear the cache
// Purge is used to completely clear the cache.
func (c *Cache) Purge() {
c.lock.Lock()
defer c.lock.Unlock()
@@ -72,28 +61,36 @@ func (c *Cache) Purge() {
c.currentGeneration++
}
// Add adds the given key and value to the store without an expiry; the entry
// only leaves the cache via LRU eviction, Remove, or Purge.
func (c *Cache) Add(key, value interface{}) {
c.AddWithExpiresInSecs(key, value, 0)
}
// AddWithDefaultExpires adds the given key and value to the store with the
// cache's default expiry (zero for caches created via NewLru, i.e. no expiry).
func (c *Cache) AddWithDefaultExpires(key, value interface{}) {
c.AddWithExpiresInSecs(key, value, c.defaultExpiry)
}
// AddWithExpiresInSecs adds the given key and value to the cache with the given expiry.
func (c *Cache) AddWithExpiresInSecs(key, value interface{}, expireAtSecs int64) {
c.lock.Lock()
defer c.lock.Unlock()
if expireAtSecs > 0 {
expireAtSecs = (time.Now().UnixNano() / int64(time.Second)) + expireAtSecs
c.add(key, value, time.Duration(expireAtSecs)*time.Second)
}
func (c *Cache) add(key, value interface{}, ttl time.Duration) {
var expires time.Time
if ttl > 0 {
expires = time.Now().Add(ttl)
}
// Check for existing item
// Check for existing item, ignoring expiry since we'd update anyway.
if ent, ok := c.items[key]; ok {
c.evictList.MoveToFront(ent)
e := ent.Value.(*entry)
e.value = value
e.expireAtSecs = expireAtSecs
e.expires = expires
if e.generation != c.currentGeneration {
e.generation = c.currentGeneration
c.len++
@@ -102,7 +99,7 @@ func (c *Cache) AddWithExpiresInSecs(key, value interface{}, expireAtSecs int64)
}
// Add new item
ent := &entry{key, value, expireAtSecs, c.currentGeneration}
ent := &entry{key, value, expires, c.currentGeneration}
entry := c.evictList.PushFront(ent)
c.items[key] = entry
c.len++
@@ -112,14 +109,19 @@ func (c *Cache) AddWithExpiresInSecs(key, value interface{}, expireAtSecs int64)
}
}
// Get returns the value stored in the cache for a key, or nil if no value is
// present. The ok result indicates whether a (non-expired) value was found.
func (c *Cache) Get(key interface{}) (value interface{}, ok bool) {
	c.lock.Lock()
	defer c.lock.Unlock()

	value, ok = c.getValue(key)
	return value, ok
}
func (c *Cache) getValue(key interface{}) (value interface{}, ok bool) {
if ent, ok := c.items[key]; ok {
e := ent.Value.(*entry)
if e.generation != c.currentGeneration || (e.expireAtSecs > 0 && (time.Now().UnixNano()/int64(time.Second)) > e.expireAtSecs) {
if e.generation != c.currentGeneration || (!e.expires.IsZero() && time.Now().After(e.expires)) {
c.removeElement(ent)
return nil, false
}
@@ -131,6 +133,23 @@ func (c *Cache) Get(key interface{}) (value interface{}, ok bool) {
return nil, false
}
// GetOrAdd returns the existing value for the key if present. Otherwise, it
// stores the given value with the given ttl and returns it. The loaded result
// is true if the value was loaded, false if stored.
// This API intentionally deviates from the Add-only variants above for simplicity. We should simplify the entire API in the future.
func (c *Cache) GetOrAdd(key, value interface{}, ttl time.Duration) (actual interface{}, loaded bool) {
	c.lock.Lock()
	defer c.lock.Unlock()

	// An existing, non-expired entry wins; the caller's value is ignored.
	if existing, ok := c.getValue(key); ok {
		return existing, true
	}

	// Missing (or expired): record the caller's value under the given ttl.
	c.add(key, value, ttl)
	return value, false
}
// Remove deletes the value for a key.
func (c *Cache) Remove(key interface{}) {
c.lock.Lock()
defer c.lock.Unlock()
@@ -165,15 +184,16 @@ func (c *Cache) Len() int {
return c.len
}
// Name identifies this cache instance among others in the system; it is empty
// for caches created via NewLru rather than NewLruWithParams.
func (c *Cache) Name() string {
return c.name
}
// GetInvalidateClusterEvent returns the cluster event configured when this
// cache was created; it is empty for caches created via NewLru.
func (c *Cache) GetInvalidateClusterEvent() string {
return c.invalidateClusterEvent
}
// removeElement is used to remove a given list element from the cache
func (c *Cache) removeElement(e *list.Element) {
c.evictList.Remove(e)
kv := e.Value.(*entry)

View File

@@ -7,8 +7,12 @@
package utils
import "testing"
import "time"
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
)
func TestLRU(t *testing.T) {
l := NewLru(128)
@@ -79,3 +83,51 @@ func TestLRUExpire(t *testing.T) {
t.Fatal("should exist")
}
}
// TestLRUGetOrAdd verifies GetOrAdd's save/load semantics, its interaction
// with Remove, and expiry of entries stored with a ttl.
func TestLRUGetOrAdd(t *testing.T) {
	cache := NewLru(128)

	// An unseen key stores and returns the given value.
	actual, loaded := cache.GetOrAdd(1, 1, 0)
	assert.Equal(t, 1, actual)
	assert.False(t, loaded)

	// A repeated key loads the original value, ignoring the new one.
	actual, loaded = cache.GetOrAdd(1, 10, 0)
	assert.Equal(t, 1, actual)
	assert.True(t, loaded)

	// Subsequent calls still load the original value.
	actual, loaded = cache.GetOrAdd(1, 1, 0)
	assert.Equal(t, 1, actual)
	assert.True(t, loaded)

	// A different key saves independently.
	actual, loaded = cache.GetOrAdd(2, 2, 0)
	assert.Equal(t, 2, actual)
	assert.False(t, loaded)

	cache.Remove(1)

	// After a remove, the key saves afresh.
	actual, loaded = cache.GetOrAdd(1, 10, 0)
	assert.Equal(t, 10, actual)
	assert.False(t, loaded)

	// Removing one key leaves other keys loadable.
	actual, loaded = cache.GetOrAdd(2, 2, 0)
	assert.Equal(t, 2, actual)
	assert.True(t, loaded)

	// Entries stored with a ttl expire after that ttl elapses.
	actual, loaded = cache.GetOrAdd(3, 3, 500*time.Millisecond)
	assert.Equal(t, 3, actual)
	assert.False(t, loaded)

	actual, loaded = cache.GetOrAdd(3, 4, 500*time.Millisecond)
	assert.Equal(t, 3, actual)
	assert.True(t, loaded)

	time.Sleep(1 * time.Second)

	actual, loaded = cache.GetOrAdd(3, 5, 500*time.Millisecond)
	assert.Equal(t, 5, actual)
	assert.False(t, loaded)
}