diff --git a/app/plugin_hooks_test.go b/app/plugin_hooks_test.go index 77391a2c4e..72b0045839 100644 --- a/app/plugin_hooks_test.go +++ b/app/plugin_hooks_test.go @@ -32,7 +32,7 @@ func compileGo(t *testing.T, sourceCode, outputPath string) { cmd.Dir = dir cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr - require.NoError(t, cmd.Run()) + require.NoError(t, cmd.Run(), "failed to compile go") } func SetAppEnvironmentWithPlugins(t *testing.T, pluginCode []string, app *App, apiFunc func(*model.Manifest) plugin.API) (func(), []string, []error) { diff --git a/app/post.go b/app/post.go index 403873feba..a6a67d1f25 100644 --- a/app/post.go +++ b/app/post.go @@ -11,6 +11,7 @@ import ( "fmt" "net/http" "strings" + "time" "github.com/mattermost/mattermost-server/mlog" "github.com/mattermost/mattermost-server/model" @@ -19,6 +20,11 @@ import ( "github.com/mattermost/mattermost-server/utils" ) +const ( + PENDING_POST_IDS_CACHE_SIZE = 25000 + PENDING_POST_IDS_CACHE_TTL = 30 * time.Second +) + func (a *App) CreatePostAsUser(post *model.Post, clearPushNotifications bool) (*model.Post, *model.AppError) { // Check that channel has not been deleted result := <-a.Srv.Store.Channel().Get(post.ChannelId, true) @@ -89,7 +95,69 @@ func (a *App) CreatePostMissingChannel(post *model.Post, triggerWebhooks bool) ( return a.CreatePost(post, channel, triggerWebhooks) } -func (a *App) CreatePost(post *model.Post, channel *model.Channel, triggerWebhooks bool) (*model.Post, *model.AppError) { +// deduplicateCreatePost attempts to make posting idempotent within a caching window. +func (a *App) deduplicateCreatePost(post *model.Post) (foundPost *model.Post, err *model.AppError) { + // We rely on the client sending the pending post id across "duplicate" requests. If there + // isn't one, we can't deduplicate, so allow creation normally. + if post.PendingPostId == "" { + return nil, nil + } + + const unknownPostId = "" + + // Query the cache atomically for the given pending post id, saving a record if + // it hasn't previously been seen. + value, loaded := a.Srv.seenPendingPostIdsCache.GetOrAdd(post.PendingPostId, unknownPostId, PENDING_POST_IDS_CACHE_TTL) + + // If we were the first thread to save this pending post id into the cache, + // proceed with create post normally. + if !loaded { + return nil, nil + } + + postId := value.(string) + + // If another thread saved the cache record, but hasn't yet updated it with the actual post + // id (because it's still saving), notify the client with an error. Ideally, we'd wait + // for the other thread, but coordinating that adds complexity to the happy path. + if postId == unknownPostId { + return nil, model.NewAppError("deduplicateCreatePost", "api.post.deduplicate_create_post.pending", nil, "", http.StatusInternalServerError) + } + + // If the other thread finished creating the post, return the created post back to the + // client, making the API call feel idempotent. + actualPost, err := a.GetSinglePost(postId) + if err != nil { + return nil, model.NewAppError("deduplicateCreatePost", "api.post.deduplicate_create_post.failed_to_get", nil, err.Error(), http.StatusInternalServerError) + } + + mlog.Debug("Deduplicated create post", mlog.String("post_id", actualPost.Id), mlog.String("pending_post_id", post.PendingPostId)) + + return actualPost, nil +} + +func (a *App) CreatePost(post *model.Post, channel *model.Channel, triggerWebhooks bool) (savedPost *model.Post, err *model.AppError) { + if foundPost, err := a.deduplicateCreatePost(post); err != nil { + return nil, err + } else if foundPost != nil { + return foundPost, nil + } + + // If we get this far, we've recorded the client-provided pending post id to the cache. + // Remove it if we fail below, allowing a proper retry by the client. + defer func() { + if post.PendingPostId == "" { + return + } + + if err != nil { + a.Srv.seenPendingPostIdsCache.Remove(post.PendingPostId) + return + } + + a.Srv.seenPendingPostIdsCache.AddWithExpiresInSecs(post.PendingPostId, savedPost.Id, int64(PENDING_POST_IDS_CACHE_TTL.Seconds())) + }() + post.SanitizeProps() var pchan store.StoreChannel @@ -179,6 +247,10 @@ func (a *App) CreatePost(post *model.Post, channel *model.Channel, triggerWebhoo } rpost := result.Data.(*model.Post) + // Update the mapping from pending post id to the actual post id, for any clients that + // might be duplicating requests. + a.Srv.seenPendingPostIdsCache.AddWithExpiresInSecs(post.PendingPostId, rpost.Id, int64(PENDING_POST_IDS_CACHE_TTL.Seconds())) + if pluginsEnvironment := a.GetPluginsEnvironment(); pluginsEnvironment != nil { a.Srv.Go(func() { pluginContext := a.PluginContext() @@ -220,7 +292,7 @@ func (a *App) CreatePost(post *model.Post, channel *model.Channel, triggerWebhoo rpost = a.PreparePostForClient(rpost) if err := a.handlePostEvents(rpost, user, channel, triggerWebhooks, parentPostList); err != nil { - return nil, err + mlog.Error("Failed to handle post events", mlog.Err(err)) } return rpost, nil diff --git a/app/post_test.go b/app/post_test.go index 5d605bc238..f39997b17b 100644 --- a/app/post_test.go +++ b/app/post_test.go @@ -6,6 +6,7 @@ package app import ( "fmt" "net/http" + "sync" "sync/atomic" "testing" "time" @@ -18,6 +19,174 @@ import ( "github.com/mattermost/mattermost-server/store/storetest" ) +func TestCreatePostDeduplicate(t *testing.T) { + th := Setup().InitBasic() + defer th.TearDown() + + t.Run("duplicate create post is idempotent", func(t *testing.T) { + pendingPostId := model.NewId() + post, err := th.App.CreatePostAsUser(&model.Post{ + UserId: th.BasicUser.Id, + ChannelId: th.BasicChannel.Id, + Message: "message", + PendingPostId: pendingPostId, + }, false) + require.Nil(t, err) + require.Equal(t, "message", post.Message) + + duplicatePost, err := th.App.CreatePostAsUser(&model.Post{ + UserId: th.BasicUser.Id, + ChannelId: th.BasicChannel.Id, + Message: "message", + PendingPostId: pendingPostId, + }, false) + require.Nil(t, err) + require.Equal(t, post.Id, duplicatePost.Id, "should have returned previously created post id") + require.Equal(t, "message", duplicatePost.Message) + }) + + t.Run("post rejected by plugin leaves cache ready for non-deduplicated try", func(t *testing.T) { + setupPluginApiTest(t, ` + package main + + import ( + "github.com/mattermost/mattermost-server/plugin" + "github.com/mattermost/mattermost-server/model" + ) + + type MyPlugin struct { + plugin.MattermostPlugin + allow bool + } + + func (p *MyPlugin) MessageWillBePosted(c *plugin.Context, post *model.Post) (*model.Post, string) { + if !p.allow { + p.allow = true + return nil, "rejected" + } + + return nil, "" + } + + func main() { + plugin.ClientMain(&MyPlugin{}) + } + `, `{"id": "testrejectfirstpost", "backend": {"executable": "backend.exe"}}`, "testrejectfirstpost", th.App) + + pendingPostId := model.NewId() + post, err := th.App.CreatePostAsUser(&model.Post{ + UserId: th.BasicUser.Id, + ChannelId: th.BasicChannel.Id, + Message: "message", + PendingPostId: pendingPostId, + }, false) + require.NotNil(t, err) + require.Equal(t, "Post rejected by plugin. rejected", err.Id) + require.Nil(t, post) + + duplicatePost, err := th.App.CreatePostAsUser(&model.Post{ + UserId: th.BasicUser.Id, + ChannelId: th.BasicChannel.Id, + Message: "message", + PendingPostId: pendingPostId, + }, false) + require.Nil(t, err) + require.Equal(t, "message", duplicatePost.Message) + }) + + t.Run("slow posting after cache entry blocks duplicate request", func(t *testing.T) { + setupPluginApiTest(t, ` + package main + + import ( + "github.com/mattermost/mattermost-server/plugin" + "github.com/mattermost/mattermost-server/model" + "time" + ) + + type MyPlugin struct { + plugin.MattermostPlugin + instant bool + } + + func (p *MyPlugin) MessageWillBePosted(c *plugin.Context, post *model.Post) (*model.Post, string) { + if !p.instant { + p.instant = true + time.Sleep(3 * time.Second) + } + + return nil, "" + } + + func main() { + plugin.ClientMain(&MyPlugin{}) + } + `, `{"id": "testdelayfirstpost", "backend": {"executable": "backend.exe"}}`, "testdelayfirstpost", th.App) + + var post *model.Post + pendingPostId := model.NewId() + + wg := sync.WaitGroup{} + + // Launch a goroutine to make the first CreatePost call that will get delayed + // by the plugin above. + wg.Add(1) + go func() { + defer wg.Done() + var err error + post, err = th.App.CreatePostAsUser(&model.Post{ + UserId: th.BasicUser.Id, + ChannelId: th.BasicChannel.Id, + Message: "plugin delayed", + PendingPostId: pendingPostId, + }, false) + require.Nil(t, err) + require.Equal(t, post.Message, "plugin delayed") + }() + + // Give the goroutine above a chance to start and get delayed by the plugin. + time.Sleep(2 * time.Second) + + // Try creating a duplicate post + duplicatePost, err := th.App.CreatePostAsUser(&model.Post{ + UserId: th.BasicUser.Id, + ChannelId: th.BasicChannel.Id, + Message: "plugin delayed", + PendingPostId: pendingPostId, + }, false) + require.NotNil(t, err) + require.Equal(t, "api.post.deduplicate_create_post.pending", err.Id) + require.Nil(t, duplicatePost) + + // Wait for the first CreatePost to finish to ensure assertions are made. + wg.Wait() + }) + + t.Run("duplicate create post after cache expires is not idempotent", func(t *testing.T) { + pendingPostId := model.NewId() + post, err := th.App.CreatePostAsUser(&model.Post{ + UserId: th.BasicUser.Id, + ChannelId: th.BasicChannel.Id, + Message: "message", + PendingPostId: pendingPostId, + }, false) + require.Nil(t, err) + require.Equal(t, "message", post.Message) + + time.Sleep(PENDING_POST_IDS_CACHE_TTL) + + duplicatePost, err := th.App.CreatePostAsUser(&model.Post{ + UserId: th.BasicUser.Id, + ChannelId: th.BasicChannel.Id, + Message: "message", + PendingPostId: pendingPostId, + }, false) + require.Nil(t, err) + require.NotEqual(t, post.Id, duplicatePost.Id, "should have created new post id") + require.Equal(t, "message", duplicatePost.Message) + }) +} + func TestUpdatePostEditAt(t *testing.T) { th := Setup().InitBasic() defer th.TearDown() diff --git a/app/server.go b/app/server.go index 8684ef4bc5..205f98eae5 100644 --- a/app/server.go +++ b/app/server.go @@ -91,6 +91,7 @@ type Server struct { htmlTemplateWatcher *utils.HTMLTemplateWatcher sessionCache *utils.Cache + seenPendingPostIdsCache *utils.Cache configListenerId string licenseListenerId string logListenerId string @@ -128,13 +129,14 @@ func NewServer(options ...Option) (*Server, error) { rootRouter := mux.NewRouter() s := &Server{ - goroutineExitSignal: make(chan struct{}, 1), - RootRouter: rootRouter, - configFile: "config.json", - configListeners: make(map[string]func(*model.Config, *model.Config)), - licenseListeners: map[string]func(){}, - sessionCache: utils.NewLru(model.SESSION_CACHE_SIZE), - clientConfig: make(map[string]string), + goroutineExitSignal: make(chan struct{}, 1), + RootRouter: rootRouter, + configFile: "config.json", + configListeners: make(map[string]func(*model.Config, *model.Config)), + licenseListeners: map[string]func(){}, + sessionCache: utils.NewLru(model.SESSION_CACHE_SIZE), + seenPendingPostIdsCache: utils.NewLru(PENDING_POST_IDS_CACHE_SIZE), + clientConfig: make(map[string]string), } for _, option := range options { option(s) diff --git a/i18n/en.json b/i18n/en.json index a84627c693..6880266018 100644 --- a/i18n/en.json +++ b/i18n/en.json @@ -1448,6 +1448,14 @@ "id": "api.post.create_webhook_post.creating.app_error", "translation": "Error creating post" }, + { + "id": "api.post.deduplicate_create_post.failed_to_get", + "translation": "Failed to fetch original post after deduplicating a client repeating the same request." + }, + { + "id": "api.post.deduplicate_create_post.pending", + "translation": "Rejected post since another client is making the same request." + }, { "id": "api.post.disabled_all", "translation": "@all has been disabled because the channel has more than {{.Users}} users." diff --git a/store/local_cache_supplier.go b/store/local_cache_supplier.go index a27aa12af1..3f87e7c5f9 100644 --- a/store/local_cache_supplier.go +++ b/store/local_cache_supplier.go @@ -33,6 +33,18 @@ type LocalCacheSupplier struct { cluster einterfaces.ClusterInterface } +// Caching Interface +type ObjectCache interface { + AddWithExpiresInSecs(key, value interface{}, expireAtSecs int64) + AddWithDefaultExpires(key, value interface{}) + Purge() + Get(key interface{}) (value interface{}, ok bool) + Remove(key interface{}) + Len() int + Name() string + GetInvalidateClusterEvent() string +} + func NewLocalCacheSupplier(metrics einterfaces.MetricsInterface, cluster einterfaces.ClusterInterface) *LocalCacheSupplier { supplier := &LocalCacheSupplier{ reactionCache: utils.NewLruWithParams(REACTION_CACHE_SIZE, "Reaction", REACTION_CACHE_SEC, model.CLUSTER_EVENT_INVALIDATE_CACHE_FOR_REACTIONS), @@ -58,7 +70,7 @@ func (s *LocalCacheSupplier) Next() LayeredStoreSupplier { return s.next } -func (s *LocalCacheSupplier) doStandardReadCache(ctx context.Context, cache utils.ObjectCache, key string, hints ...LayeredStoreHint) *LayeredStoreSupplierResult { +func (s *LocalCacheSupplier) doStandardReadCache(ctx context.Context, cache ObjectCache, key string, hints ...LayeredStoreHint) *LayeredStoreSupplierResult { if hintsContains(hints, LSH_NO_CACHE) { if s.metrics != nil { s.metrics.IncrementMemCacheMissCounter(cache.Name()) @@ -82,13 +94,13 @@ func (s *LocalCacheSupplier) doStandardReadCache(ctx context.Context, cache util return nil } -func (s *LocalCacheSupplier) doStandardAddToCache(ctx context.Context, cache utils.ObjectCache, key string, result *LayeredStoreSupplierResult, hints ...LayeredStoreHint) { +func (s *LocalCacheSupplier) doStandardAddToCache(ctx context.Context, cache ObjectCache, key string, result *LayeredStoreSupplierResult, hints ...LayeredStoreHint) { if result.Err == nil && result.Data != nil { cache.AddWithDefaultExpires(key, result.Data) } } -func (s *LocalCacheSupplier) doInvalidateCacheCluster(cache utils.ObjectCache, key string) { +func (s *LocalCacheSupplier) doInvalidateCacheCluster(cache ObjectCache, key string) { cache.Remove(key) if s.cluster != nil { msg := &model.ClusterMessage{ @@ -100,7 +112,7 @@ func (s *LocalCacheSupplier) doInvalidateCacheCluster(cache utils.ObjectCache, k } } -func (s *LocalCacheSupplier) doClearCacheCluster(cache utils.ObjectCache) { +func (s *LocalCacheSupplier) doClearCacheCluster(cache ObjectCache) { cache.Purge() if s.cluster != nil { msg := &model.ClusterMessage{ diff --git a/utils/lru.go b/utils/lru.go index 8e896a6dc3..37e0b6e64d 100644 --- a/utils/lru.go +++ b/utils/lru.go @@ -13,18 +13,6 @@ import ( "time" ) -// Caching Interface -type ObjectCache interface { - AddWithExpiresInSecs(key, value interface{}, expireAtSecs int64) - AddWithDefaultExpires(key, value interface{}) - Purge() - Get(key interface{}) (value interface{}, ok bool) - Remove(key interface{}) - Len() int - Name() string - GetInvalidateClusterEvent() string -} - // Cache is a thread-safe fixed size LRU cache. type Cache struct { size int @@ -38,15 +26,15 @@ type Cache struct { len int } -// entry is used to hold a value in the evictList +// entry is used to hold a value in the evictList. type entry struct { - key interface{} - value interface{} - expireAtSecs int64 - generation int64 + key interface{} + value interface{} + expires time.Time + generation int64 } -// New creates an LRU of the given size +// New creates an LRU of the given size. func NewLru(size int) *Cache { return &Cache{ size: size, @@ -55,6 +43,7 @@ func NewLru(size int) *Cache { } } +// New creates an LRU with the given parameters. func NewLruWithParams(size int, name string, defaultExpiry int64, invalidateClusterEvent string) *Cache { lru := NewLru(size) lru.name = name @@ -63,7 +52,7 @@ func NewLruWithParams(size int, name string, defaultExpiry int64, invalidateClus return lru } -// Purge is used to completely clear the cache +// Purge is used to completely clear the cache. func (c *Cache) Purge() { c.lock.Lock() defer c.lock.Unlock() @@ -72,28 +61,36 @@ func (c *Cache) Purge() { c.currentGeneration++ } +// Add adds the given key and value to the store without an expiry. func (c *Cache) Add(key, value interface{}) { c.AddWithExpiresInSecs(key, value, 0) } +// Add adds the given key and value to the store with the default expiry. func (c *Cache) AddWithDefaultExpires(key, value interface{}) { c.AddWithExpiresInSecs(key, value, c.defaultExpiry) } +// AddWithExpiresInSecs adds the given key and value to the cache with the given expiry. func (c *Cache) AddWithExpiresInSecs(key, value interface{}, expireAtSecs int64) { c.lock.Lock() defer c.lock.Unlock() - if expireAtSecs > 0 { - expireAtSecs = (time.Now().UnixNano() / int64(time.Second)) + expireAtSecs + c.add(key, value, time.Duration(expireAtSecs)*time.Second) +} + +func (c *Cache) add(key, value interface{}, ttl time.Duration) { + var expires time.Time + if ttl > 0 { + expires = time.Now().Add(ttl) } - // Check for existing item + // Check for existing item, ignoring expiry since we'd update anyway. if ent, ok := c.items[key]; ok { c.evictList.MoveToFront(ent) e := ent.Value.(*entry) e.value = value - e.expireAtSecs = expireAtSecs + e.expires = expires if e.generation != c.currentGeneration { e.generation = c.currentGeneration c.len++ @@ -102,7 +99,7 @@ func (c *Cache) AddWithExpiresInSecs(key, value interface{}, expireAtSecs int64) } // Add new item - ent := &entry{key, value, expireAtSecs, c.currentGeneration} + ent := &entry{key, value, expires, c.currentGeneration} entry := c.evictList.PushFront(ent) c.items[key] = entry c.len++ @@ -112,14 +109,19 @@ func (c *Cache) AddWithExpiresInSecs(key, value interface{}, expireAtSecs int64) } } +// Get returns the value stored in the cache for a key, or nil if no value is present. The ok result indicates whether value was found in the cache. func (c *Cache) Get(key interface{}) (value interface{}, ok bool) { c.lock.Lock() defer c.lock.Unlock() + return c.getValue(key) +} + +func (c *Cache) getValue(key interface{}) (value interface{}, ok bool) { if ent, ok := c.items[key]; ok { e := ent.Value.(*entry) - if e.generation != c.currentGeneration || (e.expireAtSecs > 0 && (time.Now().UnixNano()/int64(time.Second)) > e.expireAtSecs) { + if e.generation != c.currentGeneration || (!e.expires.IsZero() && time.Now().After(e.expires)) { c.removeElement(ent) return nil, false } @@ -131,6 +133,23 @@ func (c *Cache) Get(key interface{}) (value interface{}, ok bool) { return nil, false } +// GetOrAdd returns the existing value for the key if present. Otherwise, it stores and returns the given value. The loaded result is true if the value was loaded, false if stored. +// This API intentionally deviates from the Add-only variants above for simplicity. We should simplify the entire API in the future. +func (c *Cache) GetOrAdd(key, value interface{}, ttl time.Duration) (actual interface{}, loaded bool) { + c.lock.Lock() + defer c.lock.Unlock() + + // Check for existing item + if value, ok := c.getValue(key); ok { + return value, true + } + + c.add(key, value, ttl) + + return value, false +} + +// Remove deletes the value for a key. func (c *Cache) Remove(key interface{}) { c.lock.Lock() defer c.lock.Unlock() @@ -165,15 +184,16 @@ func (c *Cache) Len() int { return c.len } +// Name identifies this cache instance among others in the system. func (c *Cache) Name() string { return c.name } +// GetInvalidateClusterEvent returns the cluster event configured when this cache was created. func (c *Cache) GetInvalidateClusterEvent() string { return c.invalidateClusterEvent } -// removeElement is used to remove a given list element from the cache func (c *Cache) removeElement(e *list.Element) { c.evictList.Remove(e) kv := e.Value.(*entry) diff --git a/utils/lru_test.go b/utils/lru_test.go index 4312515b97..89cff52fdf 100644 --- a/utils/lru_test.go +++ b/utils/lru_test.go @@ -7,8 +7,12 @@ package utils -import "testing" -import "time" +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" +) func TestLRU(t *testing.T) { l := NewLru(128) @@ -79,3 +83,51 @@ func TestLRUExpire(t *testing.T) { t.Fatal("should exist") } } + +func TestLRUGetOrAdd(t *testing.T) { + l := NewLru(128) + + // First GetOrAdd should save + value, loaded := l.GetOrAdd(1, 1, 0) + assert.Equal(t, 1, value) + assert.False(t, loaded) + + // Second GetOrAdd should load original value, ignoring new value + value, loaded = l.GetOrAdd(1, 10, 0) + assert.Equal(t, 1, value) + assert.True(t, loaded) + + // Third GetOrAdd should still load original value + value, loaded = l.GetOrAdd(1, 1, 0) + assert.Equal(t, 1, value) + assert.True(t, loaded) + + // First GetOrAdd on a new key should save + value, loaded = l.GetOrAdd(2, 2, 0) + assert.Equal(t, 2, value) + assert.False(t, loaded) + + l.Remove(1) + + // GetOrAdd after a remove should save + value, loaded = l.GetOrAdd(1, 10, 0) + assert.Equal(t, 10, value) + assert.False(t, loaded) + + // GetOrAdd after another key was removed should load original value for key + value, loaded = l.GetOrAdd(2, 2, 0) + assert.Equal(t, 2, value) + assert.True(t, loaded) + + // GetOrAdd should expire + value, loaded = l.GetOrAdd(3, 3, 500*time.Millisecond) + assert.Equal(t, 3, value) + assert.False(t, loaded) + value, loaded = l.GetOrAdd(3, 4, 500*time.Millisecond) + assert.Equal(t, 3, value) + assert.True(t, loaded) + time.Sleep(1 * time.Second) + value, loaded = l.GetOrAdd(3, 5, 500*time.Millisecond) + assert.Equal(t, 5, value) + assert.False(t, loaded) +}