From 320262c3db2f1c294355c7900480972d782f3efa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-Philippe=20Qu=C3=A9m=C3=A9ner?= Date: Wed, 20 Jul 2022 16:54:18 +0200 Subject: [PATCH] Alerting: Cleanup the `alert_configuration` table on write (#51497) --- pkg/services/ngalert/store/alertmanager.go | 47 ++++++- .../ngalert/store/alertmanager_test.go | 132 ++++++++++++++++-- 2 files changed, 164 insertions(+), 15 deletions(-) diff --git a/pkg/services/ngalert/store/alertmanager.go b/pkg/services/ngalert/store/alertmanager.go index c23fbf7254b..6f97df9fd59 100644 --- a/pkg/services/ngalert/store/alertmanager.go +++ b/pkg/services/ngalert/store/alertmanager.go @@ -19,6 +19,10 @@ var ( // ErrVersionLockedObjectNotFound is returned when an object is not // found using the current hash. ErrVersionLockedObjectNotFound = fmt.Errorf("could not find object using provided id and hash") + // ConfigRecordsLimit defines the limit of how many alertmanager configuration versions + // should be stored in the database for each organization including the current one. + // Has to be > 0 + ConfigRecordsLimit int64 = 100 ) // GetLatestAlertmanagerConfiguration returns the lastest version of the alertmanager configuration. @@ -78,7 +82,9 @@ func (st DBstore) SaveAlertmanagerConfigurationWithCallback(ctx context.Context, if _, err := sess.Insert(config); err != nil { return err } - + if _, err := st.deleteOldConfigurations(ctx, cmd.OrgID, ConfigRecordsLimit); err != nil { + st.Logger.Warn("failed to delete old am configs", "org", cmd.OrgID, "err", err) + } if err := callback(); err != nil { return err } @@ -118,6 +124,9 @@ func (st *DBstore) UpdateAlertmanagerConfiguration(ctx context.Context, cmd *mod if rows == 0 { return ErrVersionLockedObjectNotFound } + if _, err := st.deleteOldConfigurations(ctx, cmd.OrgID, ConfigRecordsLimit); err != nil { + st.Logger.Warn("failed to delete old am configs", "org", cmd.OrgID, "err", err) + } return err }) } @@ -196,3 +205,39 @@ func getInsertQuery(driver string) string { )` } } + +func (st *DBstore) deleteOldConfigurations(ctx context.Context, orgID, limit int64) (int64, error) { + if limit < 1 { + return 0, fmt.Errorf("failed to delete old configurations: limit is set to '%d' but needs to be > 0", limit) + } + var affactedRows int64 + err := st.SQLStore.WithDbSession(ctx, func(sess *sqlstore.DBSession) error { + res, err := sess.Exec(` + DELETE FROM + alert_configuration + WHERE + org_id = ? + AND + id NOT IN ( + SELECT T.* FROM ( + SELECT id + FROM alert_configuration + WHERE org_id = ? ORDER BY id DESC LIMIT ? + )AS T + ) + `, orgID, orgID, limit) + if err != nil { + return err + } + rows, err := res.RowsAffected() + if err != nil { + return err + } + affactedRows = rows + if affactedRows > 0 { + st.Logger.Info("deleted old alert_configuration(s)", "org", orgID, "limit", limit, "delete_count", affactedRows) + } + return nil + }) + return affactedRows, err +} diff --git a/pkg/services/ngalert/store/alertmanager_test.go b/pkg/services/ngalert/store/alertmanager_test.go index b28d238bdfb..6d3284faead 100644 --- a/pkg/services/ngalert/store/alertmanager_test.go +++ b/pkg/services/ngalert/store/alertmanager_test.go @@ -6,6 +6,7 @@ import ( "fmt" "testing" + "github.com/grafana/grafana/pkg/infra/log" "github.com/grafana/grafana/pkg/services/ngalert/models" "github.com/grafana/grafana/pkg/services/sqlstore" "github.com/stretchr/testify/require" @@ -18,20 +19,11 @@ func TestIntegrationAlertManagerHash(t *testing.T) { sqlStore := sqlstore.InitTestDB(t) store := &DBstore{ SQLStore: sqlStore, + Logger: log.NewNopLogger(), } - setupConfig := func(t *testing.T, config string) (string, string) { - config, configMD5 := config, fmt.Sprintf("%x", md5.Sum([]byte(config))) - err := store.SaveAlertmanagerConfiguration(context.Background(), &models.SaveAlertmanagerConfigurationCmd{ - AlertmanagerConfiguration: config, - ConfigurationVersion: "v1", - Default: false, - OrgID: 1, - }) - require.NoError(t, err) - return config, configMD5 - } + t.Run("After saving the DB should return the right hash", func(t *testing.T) { - _, configMD5 := setupConfig(t, "my-config") + _, configMD5 := setupConfig(t, "my-config", store) req := &models.GetLatestAlertmanagerConfigurationQuery{ OrgID: 1, } @@ -41,7 +33,7 @@ func TestIntegrationAlertManagerHash(t *testing.T) { }) t.Run("When passing the right hash the config should be updated", func(t *testing.T) { - _, configMD5 := setupConfig(t, "my-config") + _, configMD5 := setupConfig(t, "my-config", store) req := &models.GetLatestAlertmanagerConfigurationQuery{ OrgID: 1, } @@ -64,7 +56,7 @@ func TestIntegrationAlertManagerHash(t *testing.T) { }) t.Run("When passing the wrong hash the update should error", func(t *testing.T) { - config, configMD5 := setupConfig(t, "my-config") + config, configMD5 := setupConfig(t, "my-config", store) req := &models.GetLatestAlertmanagerConfigurationQuery{ OrgID: 1, } @@ -82,3 +74,115 @@ func TestIntegrationAlertManagerHash(t *testing.T) { require.EqualError(t, ErrVersionLockedObjectNotFound, err.Error()) }) } + +func TestIntegrationAlertManagerConfigCleanup(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + sqlStore := sqlstore.InitTestDB(t) + store := &DBstore{ + SQLStore: sqlStore, + Logger: log.NewNopLogger(), + } + t.Run("when calling the cleanup with less records than the limit all recrods should stay", func(t *testing.T) { + var orgID int64 = 3 + oldestConfig, _ := setupConfig(t, "oldest-record", store) + err := store.SaveAlertmanagerConfiguration(context.Background(), &models.SaveAlertmanagerConfigurationCmd{ + AlertmanagerConfiguration: oldestConfig, + ConfigurationVersion: "v1", + Default: false, + OrgID: orgID, + }) + require.NoError(t, err) + + olderConfig, _ := setupConfig(t, "older-record", store) + err = store.SaveAlertmanagerConfiguration(context.Background(), &models.SaveAlertmanagerConfigurationCmd{ + AlertmanagerConfiguration: olderConfig, + ConfigurationVersion: "v1", + Default: false, + OrgID: orgID, + }) + require.NoError(t, err) + + config, _ := setupConfig(t, "newest-record", store) + err = store.SaveAlertmanagerConfiguration(context.Background(), &models.SaveAlertmanagerConfigurationCmd{ + AlertmanagerConfiguration: config, + ConfigurationVersion: "v1", + Default: false, + OrgID: orgID, + }) + require.NoError(t, err) + + rowsAffacted, err := store.deleteOldConfigurations(context.Background(), orgID, 100) + require.Equal(t, int64(0), rowsAffacted) + require.NoError(t, err) + + req := &models.GetLatestAlertmanagerConfigurationQuery{ + OrgID: orgID, + } + err = store.GetLatestAlertmanagerConfiguration(context.Background(), req) + require.NoError(t, err) + require.Equal(t, "newest-record", req.Result.AlertmanagerConfiguration) + }) + t.Run("when calling the cleanup only the oldest records surpassing the limit should be deleted", func(t *testing.T) { + var orgID int64 = 2 + oldestConfig, _ := setupConfig(t, "oldest-record", store) + err := store.SaveAlertmanagerConfiguration(context.Background(), &models.SaveAlertmanagerConfigurationCmd{ + AlertmanagerConfiguration: oldestConfig, + ConfigurationVersion: "v1", + Default: false, + OrgID: orgID, + }) + require.NoError(t, err) + + olderConfig, _ := setupConfig(t, "older-record", store) + err = store.SaveAlertmanagerConfiguration(context.Background(), &models.SaveAlertmanagerConfigurationCmd{ + AlertmanagerConfiguration: olderConfig, + ConfigurationVersion: "v1", + Default: false, + OrgID: orgID, + }) + require.NoError(t, err) + + config, _ := setupConfig(t, "newest-record", store) + err = store.SaveAlertmanagerConfiguration(context.Background(), &models.SaveAlertmanagerConfigurationCmd{ + AlertmanagerConfiguration: config, + ConfigurationVersion: "v1", + Default: false, + OrgID: orgID, + }) + require.NoError(t, err) + + rowsAffacted, err := store.deleteOldConfigurations(context.Background(), orgID, 1) + require.Equal(t, int64(2), rowsAffacted) + require.NoError(t, err) + + req := &models.GetLatestAlertmanagerConfigurationQuery{ + OrgID: orgID, + } + err = store.GetLatestAlertmanagerConfiguration(context.Background(), req) + require.NoError(t, err) + require.Equal(t, "newest-record", req.Result.AlertmanagerConfiguration) + }) + t.Run("limit set to 0 should fail", func(t *testing.T) { + _, err := store.deleteOldConfigurations(context.Background(), 1, 0) + require.Error(t, err) + }) + t.Run("limit set to negative should fail", func(t *testing.T) { + _, err := store.deleteOldConfigurations(context.Background(), 1, -1) + require.Error(t, err) + }) +} + +func setupConfig(t *testing.T, config string, store *DBstore) (string, string) { + t.Helper() + config, configMD5 := config, fmt.Sprintf("%x", md5.Sum([]byte(config))) + err := store.SaveAlertmanagerConfiguration(context.Background(), &models.SaveAlertmanagerConfigurationCmd{ + AlertmanagerConfiguration: config, + ConfigurationVersion: "v1", + Default: false, + OrgID: 1, + }) + require.NoError(t, err) + return config, configMD5 +}