Alerting: Allow configuration of non-ready alertmanagers (#43063)

* Create API test for overwriting invalid alertmanager config

* Avoid requiring alertmanager readiness for config changes

* AlertmanagerSrv depends on functionality rather than concrete types

* Add test for non-ready alertmanagers

* Additional cleanup and polish

* Back out previous integration test changes

* Refactor of tests incorrectly caused a test to become redundant

* Use pre-existing fake secret service

* Drop unused interface

* Test against concrete MultiOrgAlertmanager re-using fake infra from other tests

* Fix linter error

* Empty commit to rerun checks
This commit is contained in:
Alexander Weaver 2021-12-27 17:01:17 -06:00 committed by GitHub
parent 9abdaf251f
commit 56b3dc5445
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 228 additions and 15 deletions

View File

@ -12,6 +12,7 @@ import (
"github.com/grafana/grafana/pkg/services/datasources"
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/notifier"
"github.com/grafana/grafana/pkg/services/ngalert/schedule"
"github.com/grafana/grafana/pkg/services/ngalert/state"
@ -49,6 +50,10 @@ type Alertmanager interface {
TestReceivers(ctx context.Context, c apimodels.TestReceiversConfigBodyParams) (*notifier.TestReceiversResult, error)
}
type AlertingStore interface {
GetLatestAlertmanagerConfiguration(query *models.GetLatestAlertmanagerConfigurationQuery) error
}
// API handlers.
type API struct {
Cfg *setting.Cfg
@ -59,7 +64,7 @@ type API struct {
Schedule schedule.ScheduleService
RuleStore store.RuleStore
InstanceStore store.InstanceStore
AlertingStore store.AlertingStore
AlertingStore AlertingStore
AdminConfigStore store.AdminConfigurationStore
DataProxy *datasourceproxy.DataSourceProxyService
MultiOrgAlertmanager *notifier.MultiOrgAlertmanager

View File

@ -30,7 +30,7 @@ const (
type AlertmanagerSrv struct {
mam *notifier.MultiOrgAlertmanager
secrets secrets.Service
store store.AlertingStore
store AlertingStore
log log.Logger
}
@ -361,7 +361,10 @@ func (srv AlertmanagerSrv) RoutePostAlertingConfig(c *models.ReqContext, body ap
am, errResp := srv.AlertmanagerFor(c.OrgId)
if errResp != nil {
return errResp
// It's okay if the alertmanager isn't ready yet, we're changing its config anyway.
if !errors.Is(errResp.Err(), notifier.ErrAlertmanagerNotReady) {
return errResp
}
}
if err := am.SaveAndApplyConfig(&body); err != nil {
@ -517,11 +520,11 @@ func (srv AlertmanagerSrv) AlertmanagerFor(orgID int64) (Alertmanager, *response
}
if errors.Is(err, notifier.ErrNoAlertmanagerForOrg) {
return nil, response.Error(http.StatusNotFound, err.Error(), nil)
return nil, response.Error(http.StatusNotFound, err.Error(), err)
}
if errors.Is(err, notifier.ErrAlertmanagerNotReady) {
return nil, response.Error(http.StatusConflict, err.Error(), nil)
return am, response.Error(http.StatusConflict, err.Error(), err)
}
srv.log.Error("unable to obtain the org's Alertmanager", "err", err)

View File

@ -2,11 +2,22 @@ package api
import (
"context"
"io/ioutil"
"net/http"
"os"
"testing"
"time"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/models"
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/notifier"
"github.com/grafana/grafana/pkg/services/secrets/fakes"
secretsManager "github.com/grafana/grafana/pkg/services/secrets/manager"
"github.com/grafana/grafana/pkg/setting"
"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/require"
)
@ -138,3 +149,148 @@ func TestStatusForTestReceivers(t *testing.T) {
}}))
})
}
func TestAlertmanagerConfig(t *testing.T) {
sut := createSut(t)
t.Run("assert 404 Not Found when applying config to nonexistent org", func(t *testing.T) {
rc := models.ReqContext{
SignedInUser: &models.SignedInUser{
OrgRole: models.ROLE_EDITOR,
OrgId: 12,
},
}
request := createAmConfigRequest(t)
response := sut.RoutePostAlertingConfig(&rc, request)
require.Equal(t, 404, response.Status())
require.Contains(t, string(response.Body()), "Alertmanager does not exist for this organization")
})
t.Run("assert 403 Forbidden when applying config while not Editor", func(t *testing.T) {
rc := models.ReqContext{
SignedInUser: &models.SignedInUser{
OrgRole: models.ROLE_VIEWER,
OrgId: 1,
},
}
request := createAmConfigRequest(t)
response := sut.RoutePostAlertingConfig(&rc, request)
require.Equal(t, 403, response.Status())
require.Contains(t, string(response.Body()), "permission denied")
})
t.Run("assert 202 when config successfully applied", func(t *testing.T) {
rc := models.ReqContext{
SignedInUser: &models.SignedInUser{
OrgRole: models.ROLE_EDITOR,
OrgId: 1,
},
}
request := createAmConfigRequest(t)
response := sut.RoutePostAlertingConfig(&rc, request)
require.Equal(t, 202, response.Status())
})
t.Run("assert 202 when alertmanager to configure is not ready", func(t *testing.T) {
sut := createSut(t)
rc := models.ReqContext{
SignedInUser: &models.SignedInUser{
OrgRole: models.ROLE_EDITOR,
OrgId: 3, // Org 3 was initialized with broken config.
},
}
request := createAmConfigRequest(t)
response := sut.RoutePostAlertingConfig(&rc, request)
require.Equal(t, 202, response.Status())
})
}
func createSut(t *testing.T) AlertmanagerSrv {
t.Helper()
mam := createMultiOrgAlertmanager(t)
store := newFakeAlertingStore(t)
store.Setup(1)
store.Setup(2)
store.Setup(3)
secrets := fakes.NewFakeSecretsService()
return AlertmanagerSrv{mam: mam, store: store, secrets: secrets}
}
func createAmConfigRequest(t *testing.T) apimodels.PostableUserConfig {
t.Helper()
request := apimodels.PostableUserConfig{}
err := request.UnmarshalJSON([]byte(validConfig))
require.NoError(t, err)
return request
}
func createMultiOrgAlertmanager(t *testing.T) *notifier.MultiOrgAlertmanager {
t.Helper()
configs := map[int64]*ngmodels.AlertConfiguration{
1: {AlertmanagerConfiguration: validConfig, OrgID: 1},
2: {AlertmanagerConfiguration: validConfig, OrgID: 2},
3: {AlertmanagerConfiguration: brokenConfig, OrgID: 3},
}
configStore := notifier.NewFakeConfigStore(t, configs)
orgStore := notifier.NewFakeOrgStore(t, []int64{1, 2, 3})
tmpDir, err := ioutil.TempDir("", "test")
require.NoError(t, err)
kvStore := notifier.NewFakeKVStore(t)
secretsService := secretsManager.SetupTestService(t, fakes.NewFakeSecretsStore())
reg := prometheus.NewPedanticRegistry()
m := metrics.NewNGAlert(reg)
decryptFn := secretsService.GetDecryptedValue
cfg := &setting.Cfg{
DataPath: tmpDir,
UnifiedAlerting: setting.UnifiedAlertingSettings{
AlertmanagerConfigPollInterval: 3 * time.Minute,
DefaultConfiguration: setting.GetAlertmanagerDefaultConfiguration(),
DisabledOrgs: map[int64]struct{}{5: {}},
}, // do not poll in tests.
}
mam, err := notifier.NewMultiOrgAlertmanager(cfg, &configStore, &orgStore, kvStore, decryptFn, m.GetMultiOrgAlertmanagerMetrics(), log.New("testlogger"))
require.NoError(t, err)
t.Cleanup(cleanOrgDirectories(tmpDir, t))
err = mam.LoadAndSyncAlertmanagersForOrgs(context.Background())
require.NoError(t, err)
return mam
}
func cleanOrgDirectories(path string, t *testing.T) func() {
return func() {
require.NoError(t, os.RemoveAll(path))
}
}
var validConfig = setting.GetAlertmanagerDefaultConfiguration()
var brokenConfig = `
"alertmanager_config": {
"route": {
"receiver": "grafana-default-email"
},
"receivers": [{
"name": "grafana-default-email",
"grafana_managed_receiver_configs": [{
"uid": "abc",
"name": "default-email",
"type": "email",
"isDefault": true,
"settings": {}
}]
}]
}
}`

View File

@ -0,0 +1,31 @@
package api
import (
"testing"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/store"
)
type FakeAlertingStore struct {
orgsWithConfig map[int64]bool
}
func newFakeAlertingStore(t *testing.T) FakeAlertingStore {
t.Helper()
return FakeAlertingStore{
orgsWithConfig: map[int64]bool{},
}
}
func (f FakeAlertingStore) Setup(orgID int64) {
f.orgsWithConfig[orgID] = true
}
func (f FakeAlertingStore) GetLatestAlertmanagerConfiguration(query *models.GetLatestAlertmanagerConfigurationQuery) error {
if _, ok := f.orgsWithConfig[query.OrgID]; ok {
return nil
}
return store.ErrNoAlertmanagerConfiguration
}

View File

@ -48,7 +48,7 @@ func setupAMTest(t *testing.T) *Alertmanager {
Logger: log.New("alertmanager-test"),
}
kvStore := newFakeKVStore(t)
kvStore := NewFakeKVStore(t)
secretsService := secretsManager.SetupTestService(t, database.ProvideSecretsStore(sqlStore))
decryptFn := secretsService.GetDecryptedValue
am, err := newAlertmanager(1, cfg, s, kvStore, &NilPeer{}, decryptFn, m)

View File

@ -11,7 +11,7 @@ import (
)
func TestFileStore_FilepathFor_DirectoryNotExist(t *testing.T) {
store := newFakeKVStore(t)
store := NewFakeKVStore(t)
workingDir := filepath.Join(t.TempDir(), "notexistdir")
fs := NewFileStore(1, store, workingDir)
filekey := "silences"
@ -32,7 +32,7 @@ func TestFileStore_FilepathFor_DirectoryNotExist(t *testing.T) {
}
}
func TestFileStore_FilepathFor(t *testing.T) {
store := newFakeKVStore(t)
store := NewFakeKVStore(t)
workingDir := t.TempDir()
fs := NewFileStore(1, store, workingDir)
filekey := "silences"
@ -74,7 +74,7 @@ func TestFileStore_FilepathFor(t *testing.T) {
}
func TestFileStore_Persist(t *testing.T) {
store := newFakeKVStore(t)
store := NewFakeKVStore(t)
state := &fakeState{data: "something to marshal"}
workingDir := t.TempDir()
fs := NewFileStore(1, store, workingDir)

View File

@ -311,7 +311,7 @@ func (moa *MultiOrgAlertmanager) AlertmanagerFor(orgID int64) (*Alertmanager, er
}
if !orgAM.Ready() {
return nil, ErrAlertmanagerNotReady
return orgAM, ErrAlertmanagerNotReady
}
return orgAM, nil

View File

@ -33,7 +33,7 @@ func TestMultiOrgAlertmanager_SyncAlertmanagersForOrgs(t *testing.T) {
tmpDir, err := ioutil.TempDir("", "test")
require.NoError(t, err)
kvStore := newFakeKVStore(t)
kvStore := NewFakeKVStore(t)
secretsService := secretsManager.SetupTestService(t, fakes.NewFakeSecretsStore())
decryptFn := secretsService.GetDecryptedValue
reg := prometheus.NewPedanticRegistry()
@ -161,7 +161,7 @@ func TestMultiOrgAlertmanager_SyncAlertmanagersForOrgsWithFailures(t *testing.T)
tmpDir, err := ioutil.TempDir("", "test")
require.NoError(t, err)
kvStore := newFakeKVStore(t)
kvStore := NewFakeKVStore(t)
secretsService := secretsManager.SetupTestService(t, fakes.NewFakeSecretsStore())
decryptFn := secretsService.GetDecryptedValue
reg := prometheus.NewPedanticRegistry()
@ -219,7 +219,7 @@ func TestMultiOrgAlertmanager_AlertmanagerFor(t *testing.T) {
DataPath: tmpDir,
UnifiedAlerting: setting.UnifiedAlertingSettings{AlertmanagerConfigPollInterval: 3 * time.Minute, DefaultConfiguration: setting.GetAlertmanagerDefaultConfiguration()}, // do not poll in tests.
}
kvStore := newFakeKVStore(t)
kvStore := NewFakeKVStore(t)
secretsService := secretsManager.SetupTestService(t, fakes.NewFakeSecretsStore())
decryptFn := secretsService.GetDecryptedValue
reg := prometheus.NewPedanticRegistry()
@ -246,7 +246,9 @@ func TestMultiOrgAlertmanager_AlertmanagerFor(t *testing.T) {
{
// let's delete its "running config" to make it non-ready
mam.alertmanagers[1].config = nil
_, err := mam.AlertmanagerFor(1)
am, err := mam.AlertmanagerFor(1)
require.NotNil(t, am)
require.False(t, am.Ready())
require.EqualError(t, err, ErrAlertmanagerNotReady.Error())
}

View File

@ -15,6 +15,14 @@ type FakeConfigStore struct {
configs map[int64]*models.AlertConfiguration
}
func NewFakeConfigStore(t *testing.T, configs map[int64]*models.AlertConfiguration) FakeConfigStore {
t.Helper()
return FakeConfigStore{
configs: configs,
}
}
func (f *FakeConfigStore) GetAllLatestAlertmanagerConfiguration(context.Context) ([]*models.AlertConfiguration, error) {
result := make([]*models.AlertConfiguration, 0, len(f.configs))
for _, configuration := range f.configs {
@ -63,6 +71,14 @@ type FakeOrgStore struct {
orgs []int64
}
func NewFakeOrgStore(t *testing.T, orgs []int64) FakeOrgStore {
t.Helper()
return FakeOrgStore{
orgs: orgs,
}
}
func (f *FakeOrgStore) GetOrgs(_ context.Context) ([]int64, error) {
return f.orgs, nil
}
@ -72,7 +88,7 @@ type FakeKVStore struct {
store map[int64]map[string]map[string]string
}
func newFakeKVStore(t *testing.T) *FakeKVStore {
func NewFakeKVStore(t *testing.T) *FakeKVStore {
t.Helper()
return &FakeKVStore{