Alerting: Fixes a bug when trying to sync broken alertmanager config (#40338)

* Alerting: Fixes a bug when trying to sync broken alertmanager config

Broken alertmanager configuration has the potential to be introduced as part of a migration e.g. due to incompatible data between what grafana accepts and what the Alertmanager expects. When this happens, we expect an eventually consistent behaviour where we'll keep trying to apply the configuration until it works.

As part of the change in https://github.com/grafana/grafana/pull/39237 we introduced a regression that modified this behaviour and instead tried to create a new Alertmanager for that organization every time, which eventually ended up in a panic due to duplicate metrics being registered.

This PR fixes that and introduces a test to catch further regressions.

* Remove disable orgs
This commit is contained in:
gotjosh 2021-10-12 18:10:08 +01:00 committed by GitHub
parent fe5e49476b
commit 48d73cb148
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 80 additions and 0 deletions

View File

@ -174,6 +174,7 @@ func (moa *MultiOrgAlertmanager) SyncAlertmanagersForOrgs(ctx context.Context, o
if err != nil {
moa.logger.Error("unable to create Alertmanager for org", "org", orgID, "err", err)
}
moa.alertmanagers[orgID] = am
alertmanager = am
}

View File

@ -130,6 +130,63 @@ grafana_alerting_discovered_configurations 4
}
}
// TestMultiOrgAlertmanager_SyncAlertmanagersForOrgsWithFailures syncs three
// organizations, one of which (org 2) is given brokenConfig as its stored
// Alertmanager configuration. It asserts that repeated syncs succeed, that all
// three Alertmanagers stay registered, that org 2's Alertmanager reports not
// ready while its configuration is broken, and that it reports ready after the
// stored configurations are cleared.
func TestMultiOrgAlertmanager_SyncAlertmanagersForOrgsWithFailures(t *testing.T) {
	// Include a broken configuration for organization 2.
	configStore := &FakeConfigStore{
		configs: map[int64]*models.AlertConfiguration{
			2: {AlertmanagerConfiguration: brokenConfig, OrgID: 2},
		},
	}
	orgStore := &FakeOrgStore{
		orgs: []int64{1, 2, 3},
	}

	tmpDir, err := ioutil.TempDir("", "test")
	require.NoError(t, err)
	// Remove the temporary data directory when the test ends so that runs do
	// not leave directories behind.
	t.Cleanup(cleanOrgDirectories(tmpDir, t))

	kvStore := newFakeKVStore(t)
	decryptFn := ossencryption.ProvideService().GetDecryptedValue
	reg := prometheus.NewPedanticRegistry()
	m := metrics.NewNGAlert(reg)
	cfg := &setting.Cfg{
		DataPath: tmpDir,
		UnifiedAlerting: setting.UnifiedAlertingSettings{
			AlertmanagerConfigPollInterval: 10 * time.Minute, // do not poll in tests.
			DefaultConfiguration:           setting.GetAlertmanagerDefaultConfiguration(),
		},
	}
	mam, err := NewMultiOrgAlertmanager(cfg, configStore, orgStore, kvStore, decryptFn, m.GetMultiOrgAlertmanagerMetrics(), log.New("testlogger"))
	require.NoError(t, err)
	ctx := context.Background()

	// When you sync the first time, the alertmanager is created but it doesn't
	// become ready until you have a configuration applied.
	{
		require.NoError(t, mam.LoadAndSyncAlertmanagersForOrgs(ctx))
		require.Len(t, mam.alertmanagers, 3)
		require.True(t, mam.alertmanagers[1].ready())
		require.False(t, mam.alertmanagers[2].ready())
		require.True(t, mam.alertmanagers[3].ready())
	}

	// On the next sync, it never panics and alertmanager is still not ready.
	{
		require.NoError(t, mam.LoadAndSyncAlertmanagersForOrgs(ctx))
		require.Len(t, mam.alertmanagers, 3)
		require.True(t, mam.alertmanagers[1].ready())
		require.False(t, mam.alertmanagers[2].ready())
		require.True(t, mam.alertmanagers[3].ready())
	}

	// If we fix the configuration, it becomes ready.
	{
		configStore.configs = map[int64]*models.AlertConfiguration{} // It'll apply the default config.
		require.NoError(t, mam.LoadAndSyncAlertmanagersForOrgs(ctx))
		require.Len(t, mam.alertmanagers, 3)
		require.True(t, mam.alertmanagers[1].ready())
		require.True(t, mam.alertmanagers[2].ready())
		require.True(t, mam.alertmanagers[3].ready())
	}
}
func TestMultiOrgAlertmanager_AlertmanagerFor(t *testing.T) {
configStore := &FakeConfigStore{
configs: map[int64]*models.AlertConfiguration{},
@ -197,3 +254,25 @@ func cleanOrgDirectories(path string, t *testing.T) func() {
require.NoError(t, os.RemoveAll(path))
}
}
// brokenConfig is a deliberately malformed Alertmanager configuration payload
// used as the stored configuration for organization 2 in
// TestMultiOrgAlertmanager_SyncAlertmanagersForOrgsWithFailures.
// The text is not well-formed JSON: it has no opening brace, the "addresses"
// entry is not followed by a comma, the "recipient" entry is followed by a
// trailing comma, and the "url" value contains arbitrary non-textual bytes.
// The contents are intentional; do not "repair" them.
var brokenConfig = `
"alertmanager_config": {
"route": {
"receiver": "grafana-default-email"
},
"receivers": [{
"name": "grafana-default-email",
"grafana_managed_receiver_configs": [{
"uid": "",
"name": "slack receiver",
"type": "slack",
"isDefault": true,
"settings": {
"addresses": "<example@email.com>"
"url": "<22>r_<72><5F>q/b<><62><EFBFBD><EFBFBD><EFBFBD>p@ⱎȏ =<3D><>@ӹtd>Rú<52>H<EFBFBD><48> <20>;<3B>@Uf<55><66>0<EFBFBD>\k2*jh<6A>}Íu<C38D>)"2<EFBFBD>F6]<EFBFBD>}r<EFBFBD><EFBFBD>R<EFBFBD>b<EFBFBD>d<EFBFBD>J;<EFBFBD><EFBFBD>S퓧<EFBFBD><EFBFBD>$<EFBFBD><EFBFBD>",
"recipient": "#graphana-metrics",
}
}]
}]
}
}`