mirror of
https://github.com/grafana/grafana.git
synced 2024-11-25 18:30:41 -06:00
Alerting: Fixes a bug when trying to sync broken alertmanager config (#40338)
* Alerting: Fixes a bug when trying to sync broken alertmanager config Broken alertmanager configuration has the potential to be introduced as part of a migration e.g. due to incompatible data between what grafana accepts and what the Alertmanager expects. When this happens, we expect an eventually consistent behaviour where we'll keep trying to apply the configuration until it works. As part of the change in https://github.com/grafana/grafana/pull/39237 we introduced a regression that modified this behaviour and instead tried to create a new Alertmanager for that organization every time, which eventually ended up in a panic due to duplicate metrics being registered. This PR fixes that and introduces a test to catch further regressions. * Remove disable orgs
This commit is contained in:
parent
fe5e49476b
commit
48d73cb148
@ -174,6 +174,7 @@ func (moa *MultiOrgAlertmanager) SyncAlertmanagersForOrgs(ctx context.Context, o
|
||||
if err != nil {
|
||||
moa.logger.Error("unable to create Alertmanager for org", "org", orgID, "err", err)
|
||||
}
|
||||
moa.alertmanagers[orgID] = am
|
||||
alertmanager = am
|
||||
}
|
||||
|
||||
|
@ -130,6 +130,63 @@ grafana_alerting_discovered_configurations 4
|
||||
}
|
||||
}
|
||||
|
||||
func TestMultiOrgAlertmanager_SyncAlertmanagersForOrgsWithFailures(t *testing.T) {
|
||||
// Include a broken configuration for organization 2.
|
||||
configStore := &FakeConfigStore{
|
||||
configs: map[int64]*models.AlertConfiguration{
|
||||
2: {AlertmanagerConfiguration: brokenConfig, OrgID: 2},
|
||||
},
|
||||
}
|
||||
orgStore := &FakeOrgStore{
|
||||
orgs: []int64{1, 2, 3},
|
||||
}
|
||||
|
||||
tmpDir, err := ioutil.TempDir("", "test")
|
||||
require.NoError(t, err)
|
||||
kvStore := newFakeKVStore(t)
|
||||
decryptFn := ossencryption.ProvideService().GetDecryptedValue
|
||||
reg := prometheus.NewPedanticRegistry()
|
||||
m := metrics.NewNGAlert(reg)
|
||||
cfg := &setting.Cfg{
|
||||
DataPath: tmpDir,
|
||||
UnifiedAlerting: setting.UnifiedAlertingSettings{
|
||||
AlertmanagerConfigPollInterval: 10 * time.Minute,
|
||||
DefaultConfiguration: setting.GetAlertmanagerDefaultConfiguration(),
|
||||
}, // do not poll in tests.
|
||||
}
|
||||
mam, err := NewMultiOrgAlertmanager(cfg, configStore, orgStore, kvStore, decryptFn, m.GetMultiOrgAlertmanagerMetrics(), log.New("testlogger"))
|
||||
require.NoError(t, err)
|
||||
ctx := context.Background()
|
||||
|
||||
// When you sync the first time, the alertmanager is created but is doesn't become ready until you have a configuration applied.
|
||||
{
|
||||
require.NoError(t, mam.LoadAndSyncAlertmanagersForOrgs(ctx))
|
||||
require.Len(t, mam.alertmanagers, 3)
|
||||
require.True(t, mam.alertmanagers[1].ready())
|
||||
require.False(t, mam.alertmanagers[2].ready())
|
||||
require.True(t, mam.alertmanagers[3].ready())
|
||||
}
|
||||
|
||||
// On the next sync, it never panics and alertmanager is still not ready.
|
||||
{
|
||||
require.NoError(t, mam.LoadAndSyncAlertmanagersForOrgs(ctx))
|
||||
require.Len(t, mam.alertmanagers, 3)
|
||||
require.True(t, mam.alertmanagers[1].ready())
|
||||
require.False(t, mam.alertmanagers[2].ready())
|
||||
require.True(t, mam.alertmanagers[3].ready())
|
||||
}
|
||||
|
||||
// If we fix the configuration, it becomes ready.
|
||||
{
|
||||
configStore.configs = map[int64]*models.AlertConfiguration{} // It'll apply the default config.
|
||||
require.NoError(t, mam.LoadAndSyncAlertmanagersForOrgs(ctx))
|
||||
require.Len(t, mam.alertmanagers, 3)
|
||||
require.True(t, mam.alertmanagers[1].ready())
|
||||
require.True(t, mam.alertmanagers[2].ready())
|
||||
require.True(t, mam.alertmanagers[3].ready())
|
||||
}
|
||||
}
|
||||
|
||||
func TestMultiOrgAlertmanager_AlertmanagerFor(t *testing.T) {
|
||||
configStore := &FakeConfigStore{
|
||||
configs: map[int64]*models.AlertConfiguration{},
|
||||
@ -197,3 +254,25 @@ func cleanOrgDirectories(path string, t *testing.T) func() {
|
||||
require.NoError(t, os.RemoveAll(path))
|
||||
}
|
||||
}
|
||||
|
||||
// brokenConfig is a deliberately malformed Alertmanager configuration payload.
// TestMultiOrgAlertmanager_SyncAlertmanagersForOrgsWithFailures stores it as the
// AlertmanagerConfiguration of organization 2 and expects that organization's
// Alertmanager not to report ready while this payload is in place.
//
// As written, the text is not a well-formed JSON document: it has no opening
// brace, there is no comma after the "addresses" entry, and "recipient" is
// followed by a trailing comma.
//
// NOTE(review): the "url" value contains bytes that appear mangled in this
// view — do not normalise or re-encode the literal; confirm the exact bytes
// against the upstream repository before touching it.
var brokenConfig = `
|
||||
"alertmanager_config": {
|
||||
"route": {
|
||||
"receiver": "grafana-default-email"
|
||||
},
|
||||
"receivers": [{
|
||||
"name": "grafana-default-email",
|
||||
"grafana_managed_receiver_configs": [{
|
||||
"uid": "",
|
||||
"name": "slack receiver",
|
||||
"type": "slack",
|
||||
"isDefault": true,
|
||||
"settings": {
|
||||
"addresses": "<example@email.com>"
|
||||
"url": "<22>r_<72><5F>q/b<><62><EFBFBD><EFBFBD><EFBFBD>p@ⱎȏ =<3D><>@ӹtd>Rú<52>H<EFBFBD><48> <20>;<3B>@Uf<55><66>0<EFBFBD>\k2*jh<6A>}Íu<C38D>)"2<EFBFBD>F6]<EFBFBD>}r<EFBFBD><EFBFBD>R<EFBFBD>b<EFBFBD>d<EFBFBD>J;<EFBFBD><EFBFBD>S퓧<EFBFBD><EFBFBD>$<EFBFBD><EFBFBD>",
|
||||
"recipient": "#graphana-metrics",
|
||||
}
|
||||
}]
|
||||
}]
|
||||
}
|
||||
}`
|
||||
|
Loading…
Reference in New Issue
Block a user