grafana/pkg/services/ngalert/notifier/multiorg_alertmanager_test.go
gotjosh a2f4344bf2
Alerting: Refactor & fix unified alerting metrics structure (#39151)
* Alerting: Refactor & fix unified alerting metrics structure

Fixes and refactors the metrics structure we have for the ngalert service. Now, each component has its own metric struct that includes JUST the metrics it uses. Additionally, I have fixed the configuration metrics and added new metrics to determine if we have discovered and started all the necessary configurations of an instance.

This allows us to alert on `grafana_alerting_discovered_configurations - grafana_alerting_active_configurations != 0` to know whether an alertmanager instance did not start successfully.
2021-09-14 12:55:01 +01:00

129 lines
4.9 KiB
Go

package notifier
import (
"bytes"
"context"
"testing"
"time"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/setting"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/stretchr/testify/require"
)
// TestMultiOrgAlertmanager_SyncAlertmanagersForOrgs checks that every call to
// LoadAndSyncAlertmanagersForOrgs leaves the MultiOrgAlertmanager with one
// Alertmanager per org listed in the org store, and that the discovered/active
// configuration gauges report the same count.
//
// NOTE: the test is skipped unconditionally by the t.Skipf call at the top.
func TestMultiOrgAlertmanager_SyncAlertmanagersForOrgs(t *testing.T) {
	t.Skipf("Skipping multiorg alertmanager tests for now")

	cfgStore := &FakeConfigStore{configs: map[int64]*models.AlertConfiguration{}}
	orgs := &FakeOrgStore{orgs: []int64{1, 2, 3}}

	// Use a long interval so no polling happens during the unit test.
	SyncOrgsPollInterval = 10 * time.Minute

	kv := newFakeKVStore(t)
	registry := prometheus.NewPedanticRegistry()
	ngMetrics := metrics.NewNGAlert(registry)
	moa := NewMultiOrgAlertmanager(&setting.Cfg{}, cfgStore, orgs, kv, ngMetrics.GetMultiOrgAlertmanagerMetrics())
	ctx := context.Background()

	// syncAndCheck runs one sync pass, then asserts on the number of
	// Alertmanagers held and on the two configuration gauges in the registry.
	syncAndCheck := func(wantAlertmanagers int, wantMetrics string) {
		syncErr := moa.LoadAndSyncAlertmanagersForOrgs(ctx)
		require.NoError(t, syncErr)

		require.Len(t, moa.alertmanagers, wantAlertmanagers)

		cmpErr := testutil.GatherAndCompare(
			registry,
			bytes.NewBufferString(wantMetrics),
			"grafana_alerting_discovered_configurations",
			"grafana_alerting_active_configurations",
		)
		require.NoError(t, cmpErr)
	}

	// The initial sync must create exactly one Alertmanager for each org.
	syncAndCheck(3, `
# HELP grafana_alerting_active_configurations The number of active Alertmanager configurations.
# TYPE grafana_alerting_active_configurations gauge
grafana_alerting_active_configurations 3
# HELP grafana_alerting_discovered_configurations The number of organizations we've discovered that require an Alertmanager configuration.
# TYPE grafana_alerting_discovered_configurations gauge
grafana_alerting_discovered_configurations 3
`)

	// Dropping org 2 from the store must be picked up by the next sync.
	orgs.orgs = []int64{1, 3}
	syncAndCheck(2, `
# HELP grafana_alerting_active_configurations The number of active Alertmanager configurations.
# TYPE grafana_alerting_active_configurations gauge
grafana_alerting_active_configurations 2
# HELP grafana_alerting_discovered_configurations The number of organizations we've discovered that require an Alertmanager configuration.
# TYPE grafana_alerting_discovered_configurations gauge
grafana_alerting_discovered_configurations 2
`)

	// Once org 2 is back (alongside a new org 4), all four must be tracked.
	orgs.orgs = []int64{1, 2, 3, 4}
	syncAndCheck(4, `
# HELP grafana_alerting_active_configurations The number of active Alertmanager configurations.
# TYPE grafana_alerting_active_configurations gauge
grafana_alerting_active_configurations 4
# HELP grafana_alerting_discovered_configurations The number of organizations we've discovered that require an Alertmanager configuration.
# TYPE grafana_alerting_discovered_configurations gauge
grafana_alerting_discovered_configurations 4
`)
}
// TestMultiOrgAlertmanager_AlertmanagerFor exercises AlertmanagerFor on a
// MultiOrgAlertmanager synced for orgs 1, 2 and 3. It asserts the result for
// four cases: an org that was never known, an org whose Alertmanager has had
// its running config cleared, a healthy org, and an org that has been removed
// from the org store and synced away.
//
// NOTE: the test is skipped unconditionally by the t.Skipf call at the top.
func TestMultiOrgAlertmanager_AlertmanagerFor(t *testing.T) {
	t.Skipf("Skipping multiorg alertmanager tests for now")
	configStore := &FakeConfigStore{
		configs: map[int64]*models.AlertConfiguration{},
	}
	orgStore := &FakeOrgStore{
		orgs: []int64{1, 2, 3},
	}

	SyncOrgsPollInterval = 10 * time.Minute // Don't poll in unit tests.
	kvStore := newFakeKVStore(t)
	reg := prometheus.NewPedanticRegistry()
	m := metrics.NewNGAlert(reg)
	mam := NewMultiOrgAlertmanager(&setting.Cfg{}, configStore, orgStore, kvStore, m.GetMultiOrgAlertmanagerMetrics())
	ctx := context.Background()

	// Ensure that one Alertmanager is created per org.
	{
		require.NoError(t, mam.LoadAndSyncAlertmanagersForOrgs(ctx))
		require.Len(t, mam.alertmanagers, 3)
	}

	// First, let's try to request an Alertmanager from an org that doesn't exist.
	{
		_, err := mam.AlertmanagerFor(5)
		require.EqualError(t, err, ErrNoAlertmanagerForOrg.Error())
	}

	// Now, let's try to request an Alertmanager that is not ready.
	{
		// let's delete its "running config" to make it non-ready
		mam.alertmanagers[1].config = nil
		_, err := mam.AlertmanagerFor(1)
		require.EqualError(t, err, ErrAlertmanagerNotReady.Error())
	}

	// With an Alertmanager that exists, it responds correctly.
	{
		am, err := mam.AlertmanagerFor(2)
		require.NoError(t, err)
		// testify's Equal takes (expected, actual); keeping that order makes
		// the labels and diff in a failure message read correctly.
		require.Equal(t, "N/A", *am.GetStatus().VersionInfo.Version)
		require.Equal(t, int64(2), am.orgID)
		require.NotNil(t, am.config)
	}

	// Let's now remove the previously queried organization.
	orgStore.orgs = []int64{1, 3}
	require.NoError(t, mam.LoadAndSyncAlertmanagersForOrgs(ctx))
	{
		_, err := mam.AlertmanagerFor(2)
		require.EqualError(t, err, ErrNoAlertmanagerForOrg.Error())
	}
}