mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting: Remove url based external alertmanagers config (#57918)
* Remove URL-based alertmanagers from endpoint config
* WIP
* Add migration and alertmanagers from admin_configuration
* Empty comment removed
* Set BasicAuth true when user is present in URL
* Remove Alertmanagers from GET /admin_config payload
* Remove URL-based alertmanager configuration from UI
* Fix new uid generation in external alertmanagers migration
* Fix tests for URL-based external alertmanagers
* Fix API tests
* Add more tests, move migration code to separate file, and remove possible am duplicate urls
* Fix edge cases in migration
* Fix imports
* Remove useless fields and fix created_at/updated_at retrieval

Co-authored-by: George Robinson <george.robinson@grafana.com>
Co-authored-by: Konrad Lalik <konrad.lalik@grafana.com>
This commit is contained in:
@@ -2,9 +2,11 @@ package sender
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -103,45 +105,31 @@ func (d *AlertsRouter) SyncAndApplyConfigFromDatabase() error {
|
||||
continue
|
||||
}
|
||||
|
||||
externalAlertmanagers, err := d.alertmanagersFromDatasources(cfg.OrgID)
|
||||
alertmanagers, err := d.alertmanagersFromDatasources(cfg.OrgID)
|
||||
if err != nil {
|
||||
d.logger.Error("Failed to get alertmanagers from datasources",
|
||||
"org", cfg.OrgID,
|
||||
"error", err)
|
||||
d.logger.Error("Failed to get alertmanagers from datasources", "org", cfg.OrgID, "error", err)
|
||||
continue
|
||||
}
|
||||
cfg.Alertmanagers = append(cfg.Alertmanagers, externalAlertmanagers...)
|
||||
|
||||
// We have no running sender and no Alertmanager(s) configured, no-op.
|
||||
if !ok && len(cfg.Alertmanagers) == 0 {
|
||||
if !ok && len(alertmanagers) == 0 {
|
||||
d.logger.Debug("No external alertmanagers configured", "org", cfg.OrgID)
|
||||
continue
|
||||
}
|
||||
|
||||
// We have a running sender but no Alertmanager(s) configured, shut it down.
|
||||
if ok && len(cfg.Alertmanagers) == 0 {
|
||||
if ok && len(alertmanagers) == 0 {
|
||||
d.logger.Info("No external alertmanager(s) configured, sender will be stopped", "org", cfg.OrgID)
|
||||
delete(orgsFound, cfg.OrgID)
|
||||
continue
|
||||
}
|
||||
|
||||
// Avoid logging sensitive data
|
||||
var redactedAMs []string
|
||||
for _, am := range cfg.Alertmanagers {
|
||||
parsedAM, err := url.Parse(am)
|
||||
if err != nil {
|
||||
d.logger.Error("Failed to parse alertmanager string",
|
||||
"org", cfg.OrgID,
|
||||
"error", err)
|
||||
continue
|
||||
}
|
||||
redactedAMs = append(redactedAMs, parsedAM.Redacted())
|
||||
}
|
||||
|
||||
redactedAMs := buildRedactedAMs(d.logger, alertmanagers, cfg.OrgID)
|
||||
d.logger.Debug("Alertmanagers found in the configuration", "alertmanagers", redactedAMs)
|
||||
|
||||
// We have a running sender, check if we need to apply a new config.
|
||||
amHash := cfg.AsSHA256()
|
||||
amHash := asSHA256(alertmanagers)
|
||||
if ok {
|
||||
if d.externalAlertmanagersCfgHash[cfg.OrgID] == amHash {
|
||||
d.logger.Debug("Sender configuration is the same as the one running, no-op", "org", cfg.OrgID, "alertmanagers", redactedAMs)
|
||||
@@ -149,7 +137,7 @@ func (d *AlertsRouter) SyncAndApplyConfigFromDatabase() error {
|
||||
}
|
||||
|
||||
d.logger.Info("Applying new configuration to sender", "org", cfg.OrgID, "alertmanagers", redactedAMs, "cfg", cfg.ID)
|
||||
err := existing.ApplyConfig(cfg)
|
||||
err := existing.ApplyConfig(cfg.OrgID, cfg.ID, alertmanagers)
|
||||
if err != nil {
|
||||
d.logger.Error("Failed to apply configuration", "error", err, "org", cfg.OrgID)
|
||||
continue
|
||||
@@ -164,7 +152,7 @@ func (d *AlertsRouter) SyncAndApplyConfigFromDatabase() error {
|
||||
d.externalAlertmanagers[cfg.OrgID] = s
|
||||
s.Run()
|
||||
|
||||
err = s.ApplyConfig(cfg)
|
||||
err = s.ApplyConfig(cfg.OrgID, cfg.ID, alertmanagers)
|
||||
if err != nil {
|
||||
d.logger.Error("Failed to apply configuration", "error", err, "org", cfg.OrgID)
|
||||
continue
|
||||
@@ -184,7 +172,7 @@ func (d *AlertsRouter) SyncAndApplyConfigFromDatabase() error {
|
||||
}
|
||||
d.adminConfigMtx.Unlock()
|
||||
|
||||
// We can now stop these externalAlertmanagers w/o having to hold a lock.
|
||||
// We can now stop these external Alertmanagers w/o having to hold a lock.
|
||||
for orgID, s := range sendersToStop {
|
||||
d.logger.Info("Stopping sender", "org", orgID)
|
||||
s.Stop()
|
||||
@@ -196,6 +184,26 @@ func (d *AlertsRouter) SyncAndApplyConfigFromDatabase() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func buildRedactedAMs(l log.Logger, alertmanagers []string, ordId int64) []string {
|
||||
var redactedAMs []string
|
||||
for _, am := range alertmanagers {
|
||||
parsedAM, err := url.Parse(am)
|
||||
if err != nil {
|
||||
l.Error("Failed to parse alertmanager string", "org", ordId, "error", err)
|
||||
continue
|
||||
}
|
||||
redactedAMs = append(redactedAMs, parsedAM.Redacted())
|
||||
}
|
||||
return redactedAMs
|
||||
}
|
||||
|
||||
// asSHA256 returns a hex-encoded SHA-256 fingerprint of the given slice of
// strings. The strings are hashed in sorted order so the fingerprint does
// not depend on input ordering. The input slice is copied before sorting;
// the original implementation called sort.Strings on the caller's slice,
// reordering it as a hidden side effect.
func asSHA256(urls []string) string {
	sorted := make([]string, len(urls))
	copy(sorted, urls)
	sort.Strings(sorted)

	h := sha256.New()
	// The %v rendering of the sorted slice is a stable canonical form.
	_, _ = h.Write([]byte(fmt.Sprintf("%v", sorted)))
	return fmt.Sprintf("%x", h.Sum(nil))
}
|
||||
|
||||
func (d *AlertsRouter) alertmanagersFromDatasources(orgID int64) ([]string, error) {
|
||||
var alertmanagers []string
|
||||
// We might have alertmanager datasources that are acting as external
|
||||
|
||||
@@ -10,10 +10,12 @@ import (
|
||||
|
||||
"github.com/benbjohnson/clock"
|
||||
"github.com/go-openapi/strfmt"
|
||||
"github.com/grafana/grafana/pkg/infra/log/logtest"
|
||||
models2 "github.com/prometheus/alertmanager/api/v2/models"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/grafana/grafana/pkg/components/simplejson"
|
||||
"github.com/grafana/grafana/pkg/infra/log"
|
||||
"github.com/grafana/grafana/pkg/services/datasources"
|
||||
fake_ds "github.com/grafana/grafana/pkg/services/datasources/fakes"
|
||||
@@ -47,11 +49,20 @@ func TestSendingToExternalAlertmanager(t *testing.T) {
|
||||
Host: "localhost",
|
||||
}
|
||||
|
||||
ds1 := datasources.DataSource{
|
||||
Url: fakeAM.Server.URL,
|
||||
OrgId: ruleKey.OrgID,
|
||||
Type: datasources.DS_ALERTMANAGER,
|
||||
JsonData: simplejson.NewFromAny(map[string]interface{}{
|
||||
"handleGrafanaManagedAlerts": true,
|
||||
"implementation": "prometheus",
|
||||
}),
|
||||
}
|
||||
alertsRouter := NewAlertsRouter(moa, fakeAdminConfigStore, mockedClock, appUrl, map[int64]struct{}{}, 10*time.Minute,
|
||||
&fake_ds.FakeDataSourceService{}, fake_secrets.NewFakeSecretsService())
|
||||
&fake_ds.FakeDataSourceService{DataSources: []*datasources.DataSource{&ds1}}, fake_secrets.NewFakeSecretsService())
|
||||
|
||||
mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
|
||||
{OrgID: ruleKey.OrgID, Alertmanagers: []string{fakeAM.Server.URL}, SendAlertsTo: models.AllAlertmanagers},
|
||||
{OrgID: ruleKey.OrgID, SendAlertsTo: models.AllAlertmanagers},
|
||||
}, nil)
|
||||
// Make sure we sync the configuration at least once before the evaluation happens to guarantee the sender is running
|
||||
// when the first alert triggers.
|
||||
@@ -105,11 +116,21 @@ func TestSendingToExternalAlertmanager_WithMultipleOrgs(t *testing.T) {
|
||||
Host: "localhost",
|
||||
}
|
||||
|
||||
ds1 := datasources.DataSource{
|
||||
Url: fakeAM.Server.URL,
|
||||
OrgId: ruleKey1.OrgID,
|
||||
Type: datasources.DS_ALERTMANAGER,
|
||||
JsonData: simplejson.NewFromAny(map[string]interface{}{
|
||||
"handleGrafanaManagedAlerts": true,
|
||||
"implementation": "prometheus",
|
||||
}),
|
||||
}
|
||||
fakeDs := &fake_ds.FakeDataSourceService{DataSources: []*datasources.DataSource{&ds1}}
|
||||
alertsRouter := NewAlertsRouter(moa, fakeAdminConfigStore, mockedClock, appUrl, map[int64]struct{}{}, 10*time.Minute,
|
||||
&fake_ds.FakeDataSourceService{}, fake_secrets.NewFakeSecretsService())
|
||||
fakeDs, fake_secrets.NewFakeSecretsService())
|
||||
|
||||
mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
|
||||
{OrgID: ruleKey1.OrgID, Alertmanagers: []string{fakeAM.Server.URL}, SendAlertsTo: models.AllAlertmanagers},
|
||||
{OrgID: ruleKey1.OrgID, SendAlertsTo: models.AllAlertmanagers},
|
||||
}, nil)
|
||||
|
||||
// Make sure we sync the configuration at least once before the evaluation happens to guarantee the sender is running
|
||||
@@ -122,9 +143,20 @@ func TestSendingToExternalAlertmanager_WithMultipleOrgs(t *testing.T) {
|
||||
assertAlertmanagersStatusForOrg(t, alertsRouter, ruleKey1.OrgID, 1, 0)
|
||||
|
||||
// 1. Now, let's assume a new org comes along.
|
||||
ds2 := datasources.DataSource{
|
||||
Url: fakeAM.Server.URL,
|
||||
OrgId: ruleKey2.OrgID,
|
||||
Type: datasources.DS_ALERTMANAGER,
|
||||
JsonData: simplejson.NewFromAny(map[string]interface{}{
|
||||
"handleGrafanaManagedAlerts": true,
|
||||
"implementation": "prometheus",
|
||||
}),
|
||||
}
|
||||
fakeDs.DataSources = append(fakeDs.DataSources, &ds2)
|
||||
|
||||
mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
|
||||
{OrgID: ruleKey1.OrgID, Alertmanagers: []string{fakeAM.Server.URL}, SendAlertsTo: models.AllAlertmanagers},
|
||||
{OrgID: ruleKey2.OrgID, Alertmanagers: []string{fakeAM.Server.URL}},
|
||||
{OrgID: ruleKey1.OrgID, SendAlertsTo: models.AllAlertmanagers},
|
||||
{OrgID: ruleKey2.OrgID},
|
||||
}, nil)
|
||||
|
||||
// If we sync again, new externalAlertmanagers must have spawned.
|
||||
@@ -157,10 +189,20 @@ func TestSendingToExternalAlertmanager_WithMultipleOrgs(t *testing.T) {
|
||||
|
||||
// 2. Next, let's modify the configuration of an organization by adding an extra alertmanager.
|
||||
fakeAM2 := NewFakeExternalAlertmanager(t)
|
||||
ds3 := datasources.DataSource{
|
||||
Url: fakeAM2.Server.URL,
|
||||
OrgId: ruleKey2.OrgID,
|
||||
Type: datasources.DS_ALERTMANAGER,
|
||||
JsonData: simplejson.NewFromAny(map[string]interface{}{
|
||||
"handleGrafanaManagedAlerts": true,
|
||||
"implementation": "prometheus",
|
||||
}),
|
||||
}
|
||||
fakeDs.DataSources = append(fakeDs.DataSources, &ds3)
|
||||
|
||||
mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
|
||||
{OrgID: ruleKey1.OrgID, Alertmanagers: []string{fakeAM.Server.URL}, SendAlertsTo: models.AllAlertmanagers},
|
||||
{OrgID: ruleKey2.OrgID, Alertmanagers: []string{fakeAM.Server.URL, fakeAM2.Server.URL}},
|
||||
{OrgID: ruleKey1.OrgID, SendAlertsTo: models.AllAlertmanagers},
|
||||
{OrgID: ruleKey2.OrgID},
|
||||
}, nil)
|
||||
|
||||
// Before we sync, let's grab the existing hash of this particular org.
|
||||
@@ -177,9 +219,10 @@ func TestSendingToExternalAlertmanager_WithMultipleOrgs(t *testing.T) {
|
||||
assertAlertmanagersStatusForOrg(t, alertsRouter, ruleKey2.OrgID, 2, 0)
|
||||
|
||||
// 3. Now, let's provide a configuration that fails for OrgID = 1.
|
||||
fakeDs.DataSources[0].Url = "123://invalid.org"
|
||||
mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
|
||||
{OrgID: ruleKey1.OrgID, Alertmanagers: []string{"123://invalid.org"}, SendAlertsTo: models.AllAlertmanagers},
|
||||
{OrgID: ruleKey2.OrgID, Alertmanagers: []string{fakeAM.Server.URL, fakeAM2.Server.URL}},
|
||||
{OrgID: ruleKey1.OrgID, SendAlertsTo: models.AllAlertmanagers},
|
||||
{OrgID: ruleKey2.OrgID},
|
||||
}, nil)
|
||||
|
||||
// Before we sync, let's get the current config hash.
|
||||
@@ -188,14 +231,15 @@ func TestSendingToExternalAlertmanager_WithMultipleOrgs(t *testing.T) {
|
||||
// Now, sync again.
|
||||
require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
|
||||
|
||||
// The old configuration should still be running.
|
||||
require.Equal(t, alertsRouter.externalAlertmanagersCfgHash[ruleKey1.OrgID], currentHash)
|
||||
require.Equal(t, 1, len(alertsRouter.AlertmanagersFor(ruleKey1.OrgID)))
|
||||
// The old configuration should not be running.
|
||||
require.NotEqual(t, alertsRouter.externalAlertmanagersCfgHash[ruleKey1.OrgID], currentHash)
|
||||
require.Equal(t, 0, len(alertsRouter.AlertmanagersFor(ruleKey1.OrgID)))
|
||||
|
||||
// If we fix it - it should be applied.
|
||||
fakeDs.DataSources[0].Url = "notarealalertmanager:3030"
|
||||
mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
|
||||
{OrgID: ruleKey1.OrgID, Alertmanagers: []string{"notarealalertmanager:3030"}, SendAlertsTo: models.AllAlertmanagers},
|
||||
{OrgID: ruleKey2.OrgID, Alertmanagers: []string{fakeAM.Server.URL, fakeAM2.Server.URL}},
|
||||
{OrgID: ruleKey1.OrgID, SendAlertsTo: models.AllAlertmanagers},
|
||||
{OrgID: ruleKey2.OrgID},
|
||||
}, nil)
|
||||
|
||||
require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
|
||||
@@ -232,11 +276,20 @@ func TestChangingAlertmanagersChoice(t *testing.T) {
|
||||
Host: "localhost",
|
||||
}
|
||||
|
||||
ds := datasources.DataSource{
|
||||
Url: fakeAM.Server.URL,
|
||||
OrgId: ruleKey.OrgID,
|
||||
Type: datasources.DS_ALERTMANAGER,
|
||||
JsonData: simplejson.NewFromAny(map[string]interface{}{
|
||||
"handleGrafanaManagedAlerts": true,
|
||||
"implementation": "prometheus",
|
||||
}),
|
||||
}
|
||||
alertsRouter := NewAlertsRouter(moa, fakeAdminConfigStore, mockedClock, appUrl, map[int64]struct{}{},
|
||||
10*time.Minute, &fake_ds.FakeDataSourceService{}, fake_secrets.NewFakeSecretsService())
|
||||
10*time.Minute, &fake_ds.FakeDataSourceService{DataSources: []*datasources.DataSource{&ds}}, fake_secrets.NewFakeSecretsService())
|
||||
|
||||
mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
|
||||
{OrgID: ruleKey.OrgID, Alertmanagers: []string{fakeAM.Server.URL}, SendAlertsTo: models.AllAlertmanagers},
|
||||
{OrgID: ruleKey.OrgID, SendAlertsTo: models.AllAlertmanagers},
|
||||
}, nil)
|
||||
// Make sure we sync the configuration at least once before the evaluation happens to guarantee the sender is running
|
||||
// when the first alert triggers.
|
||||
@@ -262,7 +315,7 @@ func TestChangingAlertmanagersChoice(t *testing.T) {
|
||||
|
||||
// Now, let's change the Alertmanagers choice to send only to the external Alertmanager.
|
||||
mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
|
||||
{OrgID: ruleKey.OrgID, Alertmanagers: []string{fakeAM.Server.URL}, SendAlertsTo: models.ExternalAlertmanagers},
|
||||
{OrgID: ruleKey.OrgID, SendAlertsTo: models.ExternalAlertmanagers},
|
||||
}, nil)
|
||||
// Again, make sure we sync and verify the externalAlertmanagers.
|
||||
require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
|
||||
@@ -274,7 +327,7 @@ func TestChangingAlertmanagersChoice(t *testing.T) {
|
||||
|
||||
// Finally, let's change the Alertmanagers choice to send only to the internal Alertmanager.
|
||||
mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
|
||||
{OrgID: ruleKey.OrgID, Alertmanagers: []string{fakeAM.Server.URL}, SendAlertsTo: models.InternalAlertmanager},
|
||||
{OrgID: ruleKey.OrgID, SendAlertsTo: models.InternalAlertmanager},
|
||||
}, nil)
|
||||
|
||||
// Again, make sure we sync and verify the externalAlertmanagers.
|
||||
@@ -441,3 +494,62 @@ func TestBuildExternalURL(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestAlertManegers_asSHA256(t *testing.T) {
|
||||
tc := []struct {
|
||||
name string
|
||||
amUrls []string
|
||||
ciphertext string
|
||||
}{
|
||||
{
|
||||
name: "asSHA256",
|
||||
amUrls: []string{"http://localhost:9093"},
|
||||
ciphertext: "3ec9db375a5ba12f7c7b704922cf4b8e21a31e30d85be2386803829f0ee24410",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tc {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
require.Equal(t, tt.ciphertext, asSHA256(tt.amUrls))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestAlertManagers_buildRedactedAMs(t *testing.T) {
|
||||
fakeLogger := logtest.Fake{}
|
||||
|
||||
tc := []struct {
|
||||
name string
|
||||
orgId int64
|
||||
amUrls []string
|
||||
errCalls int
|
||||
errLog string
|
||||
errCtx []interface{}
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
name: "buildRedactedAMs",
|
||||
orgId: 1,
|
||||
amUrls: []string{"http://user:password@localhost:9093"},
|
||||
errCalls: 0,
|
||||
errLog: "",
|
||||
expected: []string{"http://user:xxxxx@localhost:9093"},
|
||||
},
|
||||
{
|
||||
name: "Error building redacted AM URLs",
|
||||
orgId: 2,
|
||||
amUrls: []string{"1234://user:password@localhost:9094"},
|
||||
errCalls: 1,
|
||||
errLog: "Failed to parse alertmanager string",
|
||||
expected: nil,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tc {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
require.Equal(t, tt.expected, buildRedactedAMs(&fakeLogger, tt.amUrls, tt.orgId))
|
||||
require.Equal(t, tt.errCalls, fakeLogger.ErrorLogs.Calls)
|
||||
require.Equal(t, tt.errLog, fakeLogger.ErrorLogs.Message)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,8 +14,6 @@ import (
|
||||
|
||||
"github.com/grafana/grafana/pkg/infra/log"
|
||||
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
|
||||
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||
|
||||
"github.com/prometheus/alertmanager/api/v2/models"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
common_config "github.com/prometheus/common/config"
|
||||
@@ -63,13 +61,13 @@ func NewExternalAlertmanagerSender() *ExternalAlertmanager {
|
||||
}
|
||||
|
||||
// ApplyConfig syncs a configuration with the sender.
|
||||
func (s *ExternalAlertmanager) ApplyConfig(cfg *ngmodels.AdminConfiguration) error {
|
||||
notifierCfg, err := buildNotifierConfig(cfg)
|
||||
func (s *ExternalAlertmanager) ApplyConfig(orgId, id int64, alertmanagers []string) error {
|
||||
notifierCfg, err := buildNotifierConfig(alertmanagers)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
s.logger = s.logger.New("org", cfg.OrgID, "cfg", cfg.ID)
|
||||
s.logger = s.logger.New("org", orgId, "cfg", id)
|
||||
|
||||
s.logger.Info("Synchronizing config with external Alertmanager group")
|
||||
if err := s.manager.ApplyConfig(notifierCfg); err != nil {
|
||||
@@ -134,9 +132,9 @@ func (s *ExternalAlertmanager) DroppedAlertmanagers() []*url.URL {
|
||||
return s.manager.DroppedAlertmanagers()
|
||||
}
|
||||
|
||||
func buildNotifierConfig(cfg *ngmodels.AdminConfiguration) (*config.Config, error) {
|
||||
amConfigs := make([]*config.AlertmanagerConfig, 0, len(cfg.Alertmanagers))
|
||||
for _, amURL := range cfg.Alertmanagers {
|
||||
func buildNotifierConfig(alertmanagers []string) (*config.Config, error) {
|
||||
amConfigs := make([]*config.AlertmanagerConfig, 0, len(alertmanagers))
|
||||
for _, amURL := range alertmanagers {
|
||||
u, err := url.Parse(amURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
||||
Reference in New Issue
Block a user