Alerting: Remove URL-based external alertmanagers config (#57918)

* Remove URL-based alertmanagers from endpoint config

* WIP

* Add migration and alertmanagers from admin_configuration

* Empty comment removed

* Set BasicAuth to true when a user is present in the URL

* Remove Alertmanagers from GET /admin_config payload

* Remove URL-based alertmanager configuration from UI

* Fix new UID generation in external alertmanagers migration

* Fix tests for URL-based external alertmanagers

* Fix API tests

* Add more tests, move migration code to a separate file, and remove possible duplicate Alertmanager URLs

* Fix edge cases in migration

* Fix imports

* Remove useless fields and fix created_at/updated_at retrieval

Co-authored-by: George Robinson <george.robinson@grafana.com>
Co-authored-by: Konrad Lalik <konrad.lalik@grafana.com>
Author: Alex Moreno
Date: 2022-11-10 16:34:13 +01:00
Committed by: GitHub
Parent: 738e023d13
Commit: 45facbba11
21 changed files with 411 additions and 796 deletions


@@ -2,9 +2,11 @@ package sender
 import (
     "context"
+    "crypto/sha256"
     "errors"
     "fmt"
     "net/url"
+    "sort"
     "sync"
     "time"
@@ -103,45 +105,31 @@ func (d *AlertsRouter) SyncAndApplyConfigFromDatabase() error {
             continue
         }
-        externalAlertmanagers, err := d.alertmanagersFromDatasources(cfg.OrgID)
+        alertmanagers, err := d.alertmanagersFromDatasources(cfg.OrgID)
         if err != nil {
-            d.logger.Error("Failed to get alertmanagers from datasources",
-                "org", cfg.OrgID,
-                "error", err)
+            d.logger.Error("Failed to get alertmanagers from datasources", "org", cfg.OrgID, "error", err)
             continue
         }
-        cfg.Alertmanagers = append(cfg.Alertmanagers, externalAlertmanagers...)
         // We have no running sender and no Alertmanager(s) configured, no-op.
-        if !ok && len(cfg.Alertmanagers) == 0 {
+        if !ok && len(alertmanagers) == 0 {
             d.logger.Debug("No external alertmanagers configured", "org", cfg.OrgID)
             continue
         }
         // We have a running sender but no Alertmanager(s) configured, shut it down.
-        if ok && len(cfg.Alertmanagers) == 0 {
+        if ok && len(alertmanagers) == 0 {
             d.logger.Info("No external alertmanager(s) configured, sender will be stopped", "org", cfg.OrgID)
             delete(orgsFound, cfg.OrgID)
             continue
         }
-        // Avoid logging sensitive data
-        var redactedAMs []string
-        for _, am := range cfg.Alertmanagers {
-            parsedAM, err := url.Parse(am)
-            if err != nil {
-                d.logger.Error("Failed to parse alertmanager string",
-                    "org", cfg.OrgID,
-                    "error", err)
-                continue
-            }
-            redactedAMs = append(redactedAMs, parsedAM.Redacted())
-        }
+        redactedAMs := buildRedactedAMs(d.logger, alertmanagers, cfg.OrgID)
         d.logger.Debug("Alertmanagers found in the configuration", "alertmanagers", redactedAMs)
         // We have a running sender, check if we need to apply a new config.
-        amHash := cfg.AsSHA256()
+        amHash := asSHA256(alertmanagers)
         if ok {
             if d.externalAlertmanagersCfgHash[cfg.OrgID] == amHash {
                 d.logger.Debug("Sender configuration is the same as the one running, no-op", "org", cfg.OrgID, "alertmanagers", redactedAMs)
@@ -149,7 +137,7 @@ func (d *AlertsRouter) SyncAndApplyConfigFromDatabase() error {
             }
             d.logger.Info("Applying new configuration to sender", "org", cfg.OrgID, "alertmanagers", redactedAMs, "cfg", cfg.ID)
-            err := existing.ApplyConfig(cfg)
+            err := existing.ApplyConfig(cfg.OrgID, cfg.ID, alertmanagers)
             if err != nil {
                 d.logger.Error("Failed to apply configuration", "error", err, "org", cfg.OrgID)
                 continue
@@ -164,7 +152,7 @@ func (d *AlertsRouter) SyncAndApplyConfigFromDatabase() error {
         d.externalAlertmanagers[cfg.OrgID] = s
         s.Run()
-        err = s.ApplyConfig(cfg)
+        err = s.ApplyConfig(cfg.OrgID, cfg.ID, alertmanagers)
         if err != nil {
             d.logger.Error("Failed to apply configuration", "error", err, "org", cfg.OrgID)
             continue
@@ -184,7 +172,7 @@ func (d *AlertsRouter) SyncAndApplyConfigFromDatabase() error {
     }
     d.adminConfigMtx.Unlock()
-    // We can now stop these externalAlertmanagers w/o having to hold a lock.
+    // We can now stop these external Alertmanagers w/o having to hold a lock.
     for orgID, s := range sendersToStop {
         d.logger.Info("Stopping sender", "org", orgID)
         s.Stop()
@@ -196,6 +184,26 @@ func (d *AlertsRouter) SyncAndApplyConfigFromDatabase() error {
     return nil
 }
 
+func buildRedactedAMs(l log.Logger, alertmanagers []string, ordId int64) []string {
+    var redactedAMs []string
+    for _, am := range alertmanagers {
+        parsedAM, err := url.Parse(am)
+        if err != nil {
+            l.Error("Failed to parse alertmanager string", "org", ordId, "error", err)
+            continue
+        }
+        redactedAMs = append(redactedAMs, parsedAM.Redacted())
+    }
+    return redactedAMs
+}
+
+func asSHA256(strings []string) string {
+    h := sha256.New()
+    sort.Strings(strings)
+    _, _ = h.Write([]byte(fmt.Sprintf("%v", strings)))
+    return fmt.Sprintf("%x", h.Sum(nil))
+}
+
 func (d *AlertsRouter) alertmanagersFromDatasources(orgID int64) ([]string, error) {
     var alertmanagers []string
     // We might have alertmanager datasources that are acting as external
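
The two helpers added above can be exercised on their own. Below is a minimal, standalone sketch (not part of the commit; the URLs are placeholders) of how the router now redacts credentials before logging and derives the per-org configuration hash used to decide whether a sender must be reconfigured:

package main

import (
    "crypto/sha256"
    "fmt"
    "net/url"
    "sort"
)

// Mirrors the asSHA256 helper added in this commit: sorting first makes the
// hash independent of the order in which Alertmanager URLs are discovered.
func asSHA256(urls []string) string {
    h := sha256.New()
    sort.Strings(urls)
    _, _ = h.Write([]byte(fmt.Sprintf("%v", urls)))
    return fmt.Sprintf("%x", h.Sum(nil))
}

func main() {
    ams := []string{"http://user:secret@am-1:9093", "http://am-2:9093"}

    // Redact credentials before logging, as buildRedactedAMs does.
    for _, am := range ams {
        if u, err := url.Parse(am); err == nil {
            fmt.Println(u.Redacted()) // http://user:xxxxx@am-1:9093
        }
    }

    // The router compares this hash with the one stored for the org to decide
    // whether ApplyConfig needs to be called again.
    fmt.Println(asSHA256(ams))
}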


@@ -10,10 +10,12 @@ import (
     "github.com/benbjohnson/clock"
     "github.com/go-openapi/strfmt"
+    "github.com/grafana/grafana/pkg/infra/log/logtest"
     models2 "github.com/prometheus/alertmanager/api/v2/models"
     "github.com/prometheus/client_golang/prometheus"
     "github.com/stretchr/testify/require"
+    "github.com/grafana/grafana/pkg/components/simplejson"
     "github.com/grafana/grafana/pkg/infra/log"
     "github.com/grafana/grafana/pkg/services/datasources"
     fake_ds "github.com/grafana/grafana/pkg/services/datasources/fakes"
@@ -47,11 +49,20 @@ func TestSendingToExternalAlertmanager(t *testing.T) {
         Host: "localhost",
     }
+    ds1 := datasources.DataSource{
+        Url: fakeAM.Server.URL,
+        OrgId: ruleKey.OrgID,
+        Type: datasources.DS_ALERTMANAGER,
+        JsonData: simplejson.NewFromAny(map[string]interface{}{
+            "handleGrafanaManagedAlerts": true,
+            "implementation": "prometheus",
+        }),
+    }
     alertsRouter := NewAlertsRouter(moa, fakeAdminConfigStore, mockedClock, appUrl, map[int64]struct{}{}, 10*time.Minute,
-        &fake_ds.FakeDataSourceService{}, fake_secrets.NewFakeSecretsService())
+        &fake_ds.FakeDataSourceService{DataSources: []*datasources.DataSource{&ds1}}, fake_secrets.NewFakeSecretsService())
     mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
-        {OrgID: ruleKey.OrgID, Alertmanagers: []string{fakeAM.Server.URL}, SendAlertsTo: models.AllAlertmanagers},
+        {OrgID: ruleKey.OrgID, SendAlertsTo: models.AllAlertmanagers},
     }, nil)
     // Make sure we sync the configuration at least once before the evaluation happens to guarantee the sender is running
     // when the first alert triggers.
@@ -105,11 +116,21 @@ func TestSendingToExternalAlertmanager_WithMultipleOrgs(t *testing.T) {
         Host: "localhost",
     }
+    ds1 := datasources.DataSource{
+        Url: fakeAM.Server.URL,
+        OrgId: ruleKey1.OrgID,
+        Type: datasources.DS_ALERTMANAGER,
+        JsonData: simplejson.NewFromAny(map[string]interface{}{
+            "handleGrafanaManagedAlerts": true,
+            "implementation": "prometheus",
+        }),
+    }
+    fakeDs := &fake_ds.FakeDataSourceService{DataSources: []*datasources.DataSource{&ds1}}
     alertsRouter := NewAlertsRouter(moa, fakeAdminConfigStore, mockedClock, appUrl, map[int64]struct{}{}, 10*time.Minute,
-        &fake_ds.FakeDataSourceService{}, fake_secrets.NewFakeSecretsService())
+        fakeDs, fake_secrets.NewFakeSecretsService())
     mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
-        {OrgID: ruleKey1.OrgID, Alertmanagers: []string{fakeAM.Server.URL}, SendAlertsTo: models.AllAlertmanagers},
+        {OrgID: ruleKey1.OrgID, SendAlertsTo: models.AllAlertmanagers},
     }, nil)
     // Make sure we sync the configuration at least once before the evaluation happens to guarantee the sender is running
@@ -122,9 +143,20 @@ func TestSendingToExternalAlertmanager_WithMultipleOrgs(t *testing.T) {
     assertAlertmanagersStatusForOrg(t, alertsRouter, ruleKey1.OrgID, 1, 0)
     // 1. Now, let's assume a new org comes along.
+    ds2 := datasources.DataSource{
+        Url: fakeAM.Server.URL,
+        OrgId: ruleKey2.OrgID,
+        Type: datasources.DS_ALERTMANAGER,
+        JsonData: simplejson.NewFromAny(map[string]interface{}{
+            "handleGrafanaManagedAlerts": true,
+            "implementation": "prometheus",
+        }),
+    }
+    fakeDs.DataSources = append(fakeDs.DataSources, &ds2)
     mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
-        {OrgID: ruleKey1.OrgID, Alertmanagers: []string{fakeAM.Server.URL}, SendAlertsTo: models.AllAlertmanagers},
-        {OrgID: ruleKey2.OrgID, Alertmanagers: []string{fakeAM.Server.URL}},
+        {OrgID: ruleKey1.OrgID, SendAlertsTo: models.AllAlertmanagers},
+        {OrgID: ruleKey2.OrgID},
     }, nil)
     // If we sync again, new externalAlertmanagers must have spawned.
@@ -157,10 +189,20 @@ func TestSendingToExternalAlertmanager_WithMultipleOrgs(t *testing.T) {
     // 2. Next, let's modify the configuration of an organization by adding an extra alertmanager.
     fakeAM2 := NewFakeExternalAlertmanager(t)
+    ds3 := datasources.DataSource{
+        Url: fakeAM2.Server.URL,
+        OrgId: ruleKey2.OrgID,
+        Type: datasources.DS_ALERTMANAGER,
+        JsonData: simplejson.NewFromAny(map[string]interface{}{
+            "handleGrafanaManagedAlerts": true,
+            "implementation": "prometheus",
+        }),
+    }
+    fakeDs.DataSources = append(fakeDs.DataSources, &ds3)
     mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
-        {OrgID: ruleKey1.OrgID, Alertmanagers: []string{fakeAM.Server.URL}, SendAlertsTo: models.AllAlertmanagers},
-        {OrgID: ruleKey2.OrgID, Alertmanagers: []string{fakeAM.Server.URL, fakeAM2.Server.URL}},
+        {OrgID: ruleKey1.OrgID, SendAlertsTo: models.AllAlertmanagers},
+        {OrgID: ruleKey2.OrgID},
     }, nil)
     // Before we sync, let's grab the existing hash of this particular org.
@@ -177,9 +219,10 @@ func TestSendingToExternalAlertmanager_WithMultipleOrgs(t *testing.T) {
     assertAlertmanagersStatusForOrg(t, alertsRouter, ruleKey2.OrgID, 2, 0)
     // 3. Now, let's provide a configuration that fails for OrgID = 1.
+    fakeDs.DataSources[0].Url = "123://invalid.org"
     mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
-        {OrgID: ruleKey1.OrgID, Alertmanagers: []string{"123://invalid.org"}, SendAlertsTo: models.AllAlertmanagers},
-        {OrgID: ruleKey2.OrgID, Alertmanagers: []string{fakeAM.Server.URL, fakeAM2.Server.URL}},
+        {OrgID: ruleKey1.OrgID, SendAlertsTo: models.AllAlertmanagers},
+        {OrgID: ruleKey2.OrgID},
     }, nil)
     // Before we sync, let's get the current config hash.
@@ -188,14 +231,15 @@ func TestSendingToExternalAlertmanager_WithMultipleOrgs(t *testing.T) {
     // Now, sync again.
     require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
-    // The old configuration should still be running.
-    require.Equal(t, alertsRouter.externalAlertmanagersCfgHash[ruleKey1.OrgID], currentHash)
-    require.Equal(t, 1, len(alertsRouter.AlertmanagersFor(ruleKey1.OrgID)))
+    // The old configuration should not be running.
+    require.NotEqual(t, alertsRouter.externalAlertmanagersCfgHash[ruleKey1.OrgID], currentHash)
+    require.Equal(t, 0, len(alertsRouter.AlertmanagersFor(ruleKey1.OrgID)))
     // If we fix it - it should be applied.
+    fakeDs.DataSources[0].Url = "notarealalertmanager:3030"
     mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
-        {OrgID: ruleKey1.OrgID, Alertmanagers: []string{"notarealalertmanager:3030"}, SendAlertsTo: models.AllAlertmanagers},
-        {OrgID: ruleKey2.OrgID, Alertmanagers: []string{fakeAM.Server.URL, fakeAM2.Server.URL}},
+        {OrgID: ruleKey1.OrgID, SendAlertsTo: models.AllAlertmanagers},
+        {OrgID: ruleKey2.OrgID},
     }, nil)
     require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
@@ -232,11 +276,20 @@ func TestChangingAlertmanagersChoice(t *testing.T) {
         Host: "localhost",
     }
+    ds := datasources.DataSource{
+        Url: fakeAM.Server.URL,
+        OrgId: ruleKey.OrgID,
+        Type: datasources.DS_ALERTMANAGER,
+        JsonData: simplejson.NewFromAny(map[string]interface{}{
+            "handleGrafanaManagedAlerts": true,
+            "implementation": "prometheus",
+        }),
+    }
     alertsRouter := NewAlertsRouter(moa, fakeAdminConfigStore, mockedClock, appUrl, map[int64]struct{}{},
-        10*time.Minute, &fake_ds.FakeDataSourceService{}, fake_secrets.NewFakeSecretsService())
+        10*time.Minute, &fake_ds.FakeDataSourceService{DataSources: []*datasources.DataSource{&ds}}, fake_secrets.NewFakeSecretsService())
     mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
-        {OrgID: ruleKey.OrgID, Alertmanagers: []string{fakeAM.Server.URL}, SendAlertsTo: models.AllAlertmanagers},
+        {OrgID: ruleKey.OrgID, SendAlertsTo: models.AllAlertmanagers},
     }, nil)
     // Make sure we sync the configuration at least once before the evaluation happens to guarantee the sender is running
     // when the first alert triggers.
@@ -262,7 +315,7 @@ func TestChangingAlertmanagersChoice(t *testing.T) {
     // Now, let's change the Alertmanagers choice to send only to the external Alertmanager.
     mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
-        {OrgID: ruleKey.OrgID, Alertmanagers: []string{fakeAM.Server.URL}, SendAlertsTo: models.ExternalAlertmanagers},
+        {OrgID: ruleKey.OrgID, SendAlertsTo: models.ExternalAlertmanagers},
     }, nil)
     // Again, make sure we sync and verify the externalAlertmanagers.
     require.NoError(t, alertsRouter.SyncAndApplyConfigFromDatabase())
@@ -274,7 +327,7 @@ func TestChangingAlertmanagersChoice(t *testing.T) {
     // Finally, let's change the Alertmanagers choice to send only to the internal Alertmanager.
     mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
-        {OrgID: ruleKey.OrgID, Alertmanagers: []string{fakeAM.Server.URL}, SendAlertsTo: models.InternalAlertmanager},
+        {OrgID: ruleKey.OrgID, SendAlertsTo: models.InternalAlertmanager},
     }, nil)
     // Again, make sure we sync and verify the externalAlertmanagers.
@@ -441,3 +494,62 @@ func TestBuildExternalURL(t *testing.T) {
         })
     }
 }
+
+func TestAlertManegers_asSHA256(t *testing.T) {
+    tc := []struct {
+        name string
+        amUrls []string
+        ciphertext string
+    }{
+        {
+            name: "asSHA256",
+            amUrls: []string{"http://localhost:9093"},
+            ciphertext: "3ec9db375a5ba12f7c7b704922cf4b8e21a31e30d85be2386803829f0ee24410",
+        },
+    }
+    for _, tt := range tc {
+        t.Run(tt.name, func(t *testing.T) {
+            require.Equal(t, tt.ciphertext, asSHA256(tt.amUrls))
+        })
+    }
+}
+
+func TestAlertManagers_buildRedactedAMs(t *testing.T) {
+    fakeLogger := logtest.Fake{}
+    tc := []struct {
+        name string
+        orgId int64
+        amUrls []string
+        errCalls int
+        errLog string
+        errCtx []interface{}
+        expected []string
+    }{
+        {
+            name: "buildRedactedAMs",
+            orgId: 1,
+            amUrls: []string{"http://user:password@localhost:9093"},
+            errCalls: 0,
+            errLog: "",
+            expected: []string{"http://user:xxxxx@localhost:9093"},
+        },
+        {
+            name: "Error building redacted AM URLs",
+            orgId: 2,
+            amUrls: []string{"1234://user:password@localhost:9094"},
+            errCalls: 1,
+            errLog: "Failed to parse alertmanager string",
+            expected: nil,
+        },
+    }
+    for _, tt := range tc {
+        t.Run(tt.name, func(t *testing.T) {
+            require.Equal(t, tt.expected, buildRedactedAMs(&fakeLogger, tt.amUrls, tt.orgId))
+            require.Equal(t, tt.errCalls, fakeLogger.ErrorLogs.Calls)
+            require.Equal(t, tt.errLog, fakeLogger.ErrorLogs.Message)
+        })
+    }
+}
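
The test changes above all follow the same pattern: the external Alertmanager is no longer listed in the admin configuration's Alertmanagers field; it is modelled as an Alertmanager data source that opts in to handling Grafana-managed alerts, while the admin configuration keeps only the routing choice. A condensed fragment of that pattern, reusing identifiers from the tests (not a complete test by itself; the URL is a placeholder):

ds := datasources.DataSource{
    Url:   "http://localhost:9093", // the tests use fakeAM.Server.URL here
    OrgId: 1,
    Type:  datasources.DS_ALERTMANAGER,
    JsonData: simplejson.NewFromAny(map[string]interface{}{
        "handleGrafanaManagedAlerts": true, // only data sources with this flag are picked up
        "implementation":             "prometheus",
    }),
}
fakeDs := &fake_ds.FakeDataSourceService{DataSources: []*datasources.DataSource{&ds}}

// The router resolves external Alertmanagers from the data source service via
// alertmanagersFromDatasources; the admin configuration only carries the routing choice.
adminCfg := models.AdminConfiguration{OrgID: 1, SendAlertsTo: models.AllAlertmanagers}
_, _ = fakeDs, adminCfg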


@@ -14,8 +14,6 @@ import (
     "github.com/grafana/grafana/pkg/infra/log"
     apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
-    ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
     "github.com/prometheus/alertmanager/api/v2/models"
     "github.com/prometheus/client_golang/prometheus"
     common_config "github.com/prometheus/common/config"
@@ -63,13 +61,13 @@ func NewExternalAlertmanagerSender() *ExternalAlertmanager {
 }
 
 // ApplyConfig syncs a configuration with the sender.
-func (s *ExternalAlertmanager) ApplyConfig(cfg *ngmodels.AdminConfiguration) error {
-    notifierCfg, err := buildNotifierConfig(cfg)
+func (s *ExternalAlertmanager) ApplyConfig(orgId, id int64, alertmanagers []string) error {
+    notifierCfg, err := buildNotifierConfig(alertmanagers)
     if err != nil {
         return err
     }
-    s.logger = s.logger.New("org", cfg.OrgID, "cfg", cfg.ID)
+    s.logger = s.logger.New("org", orgId, "cfg", id)
     s.logger.Info("Synchronizing config with external Alertmanager group")
     if err := s.manager.ApplyConfig(notifierCfg); err != nil {
@@ -134,9 +132,9 @@ func (s *ExternalAlertmanager) DroppedAlertmanagers() []*url.URL {
     return s.manager.DroppedAlertmanagers()
 }
 
-func buildNotifierConfig(cfg *ngmodels.AdminConfiguration) (*config.Config, error) {
-    amConfigs := make([]*config.AlertmanagerConfig, 0, len(cfg.Alertmanagers))
-    for _, amURL := range cfg.Alertmanagers {
+func buildNotifierConfig(alertmanagers []string) (*config.Config, error) {
+    amConfigs := make([]*config.AlertmanagerConfig, 0, len(alertmanagers))
+    for _, amURL := range alertmanagers {
         u, err := url.Parse(amURL)
         if err != nil {
             return nil, err
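
Taken together, the sender's surface after this commit looks roughly like the sketch below: ApplyConfig no longer receives an AdminConfiguration but the org ID, the configuration ID, and the already-resolved list of Alertmanager URLs (the IDs and URL here are placeholders):

s := NewExternalAlertmanagerSender()
s.Run()

// The org and configuration IDs only scope the logger; the URL list is what
// buildNotifierConfig turns into the notifier configuration.
if err := s.ApplyConfig(1, 1, []string{"http://localhost:9093"}); err != nil {
    // an Alertmanager URL that cannot be parsed is rejected here
}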