Alerting: take datasources as external alertmanagers into consideration (#52534)

This commit is contained in:
Jean-Philippe Quéméner
2022-07-20 16:50:49 +02:00
committed by GitHub
parent 5c4aa4a7ac
commit 50ae42130b
12 changed files with 332 additions and 24 deletions

View File

@@ -3,6 +3,7 @@ package sender
import (
"context"
"errors"
"fmt"
"net/url"
"sync"
"time"
@@ -10,10 +11,12 @@ import (
"github.com/benbjohnson/clock"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/datasources"
"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/notifier"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/services/secrets"
)
// AlertsRouter handles alerts generated during alert rule evaluation.
@@ -38,9 +41,14 @@ type AlertsRouter struct {
appURL *url.URL
disabledOrgs map[int64]struct{}
adminConfigPollInterval time.Duration
datasourceService datasources.DataSourceService
secretService secrets.Service
}
func NewAlertsRouter(multiOrgNotifier *notifier.MultiOrgAlertmanager, store store.AdminConfigurationStore, clk clock.Clock, appURL *url.URL, disabledOrgs map[int64]struct{}, configPollInterval time.Duration) *AlertsRouter {
func NewAlertsRouter(multiOrgNotifier *notifier.MultiOrgAlertmanager, store store.AdminConfigurationStore,
clk clock.Clock, appURL *url.URL, disabledOrgs map[int64]struct{}, configPollInterval time.Duration,
datasourceService datasources.DataSourceService, secretService secrets.Service) *AlertsRouter {
d := &AlertsRouter{
logger: log.New("alerts-router"),
clock: clk,
@@ -56,6 +64,9 @@ func NewAlertsRouter(multiOrgNotifier *notifier.MultiOrgAlertmanager, store stor
appURL: appURL,
disabledOrgs: disabledOrgs,
adminConfigPollInterval: configPollInterval,
datasourceService: datasourceService,
secretService: secretService,
}
return d
}
@@ -87,17 +98,27 @@ func (d *AlertsRouter) SyncAndApplyConfigFromDatabase() error {
existing, ok := d.externalAlertmanagers[cfg.OrgID]
// We have no running sender and no Alertmanager(s) configured, no-op.
if !ok && len(cfg.Alertmanagers) == 0 {
d.logger.Debug("no external alertmanagers configured", "org", cfg.OrgID)
continue
}
// We have no running sender and alerts are handled internally, no-op.
if !ok && cfg.SendAlertsTo == models.InternalAlertmanager {
d.logger.Debug("alerts are handled internally", "org", cfg.OrgID)
continue
}
externalAlertmanagers, err := d.alertmanagersFromDatasources(cfg.OrgID)
if err != nil {
d.logger.Error("failed to get alertmanagers from datasources",
"org", cfg.OrgID,
"err", err)
continue
}
cfg.Alertmanagers = append(cfg.Alertmanagers, externalAlertmanagers...)
// We have no running sender and no Alertmanager(s) configured, no-op.
if !ok && len(cfg.Alertmanagers) == 0 {
d.logger.Debug("no external alertmanagers configured", "org", cfg.OrgID)
continue
}
// We have a running sender but no Alertmanager(s) configured, shut it down.
if ok && len(cfg.Alertmanagers) == 0 {
d.logger.Debug("no external alertmanager(s) configured, sender will be stopped", "org", cfg.OrgID)
@@ -165,6 +186,56 @@ func (d *AlertsRouter) SyncAndApplyConfigFromDatabase() error {
return nil
}
// alertmanagersFromDatasources returns the URLs of the Alertmanager
// datasources of the given org that are flagged, through the
// definitions.HandleGrafanaManagedAlerts key of their JSON data, as handling
// Grafana-managed alerts.
//
// The datasource lookup runs with a five second timeout. A failing lookup is
// returned as an error; a datasource whose URL cannot be built is logged and
// left out of the result instead of failing the whole call.
func (d *AlertsRouter) alertmanagersFromDatasources(orgID int64) ([]string, error) {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
	defer cancel()

	// Only datasources of the Alertmanager type can act as external
	// alertmanagers, so restrict the lookup to that type.
	byType := &datasources.GetDataSourcesByTypeQuery{
		OrgId: orgID,
		Type:  datasources.DS_ALERTMANAGER,
	}
	if err := d.datasourceService.GetDataSourcesByType(ctx, byType); err != nil {
		return nil, fmt.Errorf("failed to fetch datasources for org: %w", err)
	}

	var urls []string
	for _, source := range byType.Result {
		// A missing flag is treated the same as an explicit false.
		handlesAlerts := source.JsonData.Get(definitions.HandleGrafanaManagedAlerts).MustBool(false)
		if !handlesAlerts {
			continue
		}

		externalURL, buildErr := d.buildExternalURL(source)
		if buildErr != nil {
			// One misconfigured datasource must not prevent the others from
			// being used.
			d.logger.Error("failed to build external alertmanager URL",
				"org", source.OrgId,
				"uid", source.Uid,
				"err", buildErr)
			continue
		}
		urls = append(urls, externalURL)
	}

	return urls, nil
}
// buildExternalURL returns the URL under which the Alertmanager behind the
// given datasource should be reached.
//
// If basic auth is disabled on the datasource, ds.Url is returned unchanged.
// If it is enabled, the basic auth user and the decrypted "basicAuthPassword"
// secret are placed in the userinfo component of the URL.
//
// An error is returned when the datasource URL cannot be parsed, or when basic
// auth is enabled but the decrypted password is empty.
func (d *AlertsRouter) buildExternalURL(ds *datasources.DataSource) (string, error) {
	// Without basic auth there is nothing to bake into the URL.
	if !ds.BasicAuth {
		return ds.Url, nil
	}

	parsed, err := url.Parse(ds.Url)
	if err != nil {
		return "", fmt.Errorf("failed to parse alertmanager datasource url: %w", err)
	}

	password := d.secretService.GetDecryptedValue(context.Background(), ds.SecureJsonData, "basicAuthPassword", "")
	if password == "" {
		return "", fmt.Errorf("basic auth enabled but no password set")
	}

	// Let net/url reassemble the URL instead of formatting it by hand:
	// UserPassword percent-encodes characters such as '@', ':' or '/' in the
	// credentials, and String keeps the "?" separator in front of the query,
	// both of which a plain Sprintf of the individual parts gets wrong.
	parsed.User = url.UserPassword(ds.BasicAuthUser, password)
	return parsed.String(), nil
}
func (d *AlertsRouter) Send(key models.AlertRuleKey, alerts definitions.PostableAlerts) {
logger := d.logger.New("rule_uid", key.UID, "org", key.OrgID)
if len(alerts.PostableAlerts) == 0 {

View File

@@ -15,13 +15,15 @@ import (
"github.com/stretchr/testify/require"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/datasources"
fake_ds "github.com/grafana/grafana/pkg/services/datasources/fakes"
"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/notifier"
"github.com/grafana/grafana/pkg/services/ngalert/provisioning"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/services/secrets/fakes"
fake_secrets "github.com/grafana/grafana/pkg/services/secrets/fakes"
secretsManager "github.com/grafana/grafana/pkg/services/secrets/manager"
"github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/util"
@@ -45,7 +47,8 @@ func TestSendingToExternalAlertmanager(t *testing.T) {
Host: "localhost",
}
alertsRouter := NewAlertsRouter(moa, fakeAdminConfigStore, mockedClock, appUrl, map[int64]struct{}{}, 10*time.Minute)
alertsRouter := NewAlertsRouter(moa, fakeAdminConfigStore, mockedClock, appUrl, map[int64]struct{}{}, 10*time.Minute,
&fake_ds.FakeDataSourceService{}, fake_secrets.NewFakeSecretsService())
mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
{OrgID: ruleKey.OrgID, Alertmanagers: []string{fakeAM.Server.URL}, SendAlertsTo: models.AllAlertmanagers},
@@ -102,7 +105,8 @@ func TestSendingToExternalAlertmanager_WithMultipleOrgs(t *testing.T) {
Host: "localhost",
}
alertsRouter := NewAlertsRouter(moa, fakeAdminConfigStore, mockedClock, appUrl, map[int64]struct{}{}, 10*time.Minute)
alertsRouter := NewAlertsRouter(moa, fakeAdminConfigStore, mockedClock, appUrl, map[int64]struct{}{}, 10*time.Minute,
&fake_ds.FakeDataSourceService{}, fake_secrets.NewFakeSecretsService())
mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
{OrgID: ruleKey1.OrgID, Alertmanagers: []string{fakeAM.Server.URL}, SendAlertsTo: models.AllAlertmanagers},
@@ -228,7 +232,8 @@ func TestChangingAlertmanagersChoice(t *testing.T) {
Host: "localhost",
}
alertsRouter := NewAlertsRouter(moa, fakeAdminConfigStore, mockedClock, appUrl, map[int64]struct{}{}, 10*time.Minute)
alertsRouter := NewAlertsRouter(moa, fakeAdminConfigStore, mockedClock, appUrl, map[int64]struct{}{},
10*time.Minute, &fake_ds.FakeDataSourceService{}, fake_secrets.NewFakeSecretsService())
mockedGetAdminConfigurations.Return([]*models.AdminConfiguration{
{OrgID: ruleKey.OrgID, Alertmanagers: []string{fakeAM.Server.URL}, SendAlertsTo: models.AllAlertmanagers},
@@ -344,7 +349,7 @@ func createMultiOrgAlertmanager(t *testing.T, orgs []int64) *notifier.MultiOrgAl
kvStore := notifier.NewFakeKVStore(t)
registry := prometheus.NewPedanticRegistry()
m := metrics.NewNGAlert(registry)
secretsService := secretsManager.SetupTestService(t, fakes.NewFakeSecretsStore())
secretsService := secretsManager.SetupTestService(t, fake_secrets.NewFakeSecretsStore())
decryptFn := secretsService.GetDecryptedValue
moa, err := notifier.NewMultiOrgAlertmanager(cfg, &cfgStore, &orgStore, kvStore, provisioning.NewFakeProvisioningStore(), decryptFn, m.GetMultiOrgAlertmanagerMetrics(), nil, log.New("testlogger"), secretsService)
require.NoError(t, err)
@@ -360,3 +365,60 @@ func createMultiOrgAlertmanager(t *testing.T, orgs []int64) *notifier.MultiOrgAl
}, 10*time.Second, 100*time.Millisecond)
return moa
}
// TestBuildExternalURL verifies the URL that buildExternalURL produces for
// datasources with and without basic auth, with and without a path in the
// datasource URL.
func TestBuildExternalURL(t *testing.T) {
	router := AlertsRouter{
		secretService: fake_secrets.NewFakeSecretsService(),
	}

	// withBasicAuth builds a datasource pointing at rawURL with basic auth
	// enabled for user "johndoe".
	// NOTE(review): the expected URLs below rely on the fake secrets service
	// handing back the stored "123" bytes as the password — confirm against
	// the fake's implementation.
	withBasicAuth := func(rawURL string) *datasources.DataSource {
		return &datasources.DataSource{
			Url:           rawURL,
			BasicAuth:     true,
			BasicAuthUser: "johndoe",
			SecureJsonData: map[string][]byte{
				"basicAuthPassword": []byte("123"),
			},
		}
	}

	cases := []struct {
		name string
		ds   *datasources.DataSource
		want string
	}{
		{
			name: "datasource without auth",
			ds:   &datasources.DataSource{Url: "https://localhost:9000"},
			want: "https://localhost:9000",
		},
		{
			name: "datasource without auth and with path",
			ds:   &datasources.DataSource{Url: "https://localhost:9000/path/to/am"},
			want: "https://localhost:9000/path/to/am",
		},
		{
			name: "datasource with auth",
			ds:   withBasicAuth("https://localhost:9000"),
			want: "https://johndoe:123@localhost:9000",
		},
		{
			name: "datasource with auth and path",
			ds:   withBasicAuth("https://localhost:9000/path/to/am"),
			want: "https://johndoe:123@localhost:9000/path/to/am",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := router.buildExternalURL(tc.ds)
			require.NoError(t, err)
			require.Equal(t, tc.want, got)
		})
	}
}