mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting: Promote configuration in the remote Alertmanager (#87388)
This commit is contained in:
@@ -191,20 +191,27 @@ func (ng *AlertNG) init() error {
|
||||
}
|
||||
|
||||
// Create remote Alertmanager.
|
||||
remoteAM, err := createRemoteAlertmanager(orgID, ng.Cfg.UnifiedAlerting.RemoteAlertmanager, ng.KVStore, ng.SecretsService.Decrypt, ng.Cfg.UnifiedAlerting.DefaultConfiguration, m)
|
||||
cfg := remote.AlertmanagerConfig{
|
||||
BasicAuthPassword: ng.Cfg.UnifiedAlerting.RemoteAlertmanager.Password,
|
||||
DefaultConfig: ng.Cfg.UnifiedAlerting.DefaultConfiguration,
|
||||
OrgID: orgID,
|
||||
TenantID: ng.Cfg.UnifiedAlerting.RemoteAlertmanager.TenantID,
|
||||
URL: ng.Cfg.UnifiedAlerting.RemoteAlertmanager.URL,
|
||||
}
|
||||
remoteAM, err := createRemoteAlertmanager(cfg, ng.KVStore, ng.SecretsService.Decrypt, m)
|
||||
if err != nil {
|
||||
moaLogger.Error("Failed to create remote Alertmanager, falling back to using only the internal one", "err", err)
|
||||
return internalAM, nil
|
||||
}
|
||||
|
||||
// Use both Alertmanager implementations in the forked Alertmanager.
|
||||
cfg := remote.RemoteSecondaryConfig{
|
||||
rsCfg := remote.RemoteSecondaryConfig{
|
||||
Logger: log.New("ngalert.forked-alertmanager.remote-secondary"),
|
||||
OrgID: orgID,
|
||||
Store: ng.store,
|
||||
SyncInterval: ng.Cfg.UnifiedAlerting.RemoteAlertmanager.SyncInterval,
|
||||
}
|
||||
return remote.NewRemoteSecondaryForkedAlertmanager(cfg, internalAM, remoteAM)
|
||||
return remote.NewRemoteSecondaryForkedAlertmanager(rsCfg, internalAM, remoteAM)
|
||||
}
|
||||
})
|
||||
|
||||
@@ -540,12 +547,6 @@ func ApplyStateHistoryFeatureToggles(cfg *setting.UnifiedAlertingStateHistorySet
|
||||
}
|
||||
}
|
||||
|
||||
func createRemoteAlertmanager(orgID int64, amCfg setting.RemoteAlertmanagerSettings, kvstore kvstore.KVStore, decryptFn remote.DecryptFn, defaultConfig string, m *metrics.RemoteAlertmanager) (*remote.Alertmanager, error) {
|
||||
externalAMCfg := remote.AlertmanagerConfig{
|
||||
OrgID: orgID,
|
||||
URL: amCfg.URL,
|
||||
TenantID: amCfg.TenantID,
|
||||
BasicAuthPassword: amCfg.Password,
|
||||
}
|
||||
return remote.NewAlertmanager(externalAMCfg, notifier.NewFileStore(orgID, kvstore), decryptFn, defaultConfig, m)
|
||||
// createRemoteAlertmanager builds a remote Alertmanager from cfg.
// The state store handed to remote.NewAlertmanager is a notifier file store
// created for cfg.OrgID on top of kvstore; decryptFn and m are passed through
// unchanged. The result and error of remote.NewAlertmanager are returned as-is.
func createRemoteAlertmanager(cfg remote.AlertmanagerConfig, kvstore kvstore.KVStore, decryptFn remote.DecryptFn, m *metrics.RemoteAlertmanager) (*remote.Alertmanager, error) {
|
||||
return remote.NewAlertmanager(cfg, notifier.NewFileStore(cfg.OrgID, kvstore), decryptFn, m)
|
||||
}
|
||||
|
||||
@@ -63,9 +63,10 @@ func TestMultiorgAlertmanager_RemoteSecondaryMode(t *testing.T) {
|
||||
URL: testsrv.URL,
|
||||
TenantID: tenantID,
|
||||
BasicAuthPassword: password,
|
||||
DefaultConfig: setting.GetAlertmanagerDefaultConfiguration(),
|
||||
}
|
||||
m := metrics.NewRemoteAlertmanagerMetrics(prometheus.NewRegistry())
|
||||
remoteAM, err := remote.NewAlertmanager(externalAMCfg, notifier.NewFileStore(orgID, kvStore), secretsService.Decrypt, setting.GetAlertmanagerDefaultConfiguration(), m)
|
||||
remoteAM, err := remote.NewAlertmanager(externalAMCfg, notifier.NewFileStore(orgID, kvStore), secretsService.Decrypt, m)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Use both Alertmanager implementations in the forked Alertmanager.
|
||||
|
||||
@@ -61,6 +61,11 @@ type AlertmanagerConfig struct {
|
||||
URL string
|
||||
TenantID string
|
||||
BasicAuthPassword string
|
||||
|
||||
DefaultConfig string
|
||||
// PromoteConfig is a flag that determines whether the configuration should be used in the remote Alertmanager.
|
||||
// The same flag is used for promoting state.
|
||||
PromoteConfig bool
|
||||
}
|
||||
|
||||
func (cfg *AlertmanagerConfig) Validate() error {
|
||||
@@ -78,7 +83,7 @@ func (cfg *AlertmanagerConfig) Validate() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func NewAlertmanager(cfg AlertmanagerConfig, store stateStore, decryptFn DecryptFn, defaultConfig string, metrics *metrics.RemoteAlertmanager) (*Alertmanager, error) {
|
||||
func NewAlertmanager(cfg AlertmanagerConfig, store stateStore, decryptFn DecryptFn, metrics *metrics.RemoteAlertmanager) (*Alertmanager, error) {
|
||||
if err := cfg.Validate(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -90,10 +95,11 @@ func NewAlertmanager(cfg AlertmanagerConfig, store stateStore, decryptFn Decrypt
|
||||
logger := log.New("ngalert.remote.alertmanager")
|
||||
|
||||
mcCfg := &remoteClient.Config{
|
||||
URL: u,
|
||||
TenantID: cfg.TenantID,
|
||||
Password: cfg.BasicAuthPassword,
|
||||
Logger: logger,
|
||||
Logger: logger,
|
||||
Password: cfg.BasicAuthPassword,
|
||||
TenantID: cfg.TenantID,
|
||||
URL: u,
|
||||
PromoteConfig: cfg.PromoteConfig,
|
||||
}
|
||||
mc, err := remoteClient.New(mcCfg, metrics)
|
||||
if err != nil {
|
||||
@@ -124,7 +130,7 @@ func NewAlertmanager(cfg AlertmanagerConfig, store stateStore, decryptFn Decrypt
|
||||
}
|
||||
|
||||
// Parse the default configuration into a postable config.
|
||||
pCfg, err := notifier.Load([]byte(defaultConfig))
|
||||
pCfg, err := notifier.Load([]byte(cfg.DefaultConfig))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -529,6 +535,10 @@ func (am *Alertmanager) shouldSendConfig(ctx context.Context, config *apimodels.
|
||||
return true
|
||||
}
|
||||
|
||||
if rc.Promoted != am.mimirClient.ShouldPromoteConfig() {
|
||||
return true
|
||||
}
|
||||
|
||||
rawRemote, err := json.Marshal(rc.GrafanaAlertmanagerConfig)
|
||||
if err != nil {
|
||||
am.log.Error("Unable to marshal the remote Alertmanager configuration for comparison", "err", err)
|
||||
|
||||
@@ -99,9 +99,10 @@ func TestNewAlertmanager(t *testing.T) {
|
||||
URL: test.url,
|
||||
TenantID: test.tenantID,
|
||||
BasicAuthPassword: test.password,
|
||||
DefaultConfig: defaultGrafanaConfig,
|
||||
}
|
||||
m := metrics.NewRemoteAlertmanagerMetrics(prometheus.NewRegistry())
|
||||
am, err := NewAlertmanager(cfg, nil, secretsService.Decrypt, defaultGrafanaConfig, m)
|
||||
am, err := NewAlertmanager(cfg, nil, secretsService.Decrypt, m)
|
||||
if test.expErr != "" {
|
||||
require.EqualError(tt, err, test.expErr)
|
||||
return
|
||||
@@ -121,16 +122,11 @@ func TestApplyConfig(t *testing.T) {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
})
|
||||
|
||||
var configSent string
|
||||
var configSent client.UserGrafanaConfig
|
||||
okHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method == http.MethodPost && strings.Contains(r.URL.Path, "/config") {
|
||||
var c client.UserGrafanaConfig
|
||||
require.NoError(t, json.NewDecoder(r.Body).Decode(&c))
|
||||
amCfg, err := json.Marshal(c.GrafanaAlertmanagerConfig)
|
||||
require.NoError(t, err)
|
||||
configSent = string(amCfg)
|
||||
require.NoError(t, json.NewDecoder(r.Body).Decode(&configSent))
|
||||
}
|
||||
|
||||
w.WriteHeader(http.StatusOK)
|
||||
})
|
||||
|
||||
@@ -152,9 +148,11 @@ func TestApplyConfig(t *testing.T) {
|
||||
// A non-200 response should result in an error.
|
||||
server := httptest.NewServer(errorHandler)
|
||||
cfg := AlertmanagerConfig{
|
||||
OrgID: 1,
|
||||
TenantID: "test",
|
||||
URL: server.URL,
|
||||
OrgID: 1,
|
||||
TenantID: "test",
|
||||
URL: server.URL,
|
||||
DefaultConfig: defaultGrafanaConfig,
|
||||
PromoteConfig: true,
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
@@ -165,7 +163,7 @@ func TestApplyConfig(t *testing.T) {
|
||||
|
||||
// An error response from the remote Alertmanager should result in the readiness check failing.
|
||||
m := metrics.NewRemoteAlertmanagerMetrics(prometheus.NewRegistry())
|
||||
am, err := NewAlertmanager(cfg, fstore, secretsService.Decrypt, defaultGrafanaConfig, m)
|
||||
am, err := NewAlertmanager(cfg, fstore, secretsService.Decrypt, m)
|
||||
require.NoError(t, err)
|
||||
|
||||
config := &ngmodels.AlertConfiguration{
|
||||
@@ -179,8 +177,11 @@ func TestApplyConfig(t *testing.T) {
|
||||
require.NoError(t, am.ApplyConfig(ctx, config))
|
||||
require.True(t, am.Ready())
|
||||
|
||||
// Secrets in the sent configuration should be unencrypted.
|
||||
require.JSONEq(t, testGrafanaConfigWithSecret, configSent)
|
||||
// The sent configuration should be unencrypted and promoted.
|
||||
amCfg, err := json.Marshal(configSent.GrafanaAlertmanagerConfig)
|
||||
require.NoError(t, err)
|
||||
require.JSONEq(t, testGrafanaConfigWithSecret, string(amCfg))
|
||||
require.True(t, configSent.Promoted)
|
||||
|
||||
// If we already got a 200 status code response, we shouldn't make the HTTP request again.
|
||||
server.Config.Handler = errorHandler
|
||||
@@ -216,14 +217,14 @@ func TestCompareAndSendConfiguration(t *testing.T) {
|
||||
fstore := notifier.NewFileStore(1, ngfakes.NewFakeKVStore(t))
|
||||
m := metrics.NewRemoteAlertmanagerMetrics(prometheus.NewRegistry())
|
||||
cfg := AlertmanagerConfig{
|
||||
OrgID: 1,
|
||||
TenantID: "test",
|
||||
URL: server.URL,
|
||||
OrgID: 1,
|
||||
TenantID: "test",
|
||||
URL: server.URL,
|
||||
DefaultConfig: defaultGrafanaConfig,
|
||||
}
|
||||
am, err := NewAlertmanager(cfg,
|
||||
fstore,
|
||||
decryptFn,
|
||||
defaultGrafanaConfig,
|
||||
m,
|
||||
)
|
||||
require.NoError(t, err)
|
||||
@@ -298,6 +299,7 @@ func TestIntegrationRemoteAlertmanagerConfiguration(t *testing.T) {
|
||||
URL: amURL,
|
||||
TenantID: tenantID,
|
||||
BasicAuthPassword: password,
|
||||
DefaultConfig: defaultGrafanaConfig,
|
||||
}
|
||||
|
||||
testConfigHash := fmt.Sprintf("%x", md5.Sum([]byte(testGrafanaConfig)))
|
||||
@@ -319,7 +321,7 @@ func TestIntegrationRemoteAlertmanagerConfiguration(t *testing.T) {
|
||||
|
||||
secretsService := secretsManager.SetupTestService(t, database.ProvideSecretsStore(db.InitTestDB(t)))
|
||||
m := metrics.NewRemoteAlertmanagerMetrics(prometheus.NewRegistry())
|
||||
am, err := NewAlertmanager(cfg, fstore, secretsService.Decrypt, defaultGrafanaConfig, m)
|
||||
am, err := NewAlertmanager(cfg, fstore, secretsService.Decrypt, m)
|
||||
require.NoError(t, err)
|
||||
|
||||
encodedFullState, err := am.getFullState(ctx)
|
||||
@@ -461,11 +463,12 @@ func TestIntegrationRemoteAlertmanagerGetStatus(t *testing.T) {
|
||||
URL: amURL,
|
||||
TenantID: tenantID,
|
||||
BasicAuthPassword: password,
|
||||
DefaultConfig: defaultGrafanaConfig,
|
||||
}
|
||||
|
||||
secretsService := secretsManager.SetupTestService(t, fakes.NewFakeSecretsStore())
|
||||
m := metrics.NewRemoteAlertmanagerMetrics(prometheus.NewRegistry())
|
||||
am, err := NewAlertmanager(cfg, nil, secretsService.Decrypt, defaultGrafanaConfig, m)
|
||||
am, err := NewAlertmanager(cfg, nil, secretsService.Decrypt, m)
|
||||
require.NoError(t, err)
|
||||
|
||||
// We should get the default Cloud Alertmanager configuration.
|
||||
@@ -494,11 +497,12 @@ func TestIntegrationRemoteAlertmanagerSilences(t *testing.T) {
|
||||
URL: amURL,
|
||||
TenantID: tenantID,
|
||||
BasicAuthPassword: password,
|
||||
DefaultConfig: defaultGrafanaConfig,
|
||||
}
|
||||
|
||||
secretsService := secretsManager.SetupTestService(t, fakes.NewFakeSecretsStore())
|
||||
m := metrics.NewRemoteAlertmanagerMetrics(prometheus.NewRegistry())
|
||||
am, err := NewAlertmanager(cfg, nil, secretsService.Decrypt, defaultGrafanaConfig, m)
|
||||
am, err := NewAlertmanager(cfg, nil, secretsService.Decrypt, m)
|
||||
require.NoError(t, err)
|
||||
|
||||
// We should have no silences at first.
|
||||
@@ -578,11 +582,12 @@ func TestIntegrationRemoteAlertmanagerAlerts(t *testing.T) {
|
||||
URL: amURL,
|
||||
TenantID: tenantID,
|
||||
BasicAuthPassword: password,
|
||||
DefaultConfig: defaultGrafanaConfig,
|
||||
}
|
||||
|
||||
secretsService := secretsManager.SetupTestService(t, fakes.NewFakeSecretsStore())
|
||||
m := metrics.NewRemoteAlertmanagerMetrics(prometheus.NewRegistry())
|
||||
am, err := NewAlertmanager(cfg, nil, secretsService.Decrypt, defaultGrafanaConfig, m)
|
||||
am, err := NewAlertmanager(cfg, nil, secretsService.Decrypt, m)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Wait until the Alertmanager is ready to send alerts.
|
||||
@@ -646,11 +651,12 @@ func TestIntegrationRemoteAlertmanagerReceivers(t *testing.T) {
|
||||
URL: amURL,
|
||||
TenantID: tenantID,
|
||||
BasicAuthPassword: password,
|
||||
DefaultConfig: defaultGrafanaConfig,
|
||||
}
|
||||
|
||||
secretsService := secretsManager.SetupTestService(t, fakes.NewFakeSecretsStore())
|
||||
m := metrics.NewRemoteAlertmanagerMetrics(prometheus.NewRegistry())
|
||||
am, err := NewAlertmanager(cfg, nil, secretsService.Decrypt, defaultGrafanaConfig, m)
|
||||
am, err := NewAlertmanager(cfg, nil, secretsService.Decrypt, m)
|
||||
require.NoError(t, err)
|
||||
|
||||
// We should start with the default config.
|
||||
|
||||
@@ -19,6 +19,11 @@ type UserGrafanaConfig struct {
|
||||
Hash string `json:"configuration_hash"`
|
||||
CreatedAt int64 `json:"created"`
|
||||
Default bool `json:"default"`
|
||||
Promoted bool `json:"promoted"`
|
||||
}
|
||||
|
||||
// ShouldPromoteConfig returns the client's promoteConfig setting, which New
// copies from Config.PromoteConfig. The same value is sent as the Promoted
// field when a Grafana Alertmanager configuration is created.
func (mc *Mimir) ShouldPromoteConfig() bool {
|
||||
return mc.promoteConfig
|
||||
}
|
||||
|
||||
func (mc *Mimir) GetGrafanaAlertmanagerConfig(ctx context.Context) (*UserGrafanaConfig, error) {
|
||||
@@ -46,6 +51,7 @@ func (mc *Mimir) CreateGrafanaAlertmanagerConfig(ctx context.Context, cfg *apimo
|
||||
Hash: hash,
|
||||
CreatedAt: createdAt,
|
||||
Default: isDefault,
|
||||
Promoted: mc.promoteConfig,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
|
||||
@@ -26,13 +26,16 @@ type MimirClient interface {
|
||||
GetGrafanaAlertmanagerConfig(ctx context.Context) (*UserGrafanaConfig, error)
|
||||
CreateGrafanaAlertmanagerConfig(ctx context.Context, configuration *apimodels.PostableUserConfig, hash string, createdAt int64, isDefault bool) error
|
||||
DeleteGrafanaAlertmanagerConfig(ctx context.Context) error
|
||||
|
||||
ShouldPromoteConfig() bool
|
||||
}
|
||||
|
||||
type Mimir struct {
|
||||
client client.Requester
|
||||
endpoint *url.URL
|
||||
logger log.Logger
|
||||
metrics *metrics.RemoteAlertmanager
|
||||
client client.Requester
|
||||
endpoint *url.URL
|
||||
logger log.Logger
|
||||
metrics *metrics.RemoteAlertmanager
|
||||
promoteConfig bool
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
@@ -40,7 +43,8 @@ type Config struct {
|
||||
TenantID string
|
||||
Password string
|
||||
|
||||
Logger log.Logger
|
||||
Logger log.Logger
|
||||
PromoteConfig bool
|
||||
}
|
||||
|
||||
// successResponse represents a successful response from the Mimir API.
|
||||
@@ -76,10 +80,11 @@ func New(cfg *Config, metrics *metrics.RemoteAlertmanager) (*Mimir, error) {
|
||||
}
|
||||
|
||||
return &Mimir{
|
||||
endpoint: cfg.URL,
|
||||
client: client.NewTimedClient(c, metrics.RequestLatency),
|
||||
logger: cfg.Logger,
|
||||
metrics: metrics,
|
||||
endpoint: cfg.URL,
|
||||
client: client.NewTimedClient(c, metrics.RequestLatency),
|
||||
logger: cfg.Logger,
|
||||
metrics: metrics,
|
||||
promoteConfig: cfg.PromoteConfig,
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user