grafana/pkg/services/ngalert/notifier/alertmanager_config.go

317 lines
12 KiB
Go
Raw Normal View History

package notifier
import (
"context"
"errors"
"fmt"
"time"
"github.com/go-openapi/strfmt"
"github.com/grafana/grafana/pkg/services/featuremgmt"
"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/util"
)
type UnknownReceiverError struct {
UID string
}
func (e UnknownReceiverError) Error() string {
return fmt.Sprintf("unknown receiver: %s", e.UID)
}
type AlertmanagerConfigRejectedError struct {
Inner error
}
func (e AlertmanagerConfigRejectedError) Error() string {
return fmt.Sprintf("failed to save and apply Alertmanager configuration: %s", e.Inner.Error())
}
type configurationStore interface {
GetLatestAlertmanagerConfiguration(ctx context.Context, orgID int64) (*models.AlertConfiguration, error)
}
// ApplyConfig will apply the given alertmanager configuration for a given org.
// Can be used to force regeneration of autogenerated routes.
func (moa *MultiOrgAlertmanager) ApplyConfig(ctx context.Context, orgId int64, dbConfig *models.AlertConfiguration) error {
am, err := moa.AlertmanagerFor(orgId)
if err != nil {
// It's okay if the alertmanager isn't ready yet, we're changing its config anyway.
if !errors.Is(err, ErrAlertmanagerNotReady) {
return err
}
}
err = am.ApplyConfig(ctx, dbConfig)
if err != nil {
return fmt.Errorf("failed to apply configuration: %w", err)
}
return nil
}
// GetAlertmanagerConfiguration returns the latest alertmanager configuration for a given org.
// If withAutogen is true, the configuration will be augmented with autogenerated routes.
func (moa *MultiOrgAlertmanager) GetAlertmanagerConfiguration(ctx context.Context, org int64, withAutogen bool) (definitions.GettableUserConfig, error) {
amConfig, err := moa.configStore.GetLatestAlertmanagerConfiguration(ctx, org)
if err != nil {
return definitions.GettableUserConfig{}, fmt.Errorf("failed to get latest configuration: %w", err)
}
cfg, err := moa.gettableUserConfigFromAMConfigString(ctx, org, amConfig.AlertmanagerConfiguration)
if err != nil {
return definitions.GettableUserConfig{}, err
}
if moa.featureManager.IsEnabled(ctx, featuremgmt.FlagAlertingSimplifiedRouting) && withAutogen {
// We validate the notification settings in a similar way to when we POST.
// Otherwise, broken settings (e.g. a receiver that doesn't exist) will cause the config returned here to be
// different than the config currently in-use.
// TODO: Preferably, we'd be getting the config directly from the in-memory AM so adding the autogen config would not be necessary.
err := AddAutogenConfig(ctx, moa.logger, moa.configStore, org, &cfg.AlertmanagerConfig, true)
if err != nil {
return definitions.GettableUserConfig{}, err
}
}
return cfg, nil
}
// ActivateHistoricalConfiguration will set the current alertmanager configuration to a previous value based on the provided
// alert_configuration_history id.
func (moa *MultiOrgAlertmanager) ActivateHistoricalConfiguration(ctx context.Context, orgId int64, id int64) error {
config, err := moa.configStore.GetHistoricalConfiguration(ctx, orgId, id)
if err != nil {
return fmt.Errorf("failed to get historical alertmanager configuration: %w", err)
}
cfg, err := Load([]byte(config.AlertmanagerConfiguration))
if err != nil {
return fmt.Errorf("failed to unmarshal historical alertmanager configuration: %w", err)
}
am, err := moa.AlertmanagerFor(orgId)
if err != nil {
// It's okay if the alertmanager isn't ready yet, we're changing its config anyway.
if !errors.Is(err, ErrAlertmanagerNotReady) {
return err
}
}
if err := am.SaveAndApplyConfig(ctx, cfg); err != nil {
moa.logger.Error("Unable to save and apply historical alertmanager configuration", "error", err, "org", orgId, "id", id)
return AlertmanagerConfigRejectedError{err}
}
moa.logger.Info("Applied historical alertmanager configuration", "org", orgId, "id", id)
return nil
}
// GetAppliedAlertmanagerConfigurations returns the last n configurations marked as applied for a given org.
func (moa *MultiOrgAlertmanager) GetAppliedAlertmanagerConfigurations(ctx context.Context, org int64, limit int) ([]*definitions.GettableHistoricUserConfig, error) {
configs, err := moa.configStore.GetAppliedConfigurations(ctx, org, limit)
if err != nil {
return []*definitions.GettableHistoricUserConfig{}, fmt.Errorf("failed to get applied configurations: %w", err)
}
gettableHistoricConfigs := make([]*definitions.GettableHistoricUserConfig, 0, len(configs))
for _, config := range configs {
appliedAt := strfmt.DateTime(time.Unix(config.LastApplied, 0).UTC())
gettableConfig, err := moa.gettableUserConfigFromAMConfigString(ctx, org, config.AlertmanagerConfiguration)
if err != nil {
// If there are invalid records, skip them and return the valid ones.
moa.logger.Warn("Invalid configuration found in alert configuration history table", "id", config.ID, "orgID", org)
continue
}
gettableHistoricConfig := definitions.GettableHistoricUserConfig{
ID: config.ID,
TemplateFiles: gettableConfig.TemplateFiles,
TemplateFileProvenances: gettableConfig.TemplateFileProvenances,
AlertmanagerConfig: gettableConfig.AlertmanagerConfig,
LastApplied: &appliedAt,
}
gettableHistoricConfigs = append(gettableHistoricConfigs, &gettableHistoricConfig)
}
return gettableHistoricConfigs, nil
}
func (moa *MultiOrgAlertmanager) gettableUserConfigFromAMConfigString(ctx context.Context, orgID int64, config string) (definitions.GettableUserConfig, error) {
cfg, err := Load([]byte(config))
if err != nil {
return definitions.GettableUserConfig{}, fmt.Errorf("failed to unmarshal alertmanager configuration: %w", err)
}
result := definitions.GettableUserConfig{
TemplateFiles: cfg.TemplateFiles,
AlertmanagerConfig: definitions.GettableApiAlertingConfig{
Config: cfg.AlertmanagerConfig.Config,
},
}
for _, recv := range cfg.AlertmanagerConfig.Receivers {
receivers := make([]*definitions.GettableGrafanaReceiver, 0, len(recv.PostableGrafanaReceivers.GrafanaManagedReceivers))
for _, pr := range recv.PostableGrafanaReceivers.GrafanaManagedReceivers {
secureFields := make(map[string]bool, len(pr.SecureSettings))
for k := range pr.SecureSettings {
decryptedValue, err := moa.Crypto.getDecryptedSecret(pr, k)
if err != nil {
return definitions.GettableUserConfig{}, fmt.Errorf("failed to decrypt stored secure setting: %w", err)
}
if decryptedValue == "" {
continue
}
secureFields[k] = true
}
gr := definitions.GettableGrafanaReceiver{
UID: pr.UID,
Name: pr.Name,
Type: pr.Type,
DisableResolveMessage: pr.DisableResolveMessage,
Settings: pr.Settings,
SecureFields: secureFields,
}
receivers = append(receivers, &gr)
}
gettableApiReceiver := definitions.GettableApiReceiver{
GettableGrafanaReceivers: definitions.GettableGrafanaReceivers{
GrafanaManagedReceivers: receivers,
},
}
gettableApiReceiver.Name = recv.Name
result.AlertmanagerConfig.Receivers = append(result.AlertmanagerConfig.Receivers, &gettableApiReceiver)
}
result, err = moa.mergeProvenance(ctx, result, orgID)
if err != nil {
return definitions.GettableUserConfig{}, err
}
return result, nil
}
func (moa *MultiOrgAlertmanager) SaveAndApplyAlertmanagerConfiguration(ctx context.Context, org int64, config definitions.PostableUserConfig) error {
// We cannot add this validation to PostableUserConfig as that struct is used for both
// Grafana Alertmanager (where inhibition rules are not supported) and External Alertmanagers
// (including Mimir) where inhibition rules are supported.
if len(config.AlertmanagerConfig.InhibitRules) > 0 {
return errors.New("inhibition rules are not supported")
}
// Get the last known working configuration
_, err := moa.configStore.GetLatestAlertmanagerConfiguration(ctx, org)
if err != nil {
// If we don't have a configuration there's nothing for us to know and we should just continue saving the new one
if !errors.Is(err, store.ErrNoAlertmanagerConfiguration) {
return fmt.Errorf("failed to get latest configuration %w", err)
}
}
if err := moa.Crypto.ProcessSecureSettings(ctx, org, config.AlertmanagerConfig.Receivers); err != nil {
Alerting: Fix Alertmanager change detection for receivers with secure settings (#71307) * Alerting: Make ApplyAlertmanagerConfiguration only decrypt/encrypt new/changed secure settings Previously, ApplyAlertmanagerConfiguration would decrypt and re-encrypt all secure settings. However, this caused re-encrypted secure settings to be included in the raw configuration when applied to the embedded alertmanager, resulting in changes to the hash. Consequently, even if no actual modifications were made, saving any alertmanager configuration triggered an apply/restart and created a new historical entry in the database. To address the issue, this modifies ApplyAlertmanagerConfiguration, which is called by POST `api/alertmanager/grafana/config/api/v1/alerts`, to decrypt and re-encrypt only new and updated secure settings. Unchanged secure settings are loaded directly from the database without alteration. We determine whether secure settings have changed based on the following (already in-use) assumption: Only new or updated secure settings are provided via the POST `api/alertmanager/grafana/config/api/v1/alerts` request, while existing unchanged settings are omitted. * Ensure saving a grafana-managed contact point will only send new/changed secure settings Previously, when saving a grafana-managed contact point, empty string values were transmitted for all unset secure settings. This led to potential backend issues, as it assumed that only newly added or updated secure settings would be provided. To address this, we now exclude empty ('', null, undefined) secure settings, unless there was a pre-existing entry in secureFields for that specific setting. In essence, this means we only transmit an empty secure setting if a previously configured value was cleared. * Fix linting * refactor omitEmptyUnlessExisting * fixup --------- Co-authored-by: Gilles De Mey <gilles.de.mey@gmail.com>
2023-07-11 01:23:07 -05:00
return fmt.Errorf("failed to post process Alertmanager configuration: %w", err)
}
if err := assignReceiverConfigsUIDs(config.AlertmanagerConfig.Receivers); err != nil {
return fmt.Errorf("failed to assign missing uids: %w", err)
}
am, err := moa.AlertmanagerFor(org)
if err != nil {
// It's okay if the alertmanager isn't ready yet, we're changing its config anyway.
if !errors.Is(err, ErrAlertmanagerNotReady) {
return err
}
}
if err := am.SaveAndApplyConfig(ctx, &config); err != nil {
moa.logger.Error("Unable to save and apply alertmanager configuration", "error", err)
return AlertmanagerConfigRejectedError{err}
}
return nil
}
// assignReceiverConfigsUIDs assigns missing UUIDs to receiver configs.
func assignReceiverConfigsUIDs(c []*definitions.PostableApiReceiver) error {
seenUIDs := make(map[string]struct{})
// encrypt secure settings for storing them in DB
for _, r := range c {
switch r.Type() {
case definitions.GrafanaReceiverType:
for _, gr := range r.PostableGrafanaReceivers.GrafanaManagedReceivers {
if gr.UID == "" {
retries := 5
for i := 0; i < retries; i++ {
gen := util.GenerateShortUID()
_, ok := seenUIDs[gen]
if !ok {
gr.UID = gen
break
}
}
if gr.UID == "" {
return fmt.Errorf("all %d attempts to generate UID for receiver have failed; please retry", retries)
}
}
seenUIDs[gr.UID] = struct{}{}
}
default:
}
}
return nil
}
type provisioningStore interface {
GetProvenance(ctx context.Context, o models.Provisionable, org int64) (models.Provenance, error)
GetProvenances(ctx context.Context, org int64, resourceType string) (map[string]models.Provenance, error)
SetProvenance(ctx context.Context, o models.Provisionable, org int64, p models.Provenance) error
DeleteProvenance(ctx context.Context, o models.Provisionable, org int64) error
}
func (moa *MultiOrgAlertmanager) mergeProvenance(ctx context.Context, config definitions.GettableUserConfig, org int64) (definitions.GettableUserConfig, error) {
if config.AlertmanagerConfig.Route != nil {
provenance, err := moa.ProvStore.GetProvenance(ctx, config.AlertmanagerConfig.Route, org)
if err != nil {
return definitions.GettableUserConfig{}, err
}
config.AlertmanagerConfig.Route.Provenance = definitions.Provenance(provenance)
}
cp := definitions.EmbeddedContactPoint{}
cpProvs, err := moa.ProvStore.GetProvenances(ctx, org, cp.ResourceType())
if err != nil {
return definitions.GettableUserConfig{}, err
}
for _, receiver := range config.AlertmanagerConfig.Receivers {
for _, contactPoint := range receiver.GrafanaManagedReceivers {
if provenance, exists := cpProvs[contactPoint.UID]; exists {
contactPoint.Provenance = definitions.Provenance(provenance)
}
}
}
tmpl := definitions.NotificationTemplate{}
tmplProvs, err := moa.ProvStore.GetProvenances(ctx, org, tmpl.ResourceType())
if err != nil {
return definitions.GettableUserConfig{}, nil
}
config.TemplateFileProvenances = make(map[string]definitions.Provenance, len(tmplProvs))
for key, provenance := range tmplProvs {
config.TemplateFileProvenances[key] = definitions.Provenance(provenance)
}
mt := definitions.MuteTimeInterval{}
mtProvs, err := moa.ProvStore.GetProvenances(ctx, org, mt.ResourceType())
if err != nil {
return definitions.GettableUserConfig{}, nil
}
config.AlertmanagerConfig.MuteTimeProvenances = make(map[string]definitions.Provenance, len(mtProvs))
for key, provenance := range mtProvs {
config.AlertmanagerConfig.MuteTimeProvenances[key] = definitions.Provenance(provenance)
}
return config, nil
}