mirror of
https://github.com/grafana/grafana.git
synced 2024-11-25 18:30:41 -06:00
3397e8bf09
* Alerting: Improve error when receiver used by rule is deleted * Remove RuleUID from public error and data * Improve fallback error in am config post * Refactor to expand to time intervals * Fix message on unchecked errors to be same as before
339 lines
13 KiB
Go
339 lines
13 KiB
Go
package notifier
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/go-openapi/strfmt"
|
|
|
|
"github.com/grafana/grafana/pkg/services/featuremgmt"
|
|
"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
|
|
"github.com/grafana/grafana/pkg/services/ngalert/models"
|
|
"github.com/grafana/grafana/pkg/services/ngalert/store"
|
|
"github.com/grafana/grafana/pkg/util"
|
|
"github.com/grafana/grafana/pkg/util/errutil"
|
|
)
|
|
|
|
var (
|
|
// ErrAlertmanagerReceiverInUse is primarily meant for when a receiver is used by a rule and is being deleted.
|
|
ErrAlertmanagerReceiverInUse = errutil.BadRequest("alerting.notifications.alertmanager.receiverInUse").MustTemplate("receiver [Name: {{ .Public.Receiver }}] is used by rule: {{ .Error }}",
|
|
errutil.WithPublic(
|
|
"receiver [Name: {{ .Public.Receiver }}] is used by rule",
|
|
))
|
|
// ErrAlertmanagerTimeIntervalInUse is primarily meant for when a time interval is used by a rule and is being deleted.
|
|
ErrAlertmanagerTimeIntervalInUse = errutil.BadRequest("alerting.notifications.alertmanager.intervalInUse").MustTemplate("time interval [Name: {{ .Public.Interval }}] is used by rule: {{ .Error }}",
|
|
errutil.WithPublic(
|
|
"time interval [Name: {{ .Public.Interval }}] is used by rule",
|
|
))
|
|
)
|
|
|
|
// UnknownReceiverError is an error value that carries the UID of a receiver that is not known.
type UnknownReceiverError struct {
	// UID is the receiver identifier included in the error message.
	UID string
}

// Error implements the error interface and renders the message with the offending UID.
func (e UnknownReceiverError) Error() string {
	const format = "unknown receiver: %s"
	return fmt.Sprintf(format, e.UID)
}
|
|
|
|
// AlertmanagerConfigRejectedError wraps the error produced when saving and applying an
// Alertmanager configuration fails.
type AlertmanagerConfigRejectedError struct {
	// Inner is the underlying failure; Error dereferences it, so it must not be nil.
	Inner error
}

// Error implements the error interface by prefixing the inner error's message.
func (e AlertmanagerConfigRejectedError) Error() string {
	const format = "failed to save and apply Alertmanager configuration: %s"
	cause := e.Inner.Error()
	return fmt.Sprintf(format, cause)
}
|
|
|
|
type configurationStore interface {
|
|
GetLatestAlertmanagerConfiguration(ctx context.Context, orgID int64) (*models.AlertConfiguration, error)
|
|
}
|
|
|
|
// ApplyConfig will apply the given alertmanager configuration for a given org.
|
|
// Can be used to force regeneration of autogenerated routes.
|
|
func (moa *MultiOrgAlertmanager) ApplyConfig(ctx context.Context, orgId int64, dbConfig *models.AlertConfiguration) error {
|
|
am, err := moa.AlertmanagerFor(orgId)
|
|
if err != nil {
|
|
// It's okay if the alertmanager isn't ready yet, we're changing its config anyway.
|
|
if !errors.Is(err, ErrAlertmanagerNotReady) {
|
|
return err
|
|
}
|
|
}
|
|
|
|
err = am.ApplyConfig(ctx, dbConfig)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to apply configuration: %w", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// GetAlertmanagerConfiguration returns the latest alertmanager configuration for a given org.
|
|
// If withAutogen is true, the configuration will be augmented with autogenerated routes.
|
|
func (moa *MultiOrgAlertmanager) GetAlertmanagerConfiguration(ctx context.Context, org int64, withAutogen bool) (definitions.GettableUserConfig, error) {
|
|
amConfig, err := moa.configStore.GetLatestAlertmanagerConfiguration(ctx, org)
|
|
if err != nil {
|
|
return definitions.GettableUserConfig{}, fmt.Errorf("failed to get latest configuration: %w", err)
|
|
}
|
|
|
|
cfg, err := moa.gettableUserConfigFromAMConfigString(ctx, org, amConfig.AlertmanagerConfiguration)
|
|
if err != nil {
|
|
return definitions.GettableUserConfig{}, err
|
|
}
|
|
|
|
if moa.featureManager.IsEnabled(ctx, featuremgmt.FlagAlertingSimplifiedRouting) && withAutogen {
|
|
// We validate the notification settings in a similar way to when we POST.
|
|
// Otherwise, broken settings (e.g. a receiver that doesn't exist) will cause the config returned here to be
|
|
// different than the config currently in-use.
|
|
// TODO: Preferably, we'd be getting the config directly from the in-memory AM so adding the autogen config would not be necessary.
|
|
err := AddAutogenConfig(ctx, moa.logger, moa.configStore, org, &cfg.AlertmanagerConfig, true)
|
|
if err != nil {
|
|
return definitions.GettableUserConfig{}, err
|
|
}
|
|
}
|
|
return cfg, nil
|
|
}
|
|
|
|
// ActivateHistoricalConfiguration will set the current alertmanager configuration to a previous value based on the provided
|
|
// alert_configuration_history id.
|
|
func (moa *MultiOrgAlertmanager) ActivateHistoricalConfiguration(ctx context.Context, orgId int64, id int64) error {
|
|
config, err := moa.configStore.GetHistoricalConfiguration(ctx, orgId, id)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to get historical alertmanager configuration: %w", err)
|
|
}
|
|
|
|
cfg, err := Load([]byte(config.AlertmanagerConfiguration))
|
|
if err != nil {
|
|
return fmt.Errorf("failed to unmarshal historical alertmanager configuration: %w", err)
|
|
}
|
|
|
|
am, err := moa.AlertmanagerFor(orgId)
|
|
if err != nil {
|
|
// It's okay if the alertmanager isn't ready yet, we're changing its config anyway.
|
|
if !errors.Is(err, ErrAlertmanagerNotReady) {
|
|
return err
|
|
}
|
|
}
|
|
|
|
if err := am.SaveAndApplyConfig(ctx, cfg); err != nil {
|
|
moa.logger.Error("Unable to save and apply historical alertmanager configuration", "error", err, "org", orgId, "id", id)
|
|
return AlertmanagerConfigRejectedError{err}
|
|
}
|
|
moa.logger.Info("Applied historical alertmanager configuration", "org", orgId, "id", id)
|
|
|
|
return nil
|
|
}
|
|
|
|
// GetAppliedAlertmanagerConfigurations returns the last n configurations marked as applied for a given org.
|
|
func (moa *MultiOrgAlertmanager) GetAppliedAlertmanagerConfigurations(ctx context.Context, org int64, limit int) ([]*definitions.GettableHistoricUserConfig, error) {
|
|
configs, err := moa.configStore.GetAppliedConfigurations(ctx, org, limit)
|
|
if err != nil {
|
|
return []*definitions.GettableHistoricUserConfig{}, fmt.Errorf("failed to get applied configurations: %w", err)
|
|
}
|
|
|
|
gettableHistoricConfigs := make([]*definitions.GettableHistoricUserConfig, 0, len(configs))
|
|
for _, config := range configs {
|
|
appliedAt := strfmt.DateTime(time.Unix(config.LastApplied, 0).UTC())
|
|
gettableConfig, err := moa.gettableUserConfigFromAMConfigString(ctx, org, config.AlertmanagerConfiguration)
|
|
if err != nil {
|
|
// If there are invalid records, skip them and return the valid ones.
|
|
moa.logger.Warn("Invalid configuration found in alert configuration history table", "id", config.ID, "orgID", org)
|
|
continue
|
|
}
|
|
|
|
gettableHistoricConfig := definitions.GettableHistoricUserConfig{
|
|
ID: config.ID,
|
|
TemplateFiles: gettableConfig.TemplateFiles,
|
|
TemplateFileProvenances: gettableConfig.TemplateFileProvenances,
|
|
AlertmanagerConfig: gettableConfig.AlertmanagerConfig,
|
|
LastApplied: &appliedAt,
|
|
}
|
|
gettableHistoricConfigs = append(gettableHistoricConfigs, &gettableHistoricConfig)
|
|
}
|
|
|
|
return gettableHistoricConfigs, nil
|
|
}
|
|
|
|
func (moa *MultiOrgAlertmanager) gettableUserConfigFromAMConfigString(ctx context.Context, orgID int64, config string) (definitions.GettableUserConfig, error) {
|
|
cfg, err := Load([]byte(config))
|
|
if err != nil {
|
|
return definitions.GettableUserConfig{}, fmt.Errorf("failed to unmarshal alertmanager configuration: %w", err)
|
|
}
|
|
|
|
result := definitions.GettableUserConfig{
|
|
TemplateFiles: cfg.TemplateFiles,
|
|
AlertmanagerConfig: definitions.GettableApiAlertingConfig{
|
|
Config: cfg.AlertmanagerConfig.Config,
|
|
},
|
|
}
|
|
for _, recv := range cfg.AlertmanagerConfig.Receivers {
|
|
receivers := make([]*definitions.GettableGrafanaReceiver, 0, len(recv.PostableGrafanaReceivers.GrafanaManagedReceivers))
|
|
for _, pr := range recv.PostableGrafanaReceivers.GrafanaManagedReceivers {
|
|
secureFields := make(map[string]bool, len(pr.SecureSettings))
|
|
for k := range pr.SecureSettings {
|
|
decryptedValue, err := moa.Crypto.getDecryptedSecret(pr, k)
|
|
if err != nil {
|
|
return definitions.GettableUserConfig{}, fmt.Errorf("failed to decrypt stored secure setting: %w", err)
|
|
}
|
|
if decryptedValue == "" {
|
|
continue
|
|
}
|
|
secureFields[k] = true
|
|
}
|
|
gr := definitions.GettableGrafanaReceiver{
|
|
UID: pr.UID,
|
|
Name: pr.Name,
|
|
Type: pr.Type,
|
|
DisableResolveMessage: pr.DisableResolveMessage,
|
|
Settings: pr.Settings,
|
|
SecureFields: secureFields,
|
|
}
|
|
receivers = append(receivers, &gr)
|
|
}
|
|
gettableApiReceiver := definitions.GettableApiReceiver{
|
|
GettableGrafanaReceivers: definitions.GettableGrafanaReceivers{
|
|
GrafanaManagedReceivers: receivers,
|
|
},
|
|
}
|
|
gettableApiReceiver.Name = recv.Name
|
|
result.AlertmanagerConfig.Receivers = append(result.AlertmanagerConfig.Receivers, &gettableApiReceiver)
|
|
}
|
|
|
|
result, err = moa.mergeProvenance(ctx, result, orgID)
|
|
if err != nil {
|
|
return definitions.GettableUserConfig{}, err
|
|
}
|
|
|
|
return result, nil
|
|
}
|
|
|
|
func (moa *MultiOrgAlertmanager) SaveAndApplyAlertmanagerConfiguration(ctx context.Context, org int64, config definitions.PostableUserConfig) error {
|
|
// We cannot add this validation to PostableUserConfig as that struct is used for both
|
|
// Grafana Alertmanager (where inhibition rules are not supported) and External Alertmanagers
|
|
// (including Mimir) where inhibition rules are supported.
|
|
if len(config.AlertmanagerConfig.InhibitRules) > 0 {
|
|
return errors.New("inhibition rules are not supported")
|
|
}
|
|
|
|
// Get the last known working configuration
|
|
_, err := moa.configStore.GetLatestAlertmanagerConfiguration(ctx, org)
|
|
if err != nil {
|
|
// If we don't have a configuration there's nothing for us to know and we should just continue saving the new one
|
|
if !errors.Is(err, store.ErrNoAlertmanagerConfiguration) {
|
|
return fmt.Errorf("failed to get latest configuration %w", err)
|
|
}
|
|
}
|
|
|
|
if err := moa.Crypto.ProcessSecureSettings(ctx, org, config.AlertmanagerConfig.Receivers); err != nil {
|
|
return fmt.Errorf("failed to post process Alertmanager configuration: %w", err)
|
|
}
|
|
|
|
if err := assignReceiverConfigsUIDs(config.AlertmanagerConfig.Receivers); err != nil {
|
|
return fmt.Errorf("failed to assign missing uids: %w", err)
|
|
}
|
|
|
|
am, err := moa.AlertmanagerFor(org)
|
|
if err != nil {
|
|
// It's okay if the alertmanager isn't ready yet, we're changing its config anyway.
|
|
if !errors.Is(err, ErrAlertmanagerNotReady) {
|
|
return err
|
|
}
|
|
}
|
|
|
|
if err := am.SaveAndApplyConfig(ctx, &config); err != nil {
|
|
moa.logger.Error("Unable to save and apply alertmanager configuration", "error", err)
|
|
errReceiverDoesNotExist := ErrorReceiverDoesNotExist{}
|
|
if errors.As(err, &errReceiverDoesNotExist) {
|
|
return ErrAlertmanagerReceiverInUse.Build(errutil.TemplateData{Public: map[string]interface{}{"Receiver": errReceiverDoesNotExist.Reference}, Error: err})
|
|
}
|
|
errTimeIntervalDoesNotExist := ErrorTimeIntervalDoesNotExist{}
|
|
if errors.As(err, &errTimeIntervalDoesNotExist) {
|
|
return ErrAlertmanagerTimeIntervalInUse.Build(errutil.TemplateData{Public: map[string]interface{}{"Interval": errTimeIntervalDoesNotExist.Reference}, Error: err})
|
|
}
|
|
return AlertmanagerConfigRejectedError{err}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// assignReceiverConfigsUIDs assigns missing UUIDs to receiver configs.
|
|
func assignReceiverConfigsUIDs(c []*definitions.PostableApiReceiver) error {
|
|
seenUIDs := make(map[string]struct{})
|
|
// encrypt secure settings for storing them in DB
|
|
for _, r := range c {
|
|
switch r.Type() {
|
|
case definitions.GrafanaReceiverType:
|
|
for _, gr := range r.PostableGrafanaReceivers.GrafanaManagedReceivers {
|
|
if gr.UID == "" {
|
|
retries := 5
|
|
for i := 0; i < retries; i++ {
|
|
gen := util.GenerateShortUID()
|
|
_, ok := seenUIDs[gen]
|
|
if !ok {
|
|
gr.UID = gen
|
|
break
|
|
}
|
|
}
|
|
if gr.UID == "" {
|
|
return fmt.Errorf("all %d attempts to generate UID for receiver have failed; please retry", retries)
|
|
}
|
|
}
|
|
seenUIDs[gr.UID] = struct{}{}
|
|
}
|
|
default:
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
type provisioningStore interface {
|
|
GetProvenance(ctx context.Context, o models.Provisionable, org int64) (models.Provenance, error)
|
|
GetProvenances(ctx context.Context, org int64, resourceType string) (map[string]models.Provenance, error)
|
|
SetProvenance(ctx context.Context, o models.Provisionable, org int64, p models.Provenance) error
|
|
DeleteProvenance(ctx context.Context, o models.Provisionable, org int64) error
|
|
}
|
|
|
|
func (moa *MultiOrgAlertmanager) mergeProvenance(ctx context.Context, config definitions.GettableUserConfig, org int64) (definitions.GettableUserConfig, error) {
|
|
if config.AlertmanagerConfig.Route != nil {
|
|
provenance, err := moa.ProvStore.GetProvenance(ctx, config.AlertmanagerConfig.Route, org)
|
|
if err != nil {
|
|
return definitions.GettableUserConfig{}, err
|
|
}
|
|
config.AlertmanagerConfig.Route.Provenance = definitions.Provenance(provenance)
|
|
}
|
|
|
|
cp := definitions.EmbeddedContactPoint{}
|
|
cpProvs, err := moa.ProvStore.GetProvenances(ctx, org, cp.ResourceType())
|
|
if err != nil {
|
|
return definitions.GettableUserConfig{}, err
|
|
}
|
|
for _, receiver := range config.AlertmanagerConfig.Receivers {
|
|
for _, contactPoint := range receiver.GrafanaManagedReceivers {
|
|
if provenance, exists := cpProvs[contactPoint.UID]; exists {
|
|
contactPoint.Provenance = definitions.Provenance(provenance)
|
|
}
|
|
}
|
|
}
|
|
|
|
tmpl := definitions.NotificationTemplate{}
|
|
tmplProvs, err := moa.ProvStore.GetProvenances(ctx, org, tmpl.ResourceType())
|
|
if err != nil {
|
|
return definitions.GettableUserConfig{}, nil
|
|
}
|
|
config.TemplateFileProvenances = make(map[string]definitions.Provenance, len(tmplProvs))
|
|
for key, provenance := range tmplProvs {
|
|
config.TemplateFileProvenances[key] = definitions.Provenance(provenance)
|
|
}
|
|
|
|
mt := definitions.MuteTimeInterval{}
|
|
mtProvs, err := moa.ProvStore.GetProvenances(ctx, org, mt.ResourceType())
|
|
if err != nil {
|
|
return definitions.GettableUserConfig{}, nil
|
|
}
|
|
config.AlertmanagerConfig.MuteTimeProvenances = make(map[string]definitions.Provenance, len(mtProvs))
|
|
for key, provenance := range mtProvs {
|
|
config.AlertmanagerConfig.MuteTimeProvenances[key] = definitions.Provenance(provenance)
|
|
}
|
|
|
|
return config, nil
|
|
}
|