Alerting: Support for simplified notification settings in rule API (#81011)

* Add notification settings to storage/domain and API models. Settings are a slice to work around XORM mapping
* Support validation of notification settings when rules are updated

* Implement a route generator for the Alertmanager configuration that fetches all notification settings.
* Update multi-tenant Alertmanager to run the generator before applying the configuration.

* Add notification settings labels to state calculation
* Update the Multi-tenant Alertmanager to provide validation for notification settings

* Update the GET API so that only admins can see the autogenerated configuration
This commit is contained in:
Yuri Tseretyan
2024-02-15 09:45:10 -05:00
committed by GitHub
parent ff916d9c15
commit 1eebd2a4de
60 changed files with 3466 additions and 304 deletions

View File

@@ -43,6 +43,7 @@ var silenceMaintenanceInterval = 15 * time.Minute
// AlertingStore is the store dependency accepted by NewAlertmanager. It is the union of the
// embedded store.AlertingStore, store.ImageStore and autogenRuleStore interfaces.
type AlertingStore interface {
store.AlertingStore
store.ImageStore
autogenRuleStore
}
type alertmanager struct {
@@ -57,6 +58,8 @@ type alertmanager struct {
decryptFn alertingNotify.GetDecryptedValueFn
orgID int64
withAutogen bool
}
// maintenanceOptions represent the options for components that need maintenance on a frequency within the Alertmanager.
@@ -86,7 +89,7 @@ func (m maintenanceOptions) MaintenanceFunc(state alertingNotify.State) (int64,
func NewAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store AlertingStore, kvStore kvstore.KVStore,
peer alertingNotify.ClusterPeer, decryptFn alertingNotify.GetDecryptedValueFn, ns notifications.Service,
m *metrics.Alertmanager) (*alertmanager, error) {
m *metrics.Alertmanager, withAutogen bool) (*alertmanager, error) {
workingPath := filepath.Join(cfg.DataPath, workingDir, strconv.Itoa(int(orgID)))
fileStore := NewFileStore(orgID, kvStore, workingPath)
@@ -144,6 +147,9 @@ func NewAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store A
decryptFn: decryptFn,
fileStore: fileStore,
logger: l,
// TODO: Preferably, logic around autogen would be outside of the specific alertmanager implementation so that remote alertmanager will get it for free.
withAutogen: withAutogen,
}
return am, nil
@@ -180,11 +186,17 @@ func (am *alertmanager) SaveAndApplyDefaultConfig(ctx context.Context) error {
}
err = am.Store.SaveAlertmanagerConfigurationWithCallback(ctx, cmd, func() error {
_, err := am.applyConfig(cfg)
if am.withAutogen {
err := AddAutogenConfig(ctx, am.logger, am.Store, am.orgID, &cfg.AlertmanagerConfig, true)
if err != nil {
return err
}
}
_, err = am.applyConfig(cfg)
return err
})
if err != nil {
outerErr = nil
outerErr = err
return
}
})
@@ -195,6 +207,9 @@ func (am *alertmanager) SaveAndApplyDefaultConfig(ctx context.Context) error {
// SaveAndApplyConfig saves the configuration to the database and applies the configuration to the Alertmanager.
// It rolls back the save if we fail to apply the configuration.
func (am *alertmanager) SaveAndApplyConfig(ctx context.Context, cfg *apimodels.PostableUserConfig) error {
// Remove autogenerated config from the user config before saving it, may not be necessary as we already remove
// the autogenerated config before provenance guard. However, this is low impact and a good safety net.
RemoveAutogenConfigIfExists(cfg.AlertmanagerConfig.Route)
rawConfig, err := json.Marshal(&cfg)
if err != nil {
return fmt.Errorf("failed to serialize to the Alertmanager configuration: %w", err)
@@ -210,7 +225,14 @@ func (am *alertmanager) SaveAndApplyConfig(ctx context.Context, cfg *apimodels.P
}
err = am.Store.SaveAlertmanagerConfigurationWithCallback(ctx, cmd, func() error {
_, err := am.applyConfig(cfg)
if am.withAutogen {
err := AddAutogenConfig(ctx, am.logger, am.Store, am.orgID, &cfg.AlertmanagerConfig, false)
if err != nil {
return err
}
}
_, err = am.applyConfig(cfg)
return err
})
if err != nil {
@@ -232,6 +254,17 @@ func (am *alertmanager) ApplyConfig(ctx context.Context, dbCfg *ngmodels.AlertCo
var outerErr error
am.Base.WithLock(func() {
if am.withAutogen {
err := AddAutogenConfig(ctx, am.logger, am.Store, am.orgID, &cfg.AlertmanagerConfig, true)
if err != nil {
outerErr = err
return
}
}
// Note: Adding the autogen config here causes alert_configuration_history to update last_applied more often,
// since we will now update last_applied when autogen changes even if the user-created config remains the same.
// To fix this, however, the local alertmanager needs to be able to tell the difference between user-created and
// autogen config, which may introduce cross-cutting complexity.
if err := am.applyAndMarkConfig(ctx, dbCfg.ConfigurationHash, cfg); err != nil {
outerErr = fmt.Errorf("unable to apply configuration: %w", err)
return

View File

@@ -8,6 +8,7 @@ import (
"github.com/go-openapi/strfmt"
"github.com/grafana/grafana/pkg/services/featuremgmt"
"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/store"
@@ -34,13 +35,48 @@ type configurationStore interface {
GetLatestAlertmanagerConfiguration(ctx context.Context, orgID int64) (*models.AlertConfiguration, error)
}
func (moa *MultiOrgAlertmanager) GetAlertmanagerConfiguration(ctx context.Context, org int64) (definitions.GettableUserConfig, error) {
// ApplyConfig applies the given alertmanager configuration to the Alertmanager of the given org.
// Can be used to force regeneration of autogenerated routes.
// Any lookup error other than ErrAlertmanagerNotReady is returned unchanged; a failure to apply
// the configuration is returned wrapped.
func (moa *MultiOrgAlertmanager) ApplyConfig(ctx context.Context, orgId int64, dbConfig *models.AlertConfiguration) error {
	am, err := moa.AlertmanagerFor(orgId)
	// It's okay if the alertmanager isn't ready yet, we're changing its config anyway.
	if err != nil && !errors.Is(err, ErrAlertmanagerNotReady) {
		return err
	}

	if applyErr := am.ApplyConfig(ctx, dbConfig); applyErr != nil {
		return fmt.Errorf("failed to apply configuration: %w", applyErr)
	}
	return nil
}
// GetAlertmanagerConfiguration returns the latest alertmanager configuration for a given org.
// If withAutogen is true, the configuration will be augmented with autogenerated routes.
func (moa *MultiOrgAlertmanager) GetAlertmanagerConfiguration(ctx context.Context, org int64, withAutogen bool) (definitions.GettableUserConfig, error) {
amConfig, err := moa.configStore.GetLatestAlertmanagerConfiguration(ctx, org)
if err != nil {
return definitions.GettableUserConfig{}, fmt.Errorf("failed to get latest configuration: %w", err)
}
return moa.gettableUserConfigFromAMConfigString(ctx, org, amConfig.AlertmanagerConfiguration)
cfg, err := moa.gettableUserConfigFromAMConfigString(ctx, org, amConfig.AlertmanagerConfiguration)
if err != nil {
return definitions.GettableUserConfig{}, err
}
if moa.featureManager.IsEnabled(ctx, featuremgmt.FlagAlertingSimplifiedRouting) && withAutogen {
// We validate the notification settings in a similar way to when we POST.
// Otherwise, broken settings (e.g. a receiver that doesn't exist) will cause the config returned here to be
// different than the config currently in-use.
// TODO: Preferably, we'd be getting the config directly from the in-memory AM so adding the autogen config would not be necessary.
err := AddAutogenConfig(ctx, moa.logger, moa.configStore, org, &cfg.AlertmanagerConfig, true)
if err != nil {
return definitions.GettableUserConfig{}, err
}
}
return cfg, nil
}
// ActivateHistoricalConfiguration will set the current alertmanager configuration to a previous value based on the provided
@@ -108,6 +144,7 @@ func (moa *MultiOrgAlertmanager) gettableUserConfigFromAMConfigString(ctx contex
if err != nil {
return definitions.GettableUserConfig{}, fmt.Errorf("failed to unmarshal alertmanager configuration: %w", err)
}
result := definitions.GettableUserConfig{
TemplateFiles: cfg.TemplateFiles,
AlertmanagerConfig: definitions.GettableApiAlertingConfig{
@@ -155,7 +192,7 @@ func (moa *MultiOrgAlertmanager) gettableUserConfigFromAMConfigString(ctx contex
return result, nil
}
func (moa *MultiOrgAlertmanager) ApplyAlertmanagerConfiguration(ctx context.Context, org int64, config definitions.PostableUserConfig) error {
func (moa *MultiOrgAlertmanager) SaveAndApplyAlertmanagerConfiguration(ctx context.Context, org int64, config definitions.PostableUserConfig) error {
// We cannot add this validation to PostableUserConfig as that struct is used for both
// Grafana Alertmanager (where inhibition rules are not supported) and External Alertmanagers
// (including Mimir) where inhibition rules are supported.

View File

@@ -46,7 +46,7 @@ func setupAMTest(t *testing.T) *alertmanager {
kvStore := fakes.NewFakeKVStore(t)
secretsService := secretsManager.SetupTestService(t, database.ProvideSecretsStore(sqlStore))
decryptFn := secretsService.GetDecryptedValue
am, err := NewAlertmanager(context.Background(), 1, cfg, s, kvStore, &NilPeer{}, decryptFn, nil, m)
am, err := NewAlertmanager(context.Background(), 1, cfg, s, kvStore, &NilPeer{}, decryptFn, nil, m, false)
require.NoError(t, err)
return am
}

View File

@@ -0,0 +1,185 @@
package notifier
import (
"context"
"errors"
"fmt"
"slices"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/prometheus/alertmanager/pkg/labels"
"github.com/prometheus/common/model"
"golang.org/x/exp/maps"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/models"
)
// autogenRuleStore is the storage access needed to build autogenerated routes.
type autogenRuleStore interface {
// ListNotificationSettings returns the notification settings selected by q, grouped by the key of the rule
// they belong to.
ListNotificationSettings(ctx context.Context, q models.ListNotificationSettingsQuery) (map[models.AlertRuleKey][]models.NotificationSettings, error)
}
// AddAutogenConfig builds the autogenerated route for the given org and inserts it into the routing tree of cfg.
// When skipInvalid is true, notification settings that fail validation are skipped; otherwise the first invalid
// setting aborts the operation and its error is returned.
func AddAutogenConfig[R receiver](ctx context.Context, logger log.Logger, store autogenRuleStore, orgId int64, cfg apiAlertingConfig[R], skipInvalid bool) error {
	generated, err := newAutogeneratedRoute(ctx, logger, store, orgId, cfg, skipInvalid)
	if err != nil {
		return err
	}
	// addToRoute yields nil on success, so its result can be returned as is.
	return generated.addToRoute(cfg.GetRoute())
}
// newAutogeneratedRoute creates a new autogenerated route based on the notification settings stored for the given org.
// cfg supplies the receivers (each one gets a default notification setting), the input for the settings validator,
// and the root route whose receiver is used for the autogenerated root.
// If skipInvalid is true, settings that fail validation are logged and skipped; otherwise the first invalid setting
// causes an error to be returned.
// A zero-value autogeneratedRoute is returned when there are neither receivers nor valid settings.
func newAutogeneratedRoute[R receiver](ctx context.Context, logger log.Logger, store autogenRuleStore, orgId int64, cfg apiAlertingConfig[R], skipInvalid bool) (autogeneratedRoute, error) {
	settings, err := store.ListNotificationSettings(ctx, models.ListNotificationSettingsQuery{OrgID: orgId})
	if err != nil {
		return autogeneratedRoute{}, fmt.Errorf("failed to list alert rules: %w", err)
	}

	notificationSettings := make(map[data.Fingerprint]models.NotificationSettings)
	// Add a default notification setting for each contact point. This is to ensure that we always have a route for each
	// contact point even if no rules are using it. This will prevent race conditions between AM sync and rule sync.
	for _, rcv := range cfg.GetReceivers() {
		setting := models.NewDefaultNotificationSettings(rcv.GetName())
		notificationSettings[setting.Fingerprint()] = setting
	}

	validator := NewNotificationSettingsValidator(cfg)
	for ruleKey, ruleSettings := range settings {
		for _, setting := range ruleSettings {
			// TODO we should register these errors and somehow present them to the users or make sure the config is always valid.
			if err = validator.Validate(setting); err != nil {
				if skipInvalid {
					logger.Error("Rule notification settings are invalid. Skipping", append(ruleKey.LogContext(), "error", err)...)
					continue
				}
				return autogeneratedRoute{}, fmt.Errorf("invalid notification settings for rule %s: %w", ruleKey.UID, err)
			}
			// Keep only unique settings: the first setting seen for a fingerprint wins.
			fp := setting.Fingerprint()
			if _, ok := notificationSettings[fp]; !ok {
				notificationSettings[fp] = setting
			}
		}
	}

	if len(notificationSettings) == 0 {
		return autogeneratedRoute{}, nil
	}

	// The root route can be nil. Do not dereference it here: addToRoute reports a missing root route as an error,
	// and it overwrites the receiver of the autogenerated root with the one of the user-defined root anyway.
	var defaultReceiver string
	if root := cfg.GetRoute(); root != nil {
		defaultReceiver = root.Receiver
	}

	newAutogenRoute, err := generateRouteFromSettings(defaultReceiver, notificationSettings)
	if err != nil {
		return autogeneratedRoute{}, fmt.Errorf("failed to create autogenerated route: %w", err)
	}
	return newAutogenRoute, nil
}
// autogeneratedRoute holds the root of the autogenerated routing tree.
type autogeneratedRoute struct {
// Route is the root of the autogenerated tree. It is nil in the zero value, in which case addToRoute
// leaves the target route untouched.
Route *definitions.Route
}
// generateRouteFromSettings builds the autogenerated routing tree from the given settings. The tree has up to 3 layers:
//  1. the root, with a matcher by label models.AutogeneratedRouteLabel equals 'true';
//  2. one route per receiver, with a matcher by receiver name;
//  3. one route per unique combination of optional settings, with a matcher by the settings fingerprint. It is
//     created only if the setting is not all-default.
//
// Fingerprints are processed in sorted order so that the resulting tree is the same for the same set of settings.
func generateRouteFromSettings(defaultReceiver string, settings map[data.Fingerprint]models.NotificationSettings) (autogeneratedRoute, error) {
	rootMatcher, err := labels.NewMatcher(labels.MatchEqual, models.AutogeneratedRouteLabel, "true")
	if err != nil {
		return autogeneratedRoute{}, err
	}
	root := &definitions.Route{
		Receiver:       defaultReceiver,
		ObjectMatchers: definitions.ObjectMatchers{rootMatcher},
		Continue:       false, // We explicitly don't continue toward user-created routes if this matches.
	}

	// Walk the settings in sorted fingerprint order to keep the order of the generated routes stable.
	fingerprints := maps.Keys(settings)
	slices.Sort(fingerprints)

	byReceiver := make(map[string]*definitions.Route)
	for _, fp := range fingerprints {
		s := settings[fp]

		parent, seen := byReceiver[s.Receiver]
		if !seen {
			// First setting for this receiver: create its dedicated route under the root.
			receiverMatcher, err := labels.NewMatcher(labels.MatchEqual, models.AutogeneratedRouteReceiverNameLabel, s.Receiver)
			if err != nil {
				return autogeneratedRoute{}, err
			}
			parent = &definitions.Route{
				Receiver:       s.Receiver,
				ObjectMatchers: definitions.ObjectMatchers{receiverMatcher},
				Continue:       false,
				// Since we'll have many rules from different folders using this policy, we ensure it has these necessary groupings.
				GroupByStr: []string{models.FolderTitleLabel, model.AlertNameLabel},
			}
			byReceiver[s.Receiver] = parent
			root.Routes = append(root.Routes, parent)
		}

		// Do not create hash specific route if all group settings such as mute timings, group_wait, group_interval, etc are default
		if s.IsAllDefault() {
			continue
		}

		hashMatcher, err := labels.NewMatcher(labels.MatchEqual, models.AutogeneratedRouteSettingsHashLabel, fp.String())
		if err != nil {
			return autogeneratedRoute{}, err
		}
		settingsRoute := &definitions.Route{
			Receiver:          s.Receiver,
			ObjectMatchers:    definitions.ObjectMatchers{hashMatcher},
			Continue:          false,     // Only a single setting-specific route should match.
			GroupByStr:        s.GroupBy, // Note: in order to pass validation at least FolderTitleLabel and AlertNameLabel are always included.
			MuteTimeIntervals: s.MuteTimeIntervals,
			GroupWait:         s.GroupWait,
			GroupInterval:     s.GroupInterval,
			RepeatInterval:    s.RepeatInterval,
		}
		parent.Routes = append(parent.Routes, settingsRoute)
	}

	return autogeneratedRoute{Route: root}, nil
}
// addToRoute inserts this autogenerated route as the first child of the given route.
// It returns an error if route is nil and does nothing if there is no autogenerated route to add.
// Top-level autogenerated routes already present in route are removed first.
func (ar *autogeneratedRoute) addToRoute(route *definitions.Route) error {
	switch {
	case route == nil:
		return errors.New("route does not exist")
	case ar == nil || ar.Route == nil:
		return nil
	}

	// The autogenerated root always takes the receiver of the route it is attached to.
	ar.Route.Receiver = route.Receiver

	// Remove existing autogenerated route if it exists.
	RemoveAutogenConfigIfExists(route)

	combined := make([]*definitions.Route, 0, len(route.Routes)+1)
	combined = append(combined, ar.Route)
	combined = append(combined, route.Routes...)
	route.Routes = combined
	return nil
}
// RemoveAutogenConfigIfExists removes all top-level autogenerated routes from the provided route.
// If route is nil or no autogenerated routes exist, this function does nothing.
func RemoveAutogenConfigIfExists(route *definitions.Route) {
	// Callers pass the root route of a user-supplied configuration, which may be absent.
	if route == nil {
		return
	}
	route.Routes = slices.DeleteFunc(route.Routes, isAutogeneratedRoot)
}
// isAutogeneratedRoot reports whether the route is the root of an autogenerated route, i.e. whether it has
// exactly one object matcher and that matcher is on the label models.AutogeneratedRouteLabel.
// Only the label name is compared; the matcher's type and value are not inspected.
func isAutogeneratedRoot(route *definitions.Route) bool {
	if len(route.ObjectMatchers) != 1 {
		return false
	}
	return route.ObjectMatchers[0].Name == models.AutogeneratedRouteLabel
}

View File

@@ -0,0 +1,238 @@
package notifier
import (
"context"
"testing"
"time"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/prometheus/alertmanager/config"
"github.com/prometheus/alertmanager/pkg/labels"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"github.com/grafana/grafana/pkg/infra/log/logtest"
"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/util"
)
// TestAddAutogenConfig verifies the routing tree that AddAutogenConfig adds to a configuration for various
// combinations of receivers and stored notification settings, as well as the handling of invalid settings
// with and without skipInvalid.
func TestAddAutogenConfig(t *testing.T) {
	// rootRoute returns a fresh user-defined root route so that test cases never share state.
	rootRoute := func() *definitions.Route {
		return &definitions.Route{
			Receiver: "default",
		}
	}

	// configGen builds a configuration with the root route and the given receiver and mute interval names.
	configGen := func(receivers []string, muteIntervals []string) *definitions.PostableApiAlertingConfig {
		cfg := &definitions.PostableApiAlertingConfig{
			Config: definitions.Config{
				Route: rootRoute(),
			},
		}
		for _, receiver := range receivers {
			cfg.Receivers = append(cfg.Receivers, &definitions.PostableApiReceiver{
				Receiver: config.Receiver{
					Name: receiver,
				},
			})
		}
		for _, muteInterval := range muteIntervals {
			cfg.MuteTimeIntervals = append(cfg.MuteTimeIntervals, config.MuteTimeInterval{
				Name: muteInterval,
			})
		}
		return cfg
	}

	withChildRoutes := func(route *definitions.Route, children ...*definitions.Route) *definitions.Route {
		route.Routes = append(route.Routes, children...)
		return route
	}

	matcher := func(key, val string) definitions.ObjectMatchers {
		m, err := labels.NewMatcher(labels.MatchEqual, key, val)
		require.NoError(t, err)
		return definitions.ObjectMatchers{m}
	}

	// basicContactRoute is the receiver-level route expected for every receiver.
	basicContactRoute := func(receiver string) *definitions.Route {
		return &definitions.Route{
			Receiver:       receiver,
			ObjectMatchers: matcher(models.AutogeneratedRouteReceiverNameLabel, receiver),
			GroupByStr:     []string{models.FolderTitleLabel, model.AlertNameLabel},
		}
	}

	testCases := []struct {
		name             string
		existingConfig   *definitions.PostableApiAlertingConfig
		storeSettings    []models.NotificationSettings
		skipInvalid      bool
		expRoute         *definitions.Route
		expErrorContains string
	}{
		{
			name:           "no settings or receivers, no change",
			existingConfig: configGen(nil, nil),
			storeSettings:  []models.NotificationSettings{},
			expRoute:       rootRoute(),
		},
		{
			name:           "no settings but some receivers, add default routes for receivers",
			existingConfig: configGen([]string{"receiver1", "receiver2", "receiver3"}, nil),
			storeSettings:  []models.NotificationSettings{},
			expRoute: withChildRoutes(rootRoute(), &definitions.Route{
				Receiver:       "default",
				ObjectMatchers: matcher(models.AutogeneratedRouteLabel, "true"),
				Routes: []*definitions.Route{
					basicContactRoute("receiver1"),
					basicContactRoute("receiver3"),
					basicContactRoute("receiver2"),
				},
			}),
		},
		{
			name:           "settings with no custom options, add default routes only",
			existingConfig: configGen([]string{"receiver1", "receiver2", "receiver3"}, nil),
			storeSettings:  []models.NotificationSettings{models.NewDefaultNotificationSettings("receiver1"), models.NewDefaultNotificationSettings("receiver2")},
			expRoute: withChildRoutes(rootRoute(), &definitions.Route{
				Receiver:       "default",
				ObjectMatchers: matcher(models.AutogeneratedRouteLabel, "true"),
				Routes: []*definitions.Route{
					basicContactRoute("receiver1"),
					basicContactRoute("receiver3"),
					basicContactRoute("receiver2"),
				},
			}),
		},
		{
			name:           "settings with custom options, add option-specific routes",
			existingConfig: configGen([]string{"receiver1", "receiver2", "receiver3", "receiver4", "receiver5"}, []string{"maintenance"}),
			storeSettings: []models.NotificationSettings{
				models.CopyNotificationSettings(models.NewDefaultNotificationSettings("receiver1"), models.NSMuts.WithGroupInterval(util.Pointer(1*time.Minute))),
				models.CopyNotificationSettings(models.NewDefaultNotificationSettings("receiver2"), models.NSMuts.WithGroupWait(util.Pointer(2*time.Minute))),
				models.CopyNotificationSettings(models.NewDefaultNotificationSettings("receiver3"), models.NSMuts.WithRepeatInterval(util.Pointer(3*time.Minute))),
				models.CopyNotificationSettings(models.NewDefaultNotificationSettings("receiver4"), models.NSMuts.WithGroupBy(model.AlertNameLabel, models.FolderTitleLabel, "custom")),
				models.CopyNotificationSettings(models.NewDefaultNotificationSettings("receiver5"), models.NSMuts.WithMuteTimeIntervals("maintenance")),
				{
					Receiver:          "receiver1",
					GroupBy:           []string{model.AlertNameLabel, models.FolderTitleLabel, "custom"},
					GroupInterval:     util.Pointer(model.Duration(1 * time.Minute)),
					GroupWait:         util.Pointer(model.Duration(2 * time.Minute)),
					RepeatInterval:    util.Pointer(model.Duration(3 * time.Minute)),
					MuteTimeIntervals: []string{"maintenance"},
				},
			},
			expRoute: withChildRoutes(rootRoute(), &definitions.Route{
				Receiver:       "default",
				ObjectMatchers: matcher(models.AutogeneratedRouteLabel, "true"),
				Routes: []*definitions.Route{
					withChildRoutes(basicContactRoute("receiver5"), &definitions.Route{
						Receiver:          "receiver5",
						ObjectMatchers:    matcher(models.AutogeneratedRouteSettingsHashLabel, "030d6474aec0b553"),
						MuteTimeIntervals: []string{"maintenance"},
					}),
					withChildRoutes(basicContactRoute("receiver1"), &definitions.Route{
						Receiver:       "receiver1",
						ObjectMatchers: matcher(models.AutogeneratedRouteSettingsHashLabel, "dde34b8127e68f31"),
						GroupInterval:  util.Pointer(model.Duration(1 * time.Minute)),
					}, &definitions.Route{
						Receiver:          "receiver1",
						ObjectMatchers:    matcher(models.AutogeneratedRouteSettingsHashLabel, "ed4038c5d6733607"),
						GroupByStr:        []string{model.AlertNameLabel, models.FolderTitleLabel, "custom"},
						GroupInterval:     util.Pointer(model.Duration(1 * time.Minute)),
						GroupWait:         util.Pointer(model.Duration(2 * time.Minute)),
						RepeatInterval:    util.Pointer(model.Duration(3 * time.Minute)),
						MuteTimeIntervals: []string{"maintenance"},
					}),
					withChildRoutes(basicContactRoute("receiver2"), &definitions.Route{
						Receiver:       "receiver2",
						ObjectMatchers: matcher(models.AutogeneratedRouteSettingsHashLabel, "27e1d1717c9ef621"),
						GroupWait:      util.Pointer(model.Duration(2 * time.Minute)),
					}),
					withChildRoutes(basicContactRoute("receiver4"), &definitions.Route{
						Receiver:       "receiver4",
						ObjectMatchers: matcher(models.AutogeneratedRouteSettingsHashLabel, "5e5ab8d592b12e86"),
						GroupByStr:     []string{model.AlertNameLabel, models.FolderTitleLabel, "custom"},
					}),
					withChildRoutes(basicContactRoute("receiver3"), &definitions.Route{
						Receiver:       "receiver3",
						ObjectMatchers: matcher(models.AutogeneratedRouteSettingsHashLabel, "9e282ef0193d830a"),
						RepeatInterval: util.Pointer(model.Duration(3 * time.Minute)),
					}),
				},
			}),
		},
		{
			name:           "when skipInvalid=true, invalid settings are skipped",
			existingConfig: configGen([]string{"receiver1", "receiver2", "receiver3"}, nil),
			storeSettings: []models.NotificationSettings{
				models.NewDefaultNotificationSettings("receiverA"), // Doesn't exist.
				models.CopyNotificationSettings(models.NewDefaultNotificationSettings("receiver1"), models.NSMuts.WithMuteTimeIntervals("maintenance")), // Doesn't exist.
				models.CopyNotificationSettings(models.NewDefaultNotificationSettings("receiver2"), models.NSMuts.WithGroupWait(util.Pointer(-2*time.Minute))), // Negative.
			},
			skipInvalid: true,
			expRoute: withChildRoutes(rootRoute(), &definitions.Route{
				Receiver:       "default",
				ObjectMatchers: matcher(models.AutogeneratedRouteLabel, "true"),
				Routes: []*definitions.Route{
					basicContactRoute("receiver1"),
					basicContactRoute("receiver3"),
					basicContactRoute("receiver2"),
				},
			}),
		},
		{
			name:             "when skipInvalid=false, invalid receiver throws error",
			existingConfig:   configGen([]string{"receiver1", "receiver2", "receiver3"}, nil),
			storeSettings:    []models.NotificationSettings{models.NewDefaultNotificationSettings("receiverA")},
			skipInvalid:      false,
			expErrorContains: "receiverA",
		},
		{
			// Subtest names are kept unique so that a failure can be attributed and selected with -run.
			name:             "when skipInvalid=false, invalid mute time interval throws error",
			existingConfig:   configGen([]string{"receiver1", "receiver2", "receiver3"}, nil),
			storeSettings:    []models.NotificationSettings{models.CopyNotificationSettings(models.NewDefaultNotificationSettings("receiver1"), models.NSMuts.WithMuteTimeIntervals("maintenance"))},
			skipInvalid:      false,
			expErrorContains: "maintenance",
		},
		{
			name:             "when skipInvalid=false, invalid group wait throws error",
			existingConfig:   configGen([]string{"receiver1", "receiver2", "receiver3"}, nil),
			storeSettings:    []models.NotificationSettings{models.CopyNotificationSettings(models.NewDefaultNotificationSettings("receiver2"), models.NSMuts.WithGroupWait(util.Pointer(-2*time.Minute)))},
			skipInvalid:      false,
			expErrorContains: "group wait",
		},
	}

	for _, tt := range testCases {
		t.Run(tt.name, func(t *testing.T) {
			orgId := int64(1)
			store := &fakeConfigStore{
				notificationSettings: make(map[int64]map[models.AlertRuleKey][]models.NotificationSettings),
			}
			store.notificationSettings[orgId] = make(map[models.AlertRuleKey][]models.NotificationSettings)
			// Every setting is stored under its own, randomly named rule.
			for _, setting := range tt.storeSettings {
				store.notificationSettings[orgId][models.AlertRuleKey{OrgID: orgId, UID: util.GenerateShortUID()}] = []models.NotificationSettings{setting}
			}

			err := AddAutogenConfig(context.Background(), &logtest.Fake{}, store, orgId, tt.existingConfig, tt.skipInvalid)
			if tt.expErrorContains != "" {
				require.Error(t, err)
				require.ErrorContains(t, err, tt.expErrorContains)
				return
			}
			require.NoError(t, err)

			cOpt := []cmp.Option{
				cmpopts.IgnoreUnexported(definitions.Route{}, labels.Matcher{}),
			}
			if !cmp.Equal(tt.expRoute, tt.existingConfig.Route, cOpt...) {
				t.Errorf("Unexpected Route: %v", cmp.Diff(tt.expRoute, tt.existingConfig.Route, cOpt...))
			}
		})
	}
}

View File

@@ -16,6 +16,7 @@ import (
"github.com/grafana/grafana/pkg/infra/kvstore"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/featuremgmt"
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
"github.com/grafana/grafana/pkg/services/ngalert/models"
@@ -67,8 +68,9 @@ type MultiOrgAlertmanager struct {
alertmanagersMtx sync.RWMutex
alertmanagers map[int64]Alertmanager
settings *setting.Cfg
logger log.Logger
settings *setting.Cfg
featureManager featuremgmt.FeatureToggles
logger log.Logger
// clusterPeer represents the clustering peers of Alertmanagers between Grafana instances.
peer alertingNotify.ClusterPeer
@@ -95,24 +97,35 @@ func WithAlertmanagerOverride(f func(OrgAlertmanagerFactory) OrgAlertmanagerFact
}
}
func NewMultiOrgAlertmanager(cfg *setting.Cfg, configStore AlertingStore, orgStore store.OrgStore,
kvStore kvstore.KVStore, provStore provisioningStore, decryptFn alertingNotify.GetDecryptedValueFn,
m *metrics.MultiOrgAlertmanager, ns notifications.Service, l log.Logger, s secrets.Service, opts ...Option,
func NewMultiOrgAlertmanager(
cfg *setting.Cfg,
configStore AlertingStore,
orgStore store.OrgStore,
kvStore kvstore.KVStore,
provStore provisioningStore,
decryptFn alertingNotify.GetDecryptedValueFn,
m *metrics.MultiOrgAlertmanager,
ns notifications.Service,
l log.Logger,
s secrets.Service,
featureManager featuremgmt.FeatureToggles,
opts ...Option,
) (*MultiOrgAlertmanager, error) {
moa := &MultiOrgAlertmanager{
Crypto: NewCrypto(s, configStore, l),
ProvStore: provStore,
logger: l,
settings: cfg,
alertmanagers: map[int64]Alertmanager{},
configStore: configStore,
orgStore: orgStore,
kvStore: kvStore,
decryptFn: decryptFn,
metrics: m,
ns: ns,
peer: &NilPeer{},
logger: l,
settings: cfg,
featureManager: featureManager,
alertmanagers: map[int64]Alertmanager{},
configStore: configStore,
orgStore: orgStore,
kvStore: kvStore,
decryptFn: decryptFn,
metrics: m,
ns: ns,
peer: &NilPeer{},
}
if err := moa.setupClustering(cfg); err != nil {
@@ -122,7 +135,7 @@ func NewMultiOrgAlertmanager(cfg *setting.Cfg, configStore AlertingStore, orgSto
// Set up the default per tenant Alertmanager factory.
moa.factory = func(ctx context.Context, orgID int64) (Alertmanager, error) {
m := metrics.NewAlertmanagerMetrics(moa.metrics.GetOrCreateOrgRegistry(orgID))
return NewAlertmanager(ctx, orgID, moa.settings, moa.configStore, moa.kvStore, moa.peer, moa.decryptFn, moa.ns, m)
return NewAlertmanager(ctx, orgID, moa.settings, moa.configStore, moa.kvStore, moa.peer, moa.decryptFn, moa.ns, m, featureManager.IsEnabled(ctx, featuremgmt.FlagAlertingSimplifiedRouting))
}
for _, opt := range opts {

View File

@@ -9,6 +9,7 @@ import (
"time"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/featuremgmt"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/notifier"
@@ -98,6 +99,7 @@ func TestMultiorgAlertmanager_RemoteSecondaryMode(t *testing.T) {
nil,
nopLogger,
secretsService,
&featuremgmt.FeatureManager{},
override,
)
require.NoError(t, err)

View File

@@ -15,6 +15,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/featuremgmt"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/store"
@@ -45,7 +46,7 @@ func TestMultiOrgAlertmanager_SyncAlertmanagersForOrgs(t *testing.T) {
DisabledOrgs: map[int64]struct{}{5: {}},
}, // do not poll in tests.
}
mam, err := NewMultiOrgAlertmanager(cfg, configStore, orgStore, kvStore, provStore, decryptFn, m.GetMultiOrgAlertmanagerMetrics(), nil, log.New("testlogger"), secretsService)
mam, err := NewMultiOrgAlertmanager(cfg, configStore, orgStore, kvStore, provStore, decryptFn, m.GetMultiOrgAlertmanagerMetrics(), nil, log.New("testlogger"), secretsService, &featuremgmt.FeatureManager{})
require.NoError(t, err)
ctx := context.Background()
@@ -178,7 +179,7 @@ func TestMultiOrgAlertmanager_SyncAlertmanagersForOrgsWithFailures(t *testing.T)
DefaultConfiguration: setting.GetAlertmanagerDefaultConfiguration(),
}, // do not poll in tests.
}
mam, err := NewMultiOrgAlertmanager(cfg, configStore, orgStore, kvStore, provStore, decryptFn, m.GetMultiOrgAlertmanagerMetrics(), nil, log.New("testlogger"), secretsService)
mam, err := NewMultiOrgAlertmanager(cfg, configStore, orgStore, kvStore, provStore, decryptFn, m.GetMultiOrgAlertmanagerMetrics(), nil, log.New("testlogger"), secretsService, &featuremgmt.FeatureManager{})
require.NoError(t, err)
ctx := context.Background()
@@ -265,7 +266,7 @@ func TestMultiOrgAlertmanager_AlertmanagerFor(t *testing.T) {
decryptFn := secretsService.GetDecryptedValue
reg := prometheus.NewPedanticRegistry()
m := metrics.NewNGAlert(reg)
mam, err := NewMultiOrgAlertmanager(cfg, configStore, orgStore, kvStore, provStore, decryptFn, m.GetMultiOrgAlertmanagerMetrics(), nil, log.New("testlogger"), secretsService)
mam, err := NewMultiOrgAlertmanager(cfg, configStore, orgStore, kvStore, provStore, decryptFn, m.GetMultiOrgAlertmanagerMetrics(), nil, log.New("testlogger"), secretsService, &featuremgmt.FeatureManager{})
require.NoError(t, err)
ctx := context.Background()
@@ -317,7 +318,7 @@ func TestMultiOrgAlertmanager_ActivateHistoricalConfiguration(t *testing.T) {
decryptFn := secretsService.GetDecryptedValue
reg := prometheus.NewPedanticRegistry()
m := metrics.NewNGAlert(reg)
mam, err := NewMultiOrgAlertmanager(cfg, configStore, orgStore, kvStore, provStore, decryptFn, m.GetMultiOrgAlertmanagerMetrics(), nil, log.New("testlogger"), secretsService)
mam, err := NewMultiOrgAlertmanager(cfg, configStore, orgStore, kvStore, provStore, decryptFn, m.GetMultiOrgAlertmanagerMetrics(), nil, log.New("testlogger"), secretsService, &featuremgmt.FeatureManager{})
require.NoError(t, err)
ctx := context.Background()

View File

@@ -19,6 +19,35 @@ type fakeConfigStore struct {
// historicConfigs stores configs by orgID.
historicConfigs map[int64][]*models.HistoricAlertConfiguration
// notificationSettings stores notification settings by orgID.
notificationSettings map[int64]map[models.AlertRuleKey][]models.NotificationSettings
}
// ListNotificationSettings returns the notification settings stored for the organization in q.OrgID.
//
// If nothing is stored for that organization, it returns (nil, nil).
// If q.ReceiverName is empty, the stored map is returned as-is (not a copy).
// Otherwise a new map is returned that keeps only the rules where at least one
// of the settings references q.ReceiverName; the full settings slice of such a
// rule is kept, including entries that reference other receivers.
func (f *fakeConfigStore) ListNotificationSettings(ctx context.Context, q models.ListNotificationSettingsQuery) (map[models.AlertRuleKey][]models.NotificationSettings, error) {
	orgSettings, known := f.notificationSettings[q.OrgID]
	if !known {
		return nil, nil
	}
	if q.ReceiverName == "" {
		return orgSettings, nil
	}

	matched := make(map[models.AlertRuleKey][]models.NotificationSettings)
	for ruleKey, ruleSettings := range orgSettings {
		for i := range ruleSettings {
			if ruleSettings[i].Receiver == q.ReceiverName {
				// One match is enough to keep the whole rule entry.
				matched[ruleKey] = ruleSettings
				break
			}
		}
	}
	return matched, nil
}
// Saves the image or returns an error.
@@ -199,3 +228,10 @@ type fakeState struct {
// MarshalBinary returns the fake state's data converted to a byte slice.
// It never returns an error.
func (fs *fakeState) MarshalBinary() ([]byte, error) {
	raw := []byte(fs.data)
	return raw, nil
}
// NoValidation is a validator for notification settings that accepts everything.
type NoValidation struct{}

// Validate always returns nil, regardless of the settings passed in.
func (NoValidation) Validate(_ models.NotificationSettings) error {
	var none error
	return none
}

View File

@@ -0,0 +1,132 @@
package notifier
import (
"context"
"errors"
"fmt"
"sync"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/prometheus/alertmanager/config"
)
// NotificationSettingsValidator validates NotificationSettings against the current Alertmanager configuration.
type NotificationSettingsValidator interface {
// Validate returns a non-nil error when the given settings are not acceptable, and nil otherwise.
Validate(s models.NotificationSettings) error
}
// staticValidator is a NotificationSettingsValidator that uses static pre-fetched values for available receivers and mute timings.
// Both fields are used as sets: only the presence of a key is checked.
type staticValidator struct {
// availableReceivers holds the names of the receivers that settings are allowed to reference.
availableReceivers map[string]struct{}
// availableMuteTimings holds the names of the mute time intervals that settings are allowed to reference.
availableMuteTimings map[string]struct{}
}
// apiAlertingConfig contains the methods required to validate NotificationSettings and create autogen routes.
// R is the concrete receiver type exposed by the configuration.
type apiAlertingConfig[R receiver] interface {
// GetReceivers returns the receivers defined in the configuration.
GetReceivers() []R
// GetMuteTimeIntervals returns the mute time intervals defined in the configuration.
GetMuteTimeIntervals() []config.MuteTimeInterval
// GetRoute returns the configuration's route.
// NOTE(review): presumably the root of the routing tree used for autogen routes; confirm against the implementations.
GetRoute() *definitions.Route
}
// receiver is the type constraint for receivers accepted by apiAlertingConfig.
// Only the receiver's name is needed.
type receiver interface {
// GetName returns the name of the receiver.
GetName() string
}
// NewNotificationSettingsValidator builds a NotificationSettingsValidator from the given apiAlertingConfig.
// The names of all receivers and mute time intervals are collected once, at construction time;
// later changes to the configuration are not seen by the returned validator.
func NewNotificationSettingsValidator[R receiver](am apiAlertingConfig[R]) NotificationSettingsValidator {
	v := staticValidator{
		availableReceivers:   map[string]struct{}{},
		availableMuteTimings: map[string]struct{}{},
	}
	for _, r := range am.GetReceivers() {
		v.availableReceivers[r.GetName()] = struct{}{}
	}
	for _, mt := range am.GetMuteTimeIntervals() {
		v.availableMuteTimings[mt.Name] = struct{}{}
	}
	return v
}
// Validate checks the settings in two stages.
//
// First it runs the settings' own Validate method and returns its error unchanged if it fails.
// Then it verifies that the referenced receiver and every referenced mute time interval
// are known to this validator. All missing references are reported together in a single
// joined error; nil is returned when nothing is missing.
func (n staticValidator) Validate(settings models.NotificationSettings) error {
	err := settings.Validate()
	if err != nil {
		return err
	}

	var problems []error
	if _, known := n.availableReceivers[settings.Receiver]; !known {
		problems = append(problems, fmt.Errorf("receiver '%s' does not exist", settings.Receiver))
	}
	for _, name := range settings.MuteTimeIntervals {
		if _, known := n.availableMuteTimings[name]; known {
			continue
		}
		problems = append(problems, fmt.Errorf("mute time interval '%s' does not exist", name))
	}
	// errors.Join yields nil when no problems were collected.
	return errors.Join(problems...)
}
// NotificationSettingsValidatorProvider provides a NotificationSettingsValidator for a given orgID.
type NotificationSettingsValidatorProvider interface {
// Validator returns the validator to use for the organization identified by orgID, or an error if one cannot be created.
Validator(ctx context.Context, orgID int64) (NotificationSettingsValidator, error)
}
// notificationSettingsValidationService provides a new NotificationSettingsValidator for a given orgID by loading the latest Alertmanager configuration.
// A validator is built from scratch on every call; nothing is cached here.
type notificationSettingsValidationService struct {
// store is where the latest Alertmanager configuration is read from.
store store.AlertingStore
}
// NewNotificationSettingsValidationService returns a NotificationSettingsValidatorProvider
// that reads the Alertmanager configuration from the given store every time a validator is requested.
func NewNotificationSettingsValidationService(store store.AlertingStore) NotificationSettingsValidatorProvider {
	svc := new(notificationSettingsValidationService)
	svc.store = store
	return svc
}
// Validator builds a NotificationSettingsValidator from the latest Alertmanager configuration
// stored for the given orgID.
//
// The error from fetching or parsing the configuration is returned unchanged, together with
// an empty staticValidator. On success the hash of the configuration used is logged at debug level.
func (v *notificationSettingsValidationService) Validator(ctx context.Context, orgID int64) (NotificationSettingsValidator, error) {
	latest, err := v.store.GetLatestAlertmanagerConfiguration(ctx, orgID)
	if err != nil {
		return staticValidator{}, err
	}

	parsed, err := Load([]byte(latest.AlertmanagerConfiguration))
	if err != nil {
		return staticValidator{}, err
	}

	logger := log.New("ngalert.notifier.validator").FromContext(ctx)
	logger.Debug("Create validator from Alertmanager configuration", "hash", latest.ConfigurationHash)

	return NewNotificationSettingsValidator(&parsed.AlertmanagerConfig), nil
}
// cachedNotificationSettingsValidationService is a NotificationSettingsValidatorProvider that
// remembers the validator obtained for each orgID and returns it on later requests.
// Cached validators are never refreshed or evicted by this type.
type cachedNotificationSettingsValidationService struct {
// srv is the underlying provider used when no validator is cached for an orgID.
srv NotificationSettingsValidatorProvider
// mtx guards validators.
mtx sync.Mutex
// validators holds the cached validators keyed by orgID.
validators map[int64]NotificationSettingsValidator
}
// NewCachedNotificationSettingsValidationService returns a NotificationSettingsValidatorProvider
// that wraps a store-backed provider and caches the validator it creates for each orgID.
// The cache starts empty.
func NewCachedNotificationSettingsValidationService(store store.AlertingStore) NotificationSettingsValidatorProvider {
	cached := &cachedNotificationSettingsValidationService{
		// The zero value of the mutex field is ready to use.
		validators: make(map[int64]NotificationSettingsValidator),
	}
	cached.srv = NewNotificationSettingsValidationService(store)
	return cached
}
// Validator returns the validator cached for the given orgID. If none is cached yet, it asks
// the underlying provider for one, stores it and returns it.
//
// A failure of the underlying provider is returned as (nil, err) and nothing is cached,
// so the next call tries again. The lock is held for the whole call, including the call
// into the underlying provider.
func (v *cachedNotificationSettingsValidationService) Validator(ctx context.Context, orgID int64) (NotificationSettingsValidator, error) {
	v.mtx.Lock()
	defer v.mtx.Unlock()

	if cached, hit := v.validators[orgID]; hit {
		return cached, nil
	}

	fresh, err := v.srv.Validator(ctx, orgID)
	if err != nil {
		return nil, err
	}
	v.validators[orgID] = fresh
	return fresh, nil
}