From 970cafa20f95ce1b674ffb7ca2ed0aae64cc90fc Mon Sep 17 00:00:00 2001 From: Yuri Tseretyan Date: Wed, 17 Jul 2024 10:53:54 -0400 Subject: [PATCH] Alerting: Time interval Delete API to check for usages in alert rules (#90500) * Check if a time interval is used in alert rules before deleting it * Add time interval to parameters of ListAlertRulesQuery and ListNotificationSettings of DbStore == Refacorings == * refactor isMuteTimeInUse to accept a single route * update getMuteTiming to not return err * update delete to get the mute timing from config first --- .../ngalert/api/api_provisioning_test.go | 2 +- pkg/services/ngalert/models/alert_rule.go | 3 +- pkg/services/ngalert/models/notifications.go | 5 +- pkg/services/ngalert/ngalert.go | 2 +- pkg/services/ngalert/provisioning/errors.go | 22 ++- .../ngalert/provisioning/mute_timings.go | 122 ++++++++------- .../ngalert/provisioning/mute_timings_test.go | 66 ++++---- pkg/services/ngalert/provisioning/testing.go | 37 +++++ pkg/services/ngalert/store/alert_rule.go | 56 +++++-- pkg/services/ngalert/store/alert_rule_test.go | 148 ++++++++++++++++-- pkg/services/provisioning/provisioning.go | 2 +- pkg/tests/api/alerting/testing.go | 11 ++ .../test-data/notification-settings.json | 47 ++++++ .../timeinterval/test-data/rulegroup-1.json | 37 +++++ .../timeinterval/timeinterval_test.go | 62 ++++++++ 15 files changed, 492 insertions(+), 130 deletions(-) create mode 100644 pkg/tests/apis/alerting/notifications/timeinterval/test-data/notification-settings.json create mode 100644 pkg/tests/apis/alerting/notifications/timeinterval/test-data/rulegroup-1.json diff --git a/pkg/services/ngalert/api/api_provisioning_test.go b/pkg/services/ngalert/api/api_provisioning_test.go index 527561f077b..9a05f2bd1ca 100644 --- a/pkg/services/ngalert/api/api_provisioning_test.go +++ b/pkg/services/ngalert/api/api_provisioning_test.go @@ -1891,7 +1891,7 @@ func createProvisioningSrvSutFromEnv(t *testing.T, env *testEnvironment) Provisi policies: newFakeNotificationPolicyService(), contactPointService: provisioning.NewContactPointService(env.configs, env.secrets, env.prov, env.xact, receiverSvc, env.log, env.store), templates: provisioning.NewTemplateService(env.configs, env.prov, env.xact, env.log), - muteTimings: provisioning.NewMuteTimingService(env.configs, env.prov, env.xact, env.log), + muteTimings: provisioning.NewMuteTimingService(env.configs, env.prov, env.xact, env.log, env.store), alertRules: provisioning.NewAlertRuleService(env.store, env.prov, env.folderService, env.quotas, env.xact, 60, 10, 100, env.log, &provisioning.NotificationSettingsValidatorProviderFake{}, env.rulesAuthz), folderSvc: env.folderService, featureManager: env.features, diff --git a/pkg/services/ngalert/models/alert_rule.go b/pkg/services/ngalert/models/alert_rule.go index 198153b711d..0c2590b7883 100644 --- a/pkg/services/ngalert/models/alert_rule.go +++ b/pkg/services/ngalert/models/alert_rule.go @@ -670,7 +670,8 @@ type ListAlertRulesQuery struct { DashboardUID string PanelID int64 - ReceiverName string + ReceiverName string + TimeIntervalName string } // CountAlertRulesQuery is the query for counting alert rules diff --git a/pkg/services/ngalert/models/notifications.go b/pkg/services/ngalert/models/notifications.go index 895a70da0a6..602d21acfa7 100644 --- a/pkg/services/ngalert/models/notifications.go +++ b/pkg/services/ngalert/models/notifications.go @@ -18,8 +18,9 @@ const GroupByAll = "..." var DefaultNotificationSettingsGroupBy = []string{FolderTitleLabel, model.AlertNameLabel} type ListNotificationSettingsQuery struct { - OrgID int64 - ReceiverName string + OrgID int64 + ReceiverName string + TimeIntervalName string } // NotificationSettings represents the settings for sending notifications for a single AlertRule. It is used to diff --git a/pkg/services/ngalert/ngalert.go b/pkg/services/ngalert/ngalert.go index 5fef66c558b..4242573c829 100644 --- a/pkg/services/ngalert/ngalert.go +++ b/pkg/services/ngalert/ngalert.go @@ -412,7 +412,7 @@ func (ng *AlertNG) init() error { policyService := provisioning.NewNotificationPolicyService(ng.store, ng.store, ng.store, ng.Cfg.UnifiedAlerting, ng.Log) contactPointService := provisioning.NewContactPointService(ng.store, ng.SecretsService, ng.store, ng.store, receiverService, ng.Log, ng.store) templateService := provisioning.NewTemplateService(ng.store, ng.store, ng.store, ng.Log) - muteTimingService := provisioning.NewMuteTimingService(ng.store, ng.store, ng.store, ng.Log) + muteTimingService := provisioning.NewMuteTimingService(ng.store, ng.store, ng.store, ng.Log, ng.store) alertRuleService := provisioning.NewAlertRuleService(ng.store, ng.store, ng.folderService, ng.QuotaService, ng.store, int64(ng.Cfg.UnifiedAlerting.DefaultRuleEvaluationInterval.Seconds()), int64(ng.Cfg.UnifiedAlerting.BaseInterval.Seconds()), diff --git a/pkg/services/ngalert/provisioning/errors.go b/pkg/services/ngalert/provisioning/errors.go index 3588e010712..39f762bfe22 100644 --- a/pkg/services/ngalert/provisioning/errors.go +++ b/pkg/services/ngalert/provisioning/errors.go @@ -5,6 +5,7 @@ import ( "fmt" "github.com/grafana/grafana/pkg/apimachinery/errutil" + "github.com/grafana/grafana/pkg/services/ngalert/models" ) var ErrValidation = fmt.Errorf("invalid object specification") @@ -20,7 +21,7 @@ var ( ErrTimeIntervalNotFound = errutil.NotFound("alerting.notifications.time-intervals.notFound") ErrTimeIntervalExists = errutil.BadRequest("alerting.notifications.time-intervals.nameExists", errutil.WithPublicMessage("Time interval with this name already exists. Use a different name or update existing one.")) ErrTimeIntervalInvalid = errutil.BadRequest("alerting.notifications.time-intervals.invalidFormat").MustTemplate("Invalid format of the submitted time interval", errutil.WithPublic("Time interval is in invalid format. Correct the payload and try again.")) - ErrTimeIntervalInUse = errutil.Conflict("alerting.notifications.time-intervals.used", errutil.WithPublicMessage("Time interval is used by one or many notification policies")) + ErrTimeIntervalInUse = errutil.Conflict("alerting.notifications.time-intervals.used").MustTemplate("Time interval is used") ErrContactPointReferenced = errutil.Conflict("alerting.notifications.contact-points.referenced", errutil.WithPublicMessage("Contact point is currently referenced by a notification policy.")) ErrContactPointUsedInRule = errutil.Conflict("alerting.notifications.contact-points.used-by-rule", errutil.WithPublicMessage("Contact point is currently used in the notification settings of one or many alert rules.")) @@ -47,3 +48,22 @@ func MakeErrTimeIntervalInvalid(err error) error { return ErrTimeIntervalInvalid.Build(data) } + +func MakeErrTimeIntervalInUse(usedByRoutes bool, rules []models.AlertRuleKey) error { + uids := make([]string, 0, len(rules)) + for _, key := range rules { + uids = append(uids, key.UID) + } + data := make(map[string]any, 2) + if len(uids) > 0 { + data["UsedByRules"] = uids + } + if usedByRoutes { + data["UsedByRoutes"] = true + } + + return ErrTimeIntervalInUse.Build(errutil.TemplateData{ + Public: data, + Error: nil, + }) +} diff --git a/pkg/services/ngalert/provisioning/mute_timings.go b/pkg/services/ngalert/provisioning/mute_timings.go index f4366e5541a..5bc0b9692c3 100644 --- a/pkg/services/ngalert/provisioning/mute_timings.go +++ b/pkg/services/ngalert/provisioning/mute_timings.go @@ -5,10 +5,12 @@ import ( "encoding/binary" "fmt" "hash/fnv" + "slices" "unsafe" "github.com/prometheus/alertmanager/config" "github.com/prometheus/alertmanager/timeinterval" + "golang.org/x/exp/maps" "github.com/grafana/grafana/pkg/infra/log" "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" @@ -17,20 +19,22 @@ import ( ) type MuteTimingService struct { - configStore alertmanagerConfigStore - provenanceStore ProvisioningStore - xact TransactionManager - log log.Logger - validator validation.ProvenanceStatusTransitionValidator + configStore alertmanagerConfigStore + provenanceStore ProvisioningStore + xact TransactionManager + log log.Logger + validator validation.ProvenanceStatusTransitionValidator + ruleNotificationsStore AlertRuleNotificationSettingsStore } -func NewMuteTimingService(config AMConfigStore, prov ProvisioningStore, xact TransactionManager, log log.Logger) *MuteTimingService { +func NewMuteTimingService(config AMConfigStore, prov ProvisioningStore, xact TransactionManager, log log.Logger, ns AlertRuleNotificationSettingsStore) *MuteTimingService { return &MuteTimingService{ - configStore: &alertmanagerConfigStoreImpl{store: config}, - provenanceStore: prov, - xact: xact, - log: log, - validator: validation.ValidateProvenanceRelaxed, + configStore: &alertmanagerConfigStoreImpl{store: config}, + provenanceStore: prov, + xact: xact, + log: log, + validator: validation.ValidateProvenanceRelaxed, + ruleNotificationsStore: ns, } } @@ -69,9 +73,9 @@ func (svc *MuteTimingService) GetMuteTiming(ctx context.Context, name string, or return definitions.MuteTimeInterval{}, err } - mt, _, err := getMuteTiming(rev, name) - if err != nil { - return definitions.MuteTimeInterval{}, err + mt, idx := getMuteTiming(rev, name) + if idx == -1 { + return definitions.MuteTimeInterval{}, ErrTimeIntervalNotFound.Errorf("") } result := definitions.MuteTimeInterval{ @@ -98,13 +102,9 @@ func (svc *MuteTimingService) CreateMuteTiming(ctx context.Context, mt definitio return definitions.MuteTimeInterval{}, err } - if revision.cfg.AlertmanagerConfig.MuteTimeIntervals == nil { - revision.cfg.AlertmanagerConfig.MuteTimeIntervals = []config.MuteTimeInterval{} - } - for _, existing := range revision.cfg.AlertmanagerConfig.MuteTimeIntervals { - if mt.Name == existing.Name { - return definitions.MuteTimeInterval{}, ErrTimeIntervalExists.Errorf("") - } + _, idx := getMuteTiming(revision, mt.Name) + if idx != -1 { + return definitions.MuteTimeInterval{}, ErrTimeIntervalExists.Errorf("") } revision.cfg.AlertmanagerConfig.MuteTimeIntervals = append(revision.cfg.AlertmanagerConfig.MuteTimeIntervals, mt.MuteTimeInterval) @@ -148,9 +148,9 @@ func (svc *MuteTimingService) UpdateMuteTiming(ctx context.Context, mt definitio return definitions.MuteTimeInterval{}, nil } - old, idx, err := getMuteTiming(revision, mt.Name) - if err != nil { - return definitions.MuteTimeInterval{}, err + old, idx := getMuteTiming(revision, mt.Name) + if idx == -1 { + return definitions.MuteTimeInterval{}, ErrTimeIntervalNotFound.Errorf("") } err = svc.checkOptimisticConcurrency(old, models.Provenance(mt.Provenance), mt.Version, "update") @@ -179,7 +179,18 @@ func (svc *MuteTimingService) UpdateMuteTiming(ctx context.Context, mt definitio // DeleteMuteTiming deletes the mute timing with the given name in the given org. If the mute timing does not exist, no error is returned. func (svc *MuteTimingService) DeleteMuteTiming(ctx context.Context, name string, orgID int64, provenance definitions.Provenance, version string) error { - target := definitions.MuteTimeInterval{MuteTimeInterval: config.MuteTimeInterval{Name: name}, Provenance: provenance} + revision, err := svc.configStore.Get(ctx, orgID) + if err != nil { + return err + } + + existing, idx := getMuteTiming(revision, name) + if idx == -1 { + svc.log.FromContext(ctx).Debug("Time interval was not found. Skip deleting", "name", name) + return nil + } + + target := definitions.MuteTimeInterval{MuteTimeInterval: existing, Provenance: provenance} // check that provenance is not changed in an invalid way storedProvenance, err := svc.provenanceStore.GetProvenance(ctx, &target, orgID) if err != nil { @@ -189,30 +200,27 @@ func (svc *MuteTimingService) DeleteMuteTiming(ctx context.Context, name string, return err } - revision, err := svc.configStore.Get(ctx, orgID) + if isMuteTimeInUseInRoutes(name, revision.cfg.AlertmanagerConfig.Route) { + ns, _ := svc.ruleNotificationsStore.ListNotificationSettings(ctx, models.ListNotificationSettingsQuery{OrgID: orgID, TimeIntervalName: existing.Name}) + // ignore error here because it's not important + return MakeErrTimeIntervalInUse(true, maps.Keys(ns)) + } + + err = svc.checkOptimisticConcurrency(existing, models.Provenance(provenance), version, "delete") if err != nil { return err } + revision.cfg.AlertmanagerConfig.MuteTimeIntervals = slices.Delete(revision.cfg.AlertmanagerConfig.MuteTimeIntervals, idx, idx+1) - if revision.cfg.AlertmanagerConfig.MuteTimeIntervals == nil { - return nil - } - if isMuteTimeInUse(name, []*definitions.Route{revision.cfg.AlertmanagerConfig.Route}) { - return ErrTimeIntervalInUse.Errorf("") - } - for i, existing := range revision.cfg.AlertmanagerConfig.MuteTimeIntervals { - if name != existing.Name { - continue - } - err = svc.checkOptimisticConcurrency(existing, models.Provenance(provenance), version, "delete") + return svc.xact.InTransaction(ctx, func(ctx context.Context) error { + keys, err := svc.ruleNotificationsStore.ListNotificationSettings(ctx, models.ListNotificationSettingsQuery{OrgID: orgID, TimeIntervalName: existing.Name}) if err != nil { return err } - intervals := revision.cfg.AlertmanagerConfig.MuteTimeIntervals - revision.cfg.AlertmanagerConfig.MuteTimeIntervals = append(intervals[:i], intervals[i+1:]...) - } + if len(keys) > 0 { + return MakeErrTimeIntervalInUse(false, maps.Keys(keys)) + } - return svc.xact.InTransaction(ctx, func(ctx context.Context) error { if err := svc.configStore.Save(ctx, revision, orgID); err != nil { return err } @@ -220,33 +228,29 @@ func (svc *MuteTimingService) DeleteMuteTiming(ctx context.Context, name string, }) } -func isMuteTimeInUse(name string, routes []*definitions.Route) bool { - if len(routes) == 0 { +func isMuteTimeInUseInRoutes(name string, route *definitions.Route) bool { + if route == nil { return false } - for _, route := range routes { - for _, mtName := range route.MuteTimeIntervals { - if mtName == name { - return true - } - } - if isMuteTimeInUse(name, route.Routes) { + if slices.Contains(route.MuteTimeIntervals, name) { + return true + } + for _, route := range route.Routes { + if isMuteTimeInUseInRoutes(name, route) { return true } } return false } -func getMuteTiming(rev *cfgRevision, name string) (config.MuteTimeInterval, int, error) { - if rev.cfg.AlertmanagerConfig.MuteTimeIntervals == nil { - return config.MuteTimeInterval{}, -1, ErrTimeIntervalNotFound.Errorf("") +func getMuteTiming(rev *cfgRevision, name string) (config.MuteTimeInterval, int) { + idx := slices.IndexFunc(rev.cfg.AlertmanagerConfig.MuteTimeIntervals, func(interval config.MuteTimeInterval) bool { + return interval.Name == name + }) + if idx == -1 { + return config.MuteTimeInterval{}, idx } - for idx, mt := range rev.cfg.AlertmanagerConfig.MuteTimeIntervals { - if mt.Name == name { - return mt, idx, nil - } - } - return config.MuteTimeInterval{}, -1, ErrTimeIntervalNotFound.Errorf("") + return rev.cfg.AlertmanagerConfig.MuteTimeIntervals[idx], idx } func calculateMuteTimeIntervalFingerprint(interval config.MuteTimeInterval) string { diff --git a/pkg/services/ngalert/provisioning/mute_timings_test.go b/pkg/services/ngalert/provisioning/mute_timings_test.go index f79366bafef..5168fcd3189 100644 --- a/pkg/services/ngalert/provisioning/mute_timings_test.go +++ b/pkg/services/ngalert/provisioning/mute_timings_test.go @@ -9,6 +9,7 @@ import ( "github.com/prometheus/alertmanager/config" "github.com/prometheus/alertmanager/timeinterval" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" @@ -635,31 +636,7 @@ func TestDeleteMuteTimings(t *testing.T) { } } - t.Run("re-saves config and deletes provenance if mute timing does not exist", func(t *testing.T) { - sut, store, prov := createMuteTimingSvcSut() - store.GetFn = func(ctx context.Context, orgID int64) (*cfgRevision, error) { - return &cfgRevision{cfg: initialConfig()}, nil - } - prov.EXPECT().GetProvenance(mock.Anything, mock.Anything, mock.Anything).Return(models.ProvenanceAPI, nil) - prov.EXPECT().DeleteProvenance(mock.Anything, mock.Anything, mock.Anything).Return(nil) - - err := sut.DeleteMuteTiming(context.Background(), "no-timing", orgID, definitions.Provenance(models.ProvenanceAPI), "") - require.NoError(t, err) - - require.Len(t, store.Calls, 2) - require.Equal(t, "Get", store.Calls[0].Method) - require.Equal(t, orgID, store.Calls[0].Args[1]) - - require.Equal(t, "Save", store.Calls[1].Method) - require.Equal(t, orgID, store.Calls[1].Args[2]) - revision := store.Calls[1].Args[1].(*cfgRevision) - - require.EqualValues(t, initialConfig().AlertmanagerConfig.MuteTimeIntervals, revision.cfg.AlertmanagerConfig.MuteTimeIntervals) - - prov.AssertCalled(t, "DeleteProvenance", mock.Anything, &definitions.MuteTimeInterval{MuteTimeInterval: config.MuteTimeInterval{Name: "no-timing"}, Provenance: definitions.Provenance(models.ProvenanceAPI)}, orgID) - }) - - t.Run("fails if it was created with different provenance", func(t *testing.T) { + t.Run("fails if provenance check fails", func(t *testing.T) { sut, store, prov := createMuteTimingSvcSut() expectedErr := errors.New("test") sut.validator = func(from, to models.Provenance) error { @@ -669,15 +646,12 @@ func TestDeleteMuteTimings(t *testing.T) { return &cfgRevision{cfg: initialConfig()}, nil } prov.EXPECT().GetProvenance(mock.Anything, mock.Anything, mock.Anything).Return(models.ProvenanceAPI, nil) - prov.EXPECT().DeleteProvenance(mock.Anything, mock.Anything, mock.Anything).Return(nil) - err := sut.DeleteMuteTiming(context.Background(), "no-timing", orgID, definitions.Provenance(models.ProvenanceNone), correctVersion) + err := sut.DeleteMuteTiming(context.Background(), timingToDelete.Name, orgID, definitions.Provenance(models.ProvenanceNone), correctVersion) require.ErrorIs(t, err, expectedErr) - - require.Len(t, store.Calls, 0) }) - t.Run("returns ErrTimeIntervalInUse if mute timing is used", func(t *testing.T) { + t.Run("returns ErrTimeIntervalInUse if mute timing is used by a route", func(t *testing.T) { sut, store, prov := createMuteTimingSvcSut() store.GetFn = func(ctx context.Context, orgID int64) (*cfgRevision, error) { return &cfgRevision{cfg: initialConfig()}, nil @@ -689,7 +663,36 @@ func TestDeleteMuteTimings(t *testing.T) { require.Len(t, store.Calls, 1) require.Equal(t, "Get", store.Calls[0].Method) require.Equal(t, orgID, store.Calls[0].Args[1]) - require.Truef(t, ErrTimeIntervalInUse.Is(err), "expected ErrTimeIntervalInUse but got %s", err) + require.ErrorIs(t, err, ErrTimeIntervalInUse) + }) + + t.Run("returns ErrTimeIntervalInUse if mute timing is used by rules", func(t *testing.T) { + sut, store, prov := createMuteTimingSvcSut() + ruleNsStore := fakeAlertRuleNotificationStore{ + ListNotificationSettingsFn: func(ctx context.Context, q models.ListNotificationSettingsQuery) (map[models.AlertRuleKey][]models.NotificationSettings, error) { + assertInTransaction(t, ctx) + assert.Equal(t, orgID, q.OrgID) + assert.Equal(t, timingToDelete.Name, q.TimeIntervalName) + assert.Empty(t, q.ReceiverName) + return map[models.AlertRuleKey][]models.NotificationSettings{ + models.GenerateRuleKey(orgID): nil, + }, nil + }, + } + sut.ruleNotificationsStore = &ruleNsStore + store.GetFn = func(ctx context.Context, orgID int64) (*cfgRevision, error) { + return &cfgRevision{cfg: initialConfig()}, nil + } + prov.EXPECT().GetProvenance(mock.Anything, mock.Anything, mock.Anything).Return(models.ProvenanceAPI, nil) + + err := sut.DeleteMuteTiming(context.Background(), timingToDelete.Name, orgID, definitions.Provenance(models.ProvenanceAPI), correctVersion) + + require.Len(t, store.Calls, 1) + require.Equal(t, "Get", store.Calls[0].Method) + require.Equal(t, orgID, store.Calls[0].Args[1]) + require.ErrorIs(t, err, ErrTimeIntervalInUse) + require.Len(t, ruleNsStore.Calls, 1) + require.Equal(t, "ListNotificationSettings", ruleNsStore.Calls[0].Method) }) t.Run("returns ErrVersionConflict if provided version does not match", func(t *testing.T) { @@ -828,5 +831,6 @@ func createMuteTimingSvcSut() (*MuteTimingService, *alertmanagerConfigStoreFake, validator: func(from, to models.Provenance) error { return nil }, + ruleNotificationsStore: &fakeAlertRuleNotificationStore{}, }, store, prov } diff --git a/pkg/services/ngalert/provisioning/testing.go b/pkg/services/ngalert/provisioning/testing.go index 13422e79b50..58d305b0cc3 100644 --- a/pkg/services/ngalert/provisioning/testing.go +++ b/pkg/services/ngalert/provisioning/testing.go @@ -217,3 +217,40 @@ func (s *fakeRuleAccessControlService) CanWriteAllRules(ctx context.Context, use } return false, nil } + +type fakeAlertRuleNotificationStore struct { + Calls []call + + RenameReceiverInNotificationSettingsFn func(ctx context.Context, orgID int64, oldReceiver, newReceiver string) (int, error) + ListNotificationSettingsFn func(ctx context.Context, q models.ListNotificationSettingsQuery) (map[models.AlertRuleKey][]models.NotificationSettings, error) +} + +func (f *fakeAlertRuleNotificationStore) RenameReceiverInNotificationSettings(ctx context.Context, orgID int64, oldReceiver, newReceiver string) (int, error) { + call := call{ + Method: "RenameReceiverInNotificationSettings", + Args: []interface{}{ctx, orgID, oldReceiver, newReceiver}, + } + f.Calls = append(f.Calls, call) + + if f.RenameReceiverInNotificationSettingsFn != nil { + return f.RenameReceiverInNotificationSettingsFn(ctx, orgID, oldReceiver, newReceiver) + } + + // Default values when no function hook is provided + return 0, nil +} + +func (f *fakeAlertRuleNotificationStore) ListNotificationSettings(ctx context.Context, q models.ListNotificationSettingsQuery) (map[models.AlertRuleKey][]models.NotificationSettings, error) { + call := call{ + Method: "ListNotificationSettings", + Args: []interface{}{ctx, q}, + } + f.Calls = append(f.Calls, call) + + if f.ListNotificationSettingsFn != nil { + return f.ListNotificationSettingsFn(ctx, q) + } + + // Default values when no function hook is provided + return nil, nil +} diff --git a/pkg/services/ngalert/store/alert_rule.go b/pkg/services/ngalert/store/alert_rule.go index d86a149c6e5..6198f2f5e77 100644 --- a/pkg/services/ngalert/store/alert_rule.go +++ b/pkg/services/ngalert/store/alert_rule.go @@ -399,7 +399,14 @@ func (st DBstore) ListAlertRules(ctx context.Context, query *ngmodels.ListAlertR } if query.ReceiverName != "" { - q, err = st.filterByReceiverName(query.ReceiverName, q) + q, err = st.filterByContentInNotificationSettings(query.ReceiverName, q) + if err != nil { + return err + } + } + + if query.TimeIntervalName != "" { + q, err = st.filterByContentInNotificationSettings(query.TimeIntervalName, q) if err != nil { return err } @@ -432,6 +439,13 @@ func (st DBstore) ListAlertRules(ctx context.Context, query *ngmodels.ListAlertR continue } } + if query.TimeIntervalName != "" { + if !slices.ContainsFunc(rule.NotificationSettings, func(settings ngmodels.NotificationSettings) bool { + return slices.Contains(settings.MuteTimeIntervals, query.TimeIntervalName) + }) { + continue + } + } // MySQL (and potentially other databases) can use case-insensitive comparison. // This code makes sure we return groups that only exactly match the filter. if groupsMap != nil { @@ -720,13 +734,24 @@ func (st DBstore) ListNotificationSettings(ctx context.Context, q ngmodels.ListN var rules []ngmodels.AlertRule err := st.SQLStore.WithDbSession(ctx, func(sess *db.Session) error { query := sess.Table(ngmodels.AlertRule{}).Select("uid, notification_settings").Where("org_id = ?", q.OrgID) + hasFilter := false if q.ReceiverName != "" { var err error - query, err = st.filterByReceiverName(q.ReceiverName, query) + query, err = st.filterByContentInNotificationSettings(q.ReceiverName, query) if err != nil { return err } - } else { + hasFilter = true + } + if q.TimeIntervalName != "" { + var err error + query, err = st.filterByContentInNotificationSettings(q.TimeIntervalName, query) + if err != nil { + return err + } + hasFilter = true + } + if !hasFilter { query = query.And("notification_settings IS NOT NULL AND notification_settings <> 'null'") } return query.Find(&rules) @@ -736,16 +761,15 @@ func (st DBstore) ListNotificationSettings(ctx context.Context, q ngmodels.ListN } result := make(map[ngmodels.AlertRuleKey][]ngmodels.NotificationSettings, len(rules)) for _, rule := range rules { - var ns []ngmodels.NotificationSettings - if q.ReceiverName != "" { // if filter by receiver name is specified, perform fine filtering on client to avoid false-positives - for _, setting := range rule.NotificationSettings { - if q.ReceiverName == setting.Receiver { // currently, there can be only one setting. If in future there are more, we will return all settings of a rule that has a setting with receiver - ns = rule.NotificationSettings - break - } + ns := make([]ngmodels.NotificationSettings, 0, len(rule.NotificationSettings)) + for _, setting := range rule.NotificationSettings { + if q.ReceiverName != "" && q.ReceiverName != setting.Receiver { // currently, there can be only one setting. If in future there are more, we will return all settings of a rule that has a setting with receiver + continue } - } else { - ns = rule.NotificationSettings + if q.TimeIntervalName != "" && !slices.Contains(setting.MuteTimeIntervals, q.TimeIntervalName) { + continue + } + ns = append(ns, setting) } if len(ns) > 0 { key := ngmodels.AlertRuleKey{ @@ -758,14 +782,14 @@ func (st DBstore) ListNotificationSettings(ctx context.Context, q ngmodels.ListN return result, nil } -func (st DBstore) filterByReceiverName(receiver string, sess *xorm.Session) (*xorm.Session, error) { - if receiver == "" { +func (st DBstore) filterByContentInNotificationSettings(value string, sess *xorm.Session) (*xorm.Session, error) { + if value == "" { return sess, nil } // marshall string according to JSON rules so we follow escaping rules. - b, err := json.Marshal(receiver) + b, err := json.Marshal(value) if err != nil { - return nil, fmt.Errorf("failed to marshall receiver name query: %w", err) + return nil, fmt.Errorf("failed to marshall string for notification settings content filter: %w", err) } var search = string(b) if st.SQLStore.GetDialect().DriverName() != migrator.SQLite { diff --git a/pkg/services/ngalert/store/alert_rule_test.go b/pkg/services/ngalert/store/alert_rule_test.go index 45a4c859845..186cb3e85f6 100644 --- a/pkg/services/ngalert/store/alert_rule_test.go +++ b/pkg/services/ngalert/store/alert_rule_test.go @@ -12,6 +12,7 @@ import ( "github.com/google/uuid" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "golang.org/x/exp/maps" "golang.org/x/exp/rand" "github.com/grafana/grafana/pkg/bus" @@ -776,6 +777,14 @@ func TestIntegrationAlertRulesNotificationSettings(t *testing.T) { t.Skip("skipping integration test") } + getKeyMap := func(r []*models.AlertRule) map[models.AlertRuleKey]struct{} { + result := make(map[models.AlertRuleKey]struct{}, len(r)) + for _, rule := range r { + result[rule.GetKey()] = struct{}{} + } + return result + } + sqlStore := db.InitTestReplDB(t) cfg := setting.NewCfg() cfg.UnifiedAlerting.BaseInterval = 1 * time.Second @@ -787,15 +796,17 @@ func TestIntegrationAlertRulesNotificationSettings(t *testing.T) { } receiverName := "receiver\"-" + uuid.NewString() + timeIntervalName := "time-" + util.GenerateShortUID() gen := models.RuleGen gen = gen.With(gen.WithOrgID(1), gen.WithIntervalMatching(store.Cfg.BaseInterval)) rules := gen.GenerateManyRef(3) receiveRules := gen.With(gen.WithNotificationSettingsGen(models.NotificationSettingsGen(models.NSMuts.WithReceiver(receiverName)))).GenerateManyRef(3) - noise := gen.With(gen.WithNotificationSettingsGen(models.NotificationSettingsGen(models.NSMuts.WithMuteTimeIntervals(receiverName)))).GenerateManyRef(3) + timeIntervalRules := gen.With(gen.WithNotificationSettingsGen(models.NotificationSettingsGen(models.NSMuts.WithMuteTimeIntervals(timeIntervalName)))).GenerateManyRef(3) + noise := gen.With(gen.WithNotificationSettingsGen(models.NotificationSettingsGen(models.NSMuts.WithReceiver(timeIntervalName), models.NSMuts.WithMuteTimeIntervals(receiverName)))).GenerateManyRef(3) - deref := make([]models.AlertRule, 0, len(rules)+len(receiveRules)+len(noise)) - for _, rule := range append(append(rules, receiveRules...), noise...) { + deref := make([]models.AlertRule, 0, len(rules)+len(receiveRules)+len(timeIntervalRules)+len(noise)) + for _, rule := range append(append(append(rules, receiveRules...), noise...), timeIntervalRules...) { r := *rule r.ID = 0 deref = append(deref, r) @@ -805,18 +816,60 @@ func TestIntegrationAlertRulesNotificationSettings(t *testing.T) { require.NoError(t, err) t.Run("should find rules by receiver name", func(t *testing.T) { - expectedUIDs := map[string]struct{}{} - for _, rule := range receiveRules { - expectedUIDs[rule.UID] = struct{}{} - } + expected := getKeyMap(receiveRules) actual, err := store.ListAlertRules(context.Background(), &models.ListAlertRulesQuery{ OrgID: 1, ReceiverName: receiverName, }) require.NoError(t, err) - assert.Len(t, actual, len(expectedUIDs)) + assert.Len(t, actual, len(expected)) for _, rule := range actual { - assert.Contains(t, expectedUIDs, rule.UID) + assert.Contains(t, expected, rule.GetKey()) + } + }) + + t.Run("should find rules by time interval name", func(t *testing.T) { + expected := getKeyMap(timeIntervalRules) + actual, err := store.ListAlertRules(context.Background(), &models.ListAlertRulesQuery{ + OrgID: 1, + TimeIntervalName: timeIntervalName, + }) + require.NoError(t, err) + assert.Len(t, actual, len(expected)) + for _, rule := range actual { + assert.Contains(t, expected, rule.GetKey()) + } + }) + + t.Run("should find rules by receiver and time-interval name", func(t *testing.T) { + var receiver, intervalName string + var expected []models.AlertRuleKey + rand.Shuffle(len(deref), func(i, j int) { + deref[i], deref[j] = deref[j], deref[i] + }) + for _, rule := range deref { + if len(rule.NotificationSettings) == 0 || rule.NotificationSettings[0].Receiver == "" || len(rule.NotificationSettings[0].MuteTimeIntervals) == 0 { + continue + } + if len(expected) > 0 { + if rule.NotificationSettings[0].Receiver == receiver && slices.Contains(rule.NotificationSettings[0].MuteTimeIntervals, intervalName) { + expected = append(expected, rule.GetKey()) + } + } else { + receiver = rule.NotificationSettings[0].Receiver + intervalName = rule.NotificationSettings[0].MuteTimeIntervals[0] + expected = append(expected, rule.GetKey()) + } + } + actual, err := store.ListAlertRules(context.Background(), &models.ListAlertRulesQuery{ + OrgID: 1, + ReceiverName: receiver, + TimeIntervalName: intervalName, + }) + require.NoError(t, err) + assert.Len(t, actual, len(expected)) + for _, rule := range actual { + assert.Contains(t, expected, rule.GetKey()) } }) @@ -864,13 +917,17 @@ func TestIntegrationListNotificationSettings(t *testing.T) { Cfg: cfg.UnifiedAlerting, } - receiverName := `receiver%"-👍'test` + searchName := `name-%"-👍'test` gen := models.RuleGen gen = gen.With(gen.WithOrgID(1), gen.WithIntervalMatching(store.Cfg.BaseInterval)) - rulesWithNotifications := gen.With( - gen.WithNotificationSettingsGen(models.NotificationSettingsGen(models.NSMuts.WithReceiver(receiverName))), + rulesWithNotificationsAndReceiver := gen.With( + gen.WithNotificationSettingsGen(models.NotificationSettingsGen(models.NSMuts.WithReceiver(searchName))), ).GenerateMany(5) + rulesWithNotificationsAndTimeInterval := gen.With( + gen.WithNotificationSettingsGen(models.NotificationSettingsGen(models.NSMuts.WithMuteTimeIntervals(searchName))), + ).GenerateMany(5) + rulesInOtherOrg := gen.With( gen.WithOrgID(2), gen.WithNotificationSettingsGen(models.NotificationSettingsGen()), @@ -878,15 +935,18 @@ func TestIntegrationListNotificationSettings(t *testing.T) { rulesWithNoNotifications := gen.With(gen.WithNoNotificationSettings()).GenerateMany(5) - deref := append(append(rulesWithNotifications, rulesWithNoNotifications...), rulesInOtherOrg...) + deref := append(append(rulesWithNotificationsAndReceiver, rulesWithNoNotifications...), rulesInOtherOrg...) + deref = append(deref, rulesWithNotificationsAndTimeInterval...) + + orgRules := append(rulesWithNotificationsAndReceiver, rulesWithNotificationsAndTimeInterval...) _, err := store.InsertAlertRules(context.Background(), deref) require.NoError(t, err) result, err := store.ListNotificationSettings(context.Background(), models.ListNotificationSettingsQuery{OrgID: 1}) require.NoError(t, err) - require.Len(t, result, len(rulesWithNotifications)) - for _, rule := range rulesWithNotifications { + require.Len(t, result, len(orgRules)) + for _, rule := range rulesWithNotificationsAndReceiver { if !assert.Contains(t, result, rule.GetKey()) { continue } @@ -895,13 +955,13 @@ func TestIntegrationListNotificationSettings(t *testing.T) { t.Run("should list notification settings by receiver name", func(t *testing.T) { expectedUIDs := map[models.AlertRuleKey]struct{}{} - for _, rule := range rulesWithNotifications { + for _, rule := range rulesWithNotificationsAndReceiver { expectedUIDs[rule.GetKey()] = struct{}{} } actual, err := store.ListNotificationSettings(context.Background(), models.ListNotificationSettingsQuery{ OrgID: 1, - ReceiverName: receiverName, + ReceiverName: searchName, }) require.NoError(t, err) assert.Len(t, actual, len(expectedUIDs)) @@ -909,6 +969,60 @@ func TestIntegrationListNotificationSettings(t *testing.T) { assert.Contains(t, expectedUIDs, ruleKey) } }) + t.Run("should filter notification settings by time interval name", func(t *testing.T) { + expectedUIDs := map[models.AlertRuleKey]struct{}{} + for _, rule := range rulesWithNotificationsAndTimeInterval { + expectedUIDs[rule.GetKey()] = struct{}{} + } + + actual, err := store.ListNotificationSettings(context.Background(), models.ListNotificationSettingsQuery{ + OrgID: 1, + TimeIntervalName: searchName, + }) + require.NoError(t, err) + assert.Len(t, actual, len(expectedUIDs)) + for ruleKey := range actual { + assert.Contains(t, expectedUIDs, ruleKey) + } + }) + t.Run("should return nothing if filter does not match", func(t *testing.T) { + result, err := store.ListNotificationSettings(context.Background(), models.ListNotificationSettingsQuery{ + OrgID: 1, + ReceiverName: "not-found-receiver", + TimeIntervalName: "not-found-time-interval", + }) + require.NoError(t, err) + require.Empty(t, result) + }) + t.Run("should filter by time interval and receiver", func(t *testing.T) { + var receiver, timeInterval string + var expected []models.AlertRuleKey + rand.Shuffle(len(orgRules), func(i, j int) { + orgRules[i], orgRules[j] = orgRules[j], orgRules[i] + }) + for _, rule := range orgRules { + if len(rule.NotificationSettings) == 0 || rule.NotificationSettings[0].Receiver == "" || len(rule.NotificationSettings[0].MuteTimeIntervals) == 0 { + continue + } + if len(expected) > 0 { + if rule.NotificationSettings[0].Receiver == receiver && slices.Contains(rule.NotificationSettings[0].MuteTimeIntervals, timeInterval) { + expected = append(expected, rule.GetKey()) + } + } else { + receiver = rule.NotificationSettings[0].Receiver + timeInterval = rule.NotificationSettings[0].MuteTimeIntervals[0] + expected = append(expected, rule.GetKey()) + } + } + + actual, err := store.ListNotificationSettings(context.Background(), models.ListNotificationSettingsQuery{ + OrgID: 1, + ReceiverName: receiver, + TimeIntervalName: timeInterval, + }) + require.NoError(t, err) + require.EqualValuesf(t, expected, maps.Keys(actual), "got more rules than expected: %#v", actual) + }) } func TestIntegrationGetNamespacesByRuleUID(t *testing.T) { diff --git a/pkg/services/provisioning/provisioning.go b/pkg/services/provisioning/provisioning.go index 8a63fa3bdea..f3a49c726ff 100644 --- a/pkg/services/provisioning/provisioning.go +++ b/pkg/services/provisioning/provisioning.go @@ -275,7 +275,7 @@ func (ps *ProvisioningServiceImpl) ProvisionAlerting(ctx context.Context) error st, ps.SQLStore, receiverSvc, ps.log, &st) notificationPolicyService := provisioning.NewNotificationPolicyService(&st, st, ps.SQLStore, ps.Cfg.UnifiedAlerting, ps.log) - mutetimingsService := provisioning.NewMuteTimingService(&st, st, &st, ps.log) + mutetimingsService := provisioning.NewMuteTimingService(&st, st, &st, ps.log, &st) templateService := provisioning.NewTemplateService(&st, st, &st, ps.log) cfg := prov_alerting.ProvisionerConfig{ Path: alertingPath, diff --git a/pkg/tests/api/alerting/testing.go b/pkg/tests/api/alerting/testing.go index 2dd5327d47b..e9e6c57559b 100644 --- a/pkg/tests/api/alerting/testing.go +++ b/pkg/tests/api/alerting/testing.go @@ -240,6 +240,17 @@ type apiClient struct { url string } +type LegacyApiClient struct { + apiClient +} + +func NewAlertingLegacyAPIClient(host, user, pass string) LegacyApiClient { + cli := newAlertingApiClient(host, user, pass) + return LegacyApiClient{ + apiClient: cli, + } +} + func newAlertingApiClient(host, user, pass string) apiClient { if len(user) == 0 && len(pass) == 0 { return apiClient{url: fmt.Sprintf("http://%s", host)} diff --git a/pkg/tests/apis/alerting/notifications/timeinterval/test-data/notification-settings.json b/pkg/tests/apis/alerting/notifications/timeinterval/test-data/notification-settings.json new file mode 100644 index 00000000000..b113e45dd01 --- /dev/null +++ b/pkg/tests/apis/alerting/notifications/timeinterval/test-data/notification-settings.json @@ -0,0 +1,47 @@ +{ + "template_files": {}, + "template_file_provenances": {}, + "alertmanager_config": { + "route": { + "receiver": "grafana-default-email", + "routes": [ + { + "object_matchers": [ + [ + "test", + "=", + "test" + ] + ], + "mute_time_intervals": [ + "test-interval" + ] + } + ] + }, + "mute_time_intervals": [ + { + "name": "test-interval", + "time_intervals": [ + { + "start_time": "06:00", + "end_time": "23:59" + } + ] + } + ], + "receivers": [ + { + "name": "grafana-default-email", + "grafana_managed_receiver_configs": [ + { + "type": "email", + "settings": { + "addresses": "" + } + } + ] + } + ] + } +} \ No newline at end of file diff --git a/pkg/tests/apis/alerting/notifications/timeinterval/test-data/rulegroup-1.json b/pkg/tests/apis/alerting/notifications/timeinterval/test-data/rulegroup-1.json new file mode 100644 index 00000000000..7b3e22c2d20 --- /dev/null +++ b/pkg/tests/apis/alerting/notifications/timeinterval/test-data/rulegroup-1.json @@ -0,0 +1,37 @@ +{ + "name": "Group1", + "interval": "1m", + "rules": [ + { + "for": "0", + "labels": { + "label1": "test-label" + }, + "annotations": { + "annotation": "test-annotation" + }, + "grafana_alert": { + "title": "Rule1", + "condition": "A", + "data": [ + { + "refId": "A", + "datasourceUid": "__expr__", + "model": { + "expression": "0 > 0", + "type": "math" + } + } + ], + "no_data_state": "NoData", + "exec_err_state": "Alerting", + "notification_settings": { + "receiver": "grafana-default-email", + "mute_time_intervals": [ + "test-interval" + ] + } + } + } + ] +} diff --git a/pkg/tests/apis/alerting/notifications/timeinterval/timeinterval_test.go b/pkg/tests/apis/alerting/notifications/timeinterval/timeinterval_test.go index ae3cf8bc6b1..05a62032815 100644 --- a/pkg/tests/apis/alerting/notifications/timeinterval/timeinterval_test.go +++ b/pkg/tests/apis/alerting/notifications/timeinterval/timeinterval_test.go @@ -2,8 +2,11 @@ package timeinterval import ( "context" + "embed" "encoding/json" "fmt" + "net/http" + "path" "testing" "github.com/prometheus/alertmanager/config" @@ -24,12 +27,16 @@ import ( "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" "github.com/grafana/grafana/pkg/services/ngalert/store" "github.com/grafana/grafana/pkg/services/org" + "github.com/grafana/grafana/pkg/tests/api/alerting" "github.com/grafana/grafana/pkg/tests/apis" "github.com/grafana/grafana/pkg/tests/testinfra" "github.com/grafana/grafana/pkg/tests/testsuite" "github.com/grafana/grafana/pkg/util" ) +//go:embed test-data/*.* +var testData embed.FS + func TestMain(m *testing.M) { testsuite.Run(m) } @@ -626,3 +633,58 @@ func TestIntegrationTimeIntervalListSelector(t *testing.T) { require.Empty(t, list.Items) }) } + +func TestIntegrationTimeIntervalReferentialIntegrity(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + ctx := context.Background() + helper := getTestHelper(t) + + cliCfg := helper.Org1.Admin.NewRestConfig() + legacyCli := alerting.NewAlertingLegacyAPIClient(helper.GetEnv().Server.HTTPServer.Listener.Addr().String(), cliCfg.Username, cliCfg.Password) + + // Prepare environment and create notification policy and rule that use time interval + alertmanagerRaw, err := testData.ReadFile(path.Join("test-data", "notification-settings.json")) + require.NoError(t, err) + var amConfig definitions.PostableUserConfig + require.NoError(t, json.Unmarshal(alertmanagerRaw, &amConfig)) + + success, err := legacyCli.PostConfiguration(t, amConfig) + require.Truef(t, success, "Failed to post Alertmanager configuration: %s", err) + + postGroupRaw, err := testData.ReadFile(path.Join("test-data", "rulegroup-1.json")) + require.NoError(t, err) + var ruleGroup definitions.PostableRuleGroupConfig + require.NoError(t, json.Unmarshal(postGroupRaw, &ruleGroup)) + + folderUID := "test-folder" + legacyCli.CreateFolder(t, folderUID, "TEST") + _, status, data := legacyCli.PostRulesGroupWithStatus(t, folderUID, &ruleGroup) + require.Equalf(t, http.StatusAccepted, status, "Failed to post Rule: %s", data) + + adminK8sClient, err := versioned.NewForConfig(cliCfg) + require.NoError(t, err) + adminClient := adminK8sClient.NotificationsV0alpha1().TimeIntervals("default") + + intervals, err := adminClient.List(ctx, v1.ListOptions{}) + require.NoError(t, err) + require.Len(t, intervals.Items, 1) + + intervalToDelete := intervals.Items[0] + + t.Run("should fail to delete if time interval is used in rule and routes", func(t *testing.T) { + err := adminClient.Delete(ctx, intervalToDelete.Name, v1.DeleteOptions{}) + require.Truef(t, errors.IsConflict(err), "Expected Conflict, got: %s", err) + }) + + t.Run("should fail to delete if time interval is used in only rule", func(t *testing.T) { + amConfig.AlertmanagerConfig.Route.Routes[0].MuteTimeIntervals = nil + success, err := legacyCli.PostConfiguration(t, amConfig) + require.Truef(t, success, "Failed to post Alertmanager configuration: %s", err) + + err = adminClient.Delete(ctx, intervalToDelete.Name, v1.DeleteOptions{}) + require.Truef(t, errors.IsConflict(err), "Expected Conflict, got: %s", err) + }) +}