From 1eebd2a4ded5f7b573da75a53c9e0b21aa37a439 Mon Sep 17 00:00:00 2001 From: Yuri Tseretyan Date: Thu, 15 Feb 2024 09:45:10 -0500 Subject: [PATCH] Alerting: Support for simplified notification settings in rule API (#81011) * Add notification settings to storage\domain and API models. Settings are a slice to workaround XORM mapping * Support validation of notification settings when rules are updated * Implement route generator for Alertmanager configuration. That fetches all notification settings. * Update multi-tenant Alertmanager to run the generator before applying the configuration. * Add notification settings labels to state calculation * update the Multi-tenant Alertmanager to provide validation for notification settings * update GET API so only admins can see auto-gen --- conf/provisioning/alerting/sample.yaml | 39 +++ pkg/services/ngalert/api/api.go | 5 +- pkg/services/ngalert/api/api_alertmanager.go | 11 +- .../ngalert/api/api_alertmanager_test.go | 182 ++++++++++++- .../ngalert/api/api_provisioning_test.go | 85 +++--- pkg/services/ngalert/api/api_ruler.go | 76 ++++-- pkg/services/ngalert/api/api_ruler_test.go | 17 +- .../ngalert/api/api_ruler_validation.go | 38 +++ .../ngalert/api/api_ruler_validation_test.go | 114 ++++++++ pkg/services/ngalert/api/api_testing.go | 2 +- pkg/services/ngalert/api/compat.go | 143 +++++++--- .../test-data/post-rulegroup-101-export.hcl | 9 + .../test-data/post-rulegroup-101-export.json | 10 +- .../test-data/post-rulegroup-101-export.yaml | 11 + .../api/test-data/post-rulegroup-101.json | 10 +- .../api/tooling/definitions/alertmanager.go | 46 ++++ .../tooling/definitions/alertmanager_test.go | 73 ++++++ .../definitions/alertmanager_validation.go | 5 +- .../api/tooling/definitions/cortex-ruler.go | 90 +++++-- .../definitions/provisioning_alert_rules.go | 22 +- pkg/services/ngalert/models/alert_rule.go | 62 ++++- .../ngalert/models/alert_rule_test.go | 117 ++++++++- pkg/services/ngalert/models/notifications.go | 167 ++++++++++++ 
.../ngalert/models/notifications_test.go | 145 +++++++++++ pkg/services/ngalert/models/testing.go | 180 +++++++++++-- pkg/services/ngalert/ngalert.go | 6 +- pkg/services/ngalert/notifier/alertmanager.go | 41 ++- .../ngalert/notifier/alertmanager_config.go | 43 ++- .../ngalert/notifier/alertmanager_test.go | 2 +- .../ngalert/notifier/autogen_alertmanager.go | 185 +++++++++++++ .../notifier/autogen_alertmanager_test.go | 238 +++++++++++++++++ .../ngalert/notifier/multiorg_alertmanager.go | 45 ++-- .../multiorg_alertmanager_remote_test.go | 2 + .../notifier/multiorg_alertmanager_test.go | 9 +- pkg/services/ngalert/notifier/testing.go | 36 +++ pkg/services/ngalert/notifier/validation.go | 132 ++++++++++ .../ngalert/provisioning/alert_rules.go | 79 +++++- .../ngalert/provisioning/contactpoints.go | 65 +++-- .../provisioning/contactpoints_test.go | 26 +- pkg/services/ngalert/provisioning/testing.go | 8 + pkg/services/ngalert/schedule/registry.go | 5 + .../ngalert/schedule/registry_test.go | 6 + pkg/services/ngalert/schedule/schedule.go | 2 +- pkg/services/ngalert/sender/router_test.go | 3 +- pkg/services/ngalert/state/cache.go | 35 ++- pkg/services/ngalert/state/cache_test.go | 56 ++++ pkg/services/ngalert/state/state.go | 13 +- pkg/services/ngalert/state/state_test.go | 81 ++++++ pkg/services/ngalert/store/alert_rule.go | 174 ++++++++++--- pkg/services/ngalert/store/alert_rule_test.go | 164 ++++++++++++ pkg/services/ngalert/store/deltas.go | 21 ++ .../provisioning/alerting/provisioner.go | 26 +- .../provisioning/alerting/rules_types.go | 101 ++++++- .../provisioning/alerting/rules_types_test.go | 114 ++++++++ pkg/services/provisioning/provisioning.go | 8 +- .../sqlstore/migrations/migrations.go | 2 + .../ualert/rule_notification_settings_mig.go | 20 ++ pkg/tests/api/alerting/api_ruler_test.go | 246 ++++++++++++++++++ .../rule-notification-settings-1-post.json | 58 +++++ pkg/tests/api/alerting/testing.go | 59 ++++- 60 files changed, 3466 insertions(+), 304 deletions(-) 
create mode 100644 pkg/services/ngalert/models/notifications.go create mode 100644 pkg/services/ngalert/models/notifications_test.go create mode 100644 pkg/services/ngalert/notifier/autogen_alertmanager.go create mode 100644 pkg/services/ngalert/notifier/autogen_alertmanager_test.go create mode 100644 pkg/services/ngalert/notifier/validation.go create mode 100644 pkg/services/sqlstore/migrations/ualert/rule_notification_settings_mig.go create mode 100644 pkg/tests/api/alerting/test-data/rule-notification-settings-1-post.json diff --git a/conf/provisioning/alerting/sample.yaml b/conf/provisioning/alerting/sample.yaml index ec41552e531..51836f25b3f 100644 --- a/conf/provisioning/alerting/sample.yaml +++ b/conf/provisioning/alerting/sample.yaml @@ -67,6 +67,45 @@ apiVersion: 1 # labels: # team: sre_team_1 # isPaused: false +# # optional settings that let you configure the notification settings applied to alerts created by this rule +# notification_settings: +# # name of the receiver (contact-point) that should be used for this route +# receiver: grafana-default-email +# # > The labels by which incoming alerts are grouped together. For example, +# # multiple alerts coming in for cluster=A and alertname=LatencyHigh would +# # be batched into a single group. +# # +# # To aggregate by all possible labels, use the special value '...' as +# # the sole label name, for example: +# # group_by: ['...'] +# # This effectively disables aggregation entirely, passing through all +# # alerts as-is. This is unlikely to be what you want, unless you have +# # a very low alert volume or your upstream notification system performs +# # its own grouping. +# # If defined, must contain the labels 'alertname' and 'grafana_folder', except when it contains '...' +# group_by: ["alertname", "grafana_folder", "region"] +# # Times when the route should be muted. These must match the name of a +# # mute time interval. +# # Additionally, the root node cannot have any mute times. 
+# # When a route is muted it will not send any notifications, but +# # otherwise acts normally (including ending the route-matching process +# # if the `continue` option is not set) +# mute_time_intervals: +# - abc +# # How long to initially wait to send a notification for a group +# # of alerts. Allows collecting more initial alerts for the same group. +# # (Usually ~0s to a few minutes). +# # If not specified, the corresponding setting of the default policy is used. +# group_wait: 30s +# # How long to wait before sending a notification about new alerts that +# # are added to a group of alerts for which an initial notification has +# # already been sent. (Usually ~5m or more). +# # If not specified, the corresponding setting of the default policy is used. +# group_interval: 5m +# # How long to wait before sending a notification again if it has already +# # been sent successfully for an alert. (Usually ~3h or more). +# # If not specified, the corresponding setting of the default policy is used. 
+# repeat_interval: 4h # # List of alert rule UIDs that should be deleted # deleteRules: diff --git a/pkg/services/ngalert/api/api.go b/pkg/services/ngalert/api/api.go index b71531e6adb..64ebc734b77 100644 --- a/pkg/services/ngalert/api/api.go +++ b/pkg/services/ngalert/api/api.go @@ -57,7 +57,7 @@ type API struct { TransactionManager provisioning.TransactionManager ProvenanceStore provisioning.ProvisioningStore RuleStore RuleStore - AlertingStore AlertingStore + AlertingStore store.AlertingStore AdminConfigStore store.AdminConfigurationStore DataProxy *datasourceproxy.DataSourceProxyService MultiOrgAlertmanager *notifier.MultiOrgAlertmanager @@ -115,6 +115,9 @@ func (api *API) RegisterAPIEndpoints(m *metrics.API) { log: logger, cfg: &api.Cfg.UnifiedAlerting, authz: ruleAuthzService, + amConfigStore: api.AlertingStore, + amRefresher: api.MultiOrgAlertmanager, + featureManager: api.FeatureManager, }, ), m) api.RegisterTestingApiEndpoints(NewTestingApi( diff --git a/pkg/services/ngalert/api/api_alertmanager.go b/pkg/services/ngalert/api/api_alertmanager.go index 9856b1032f6..51398e72951 100644 --- a/pkg/services/ngalert/api/api_alertmanager.go +++ b/pkg/services/ngalert/api/api_alertmanager.go @@ -20,6 +20,7 @@ import ( apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" "github.com/grafana/grafana/pkg/services/ngalert/notifier" "github.com/grafana/grafana/pkg/services/ngalert/store" + "github.com/grafana/grafana/pkg/services/org" "github.com/grafana/grafana/pkg/util" ) @@ -124,7 +125,8 @@ func (srv AlertmanagerSrv) RouteDeleteSilence(c *contextmodel.ReqContext, silenc } func (srv AlertmanagerSrv) RouteGetAlertingConfig(c *contextmodel.ReqContext) response.Response { - config, err := srv.mam.GetAlertmanagerConfiguration(c.Req.Context(), c.SignedInUser.GetOrgID()) + canSeeAutogen := c.SignedInUser.HasRole(org.RoleAdmin) + config, err := srv.mam.GetAlertmanagerConfiguration(c.Req.Context(), c.SignedInUser.GetOrgID(), canSeeAutogen) if 
err != nil { if errors.Is(err, store.ErrNoAlertmanagerConfiguration) { return ErrResp(http.StatusNotFound, err, "") @@ -264,7 +266,10 @@ func (srv AlertmanagerSrv) RoutePostGrafanaAlertingConfigHistoryActivate(c *cont } func (srv AlertmanagerSrv) RoutePostAlertingConfig(c *contextmodel.ReqContext, body apimodels.PostableUserConfig) response.Response { - currentConfig, err := srv.mam.GetAlertmanagerConfiguration(c.Req.Context(), c.SignedInUser.GetOrgID()) + // Remove autogenerated config from the user config before checking provenance guard and eventually saving it. + // TODO: This and provenance guard should be moved to the notifier package. + notifier.RemoveAutogenConfigIfExists(body.AlertmanagerConfig.Route) + currentConfig, err := srv.mam.GetAlertmanagerConfiguration(c.Req.Context(), c.SignedInUser.GetOrgID(), false) // If a config is present and valid we proceed with the guard, otherwise we // just bypass the guard which is okay as we are anyway in an invalid state. if err == nil { @@ -272,7 +277,7 @@ func (srv AlertmanagerSrv) RoutePostAlertingConfig(c *contextmodel.ReqContext, b return ErrResp(http.StatusBadRequest, err, "") } } - err = srv.mam.ApplyAlertmanagerConfiguration(c.Req.Context(), c.SignedInUser.GetOrgID(), body) + err = srv.mam.SaveAndApplyAlertmanagerConfiguration(c.Req.Context(), c.SignedInUser.GetOrgID(), body) if err == nil { return response.JSON(http.StatusAccepted, util.DynMap{"message": "configuration created"}) } diff --git a/pkg/services/ngalert/api/api_alertmanager_test.go b/pkg/services/ngalert/api/api_alertmanager_test.go index 970e822add0..c6e8deadb8a 100644 --- a/pkg/services/ngalert/api/api_alertmanager_test.go +++ b/pkg/services/ngalert/api/api_alertmanager_test.go @@ -10,8 +10,11 @@ import ( "time" "github.com/go-openapi/strfmt" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" alertingNotify "github.com/grafana/alerting/notify" amv2 "github.com/prometheus/alertmanager/api/v2/models" + 
"github.com/prometheus/alertmanager/pkg/labels" "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/require" @@ -20,6 +23,7 @@ import ( "github.com/grafana/grafana/pkg/services/accesscontrol" "github.com/grafana/grafana/pkg/services/accesscontrol/acimpl" contextmodel "github.com/grafana/grafana/pkg/services/contexthandler/model" + "github.com/grafana/grafana/pkg/services/featuremgmt" apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" "github.com/grafana/grafana/pkg/services/ngalert/metrics" ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models" @@ -325,6 +329,84 @@ func TestAlertmanagerConfig(t *testing.T) { }) } +func TestAlertmanagerAutogenConfig(t *testing.T) { + createSutForAutogen := func(t *testing.T) (AlertmanagerSrv, map[int64]*ngmodels.AlertConfiguration) { + sut := createSut(t) + configs := map[int64]*ngmodels.AlertConfiguration{ + 1: {AlertmanagerConfiguration: validConfig, OrgID: 1}, + 2: {AlertmanagerConfiguration: validConfigWithoutAutogen, OrgID: 2}, + } + sut.mam = createMultiOrgAlertmanager(t, configs) + return sut, configs + } + + compare := func(t *testing.T, expectedAm string, testAm string) { + test, err := notifier.Load([]byte(testAm)) + require.NoError(t, err) + + exp, err := notifier.Load([]byte(expectedAm)) + require.NoError(t, err) + + cOpt := []cmp.Option{ + cmpopts.IgnoreUnexported(apimodels.PostableUserConfig{}, apimodels.Route{}, labels.Matcher{}), + cmpopts.IgnoreFields(apimodels.PostableGrafanaReceiver{}, "UID", "Settings"), + } + if !cmp.Equal(test, exp, cOpt...) 
{ + t.Errorf("Unexpected AM Config: %v", cmp.Diff(test, exp, cOpt...)) + } + } + + t.Run("route POST config", func(t *testing.T) { + t.Run("does not save autogen routes", func(t *testing.T) { + sut, configs := createSutForAutogen(t) + rc := createRequestCtxInOrg(1) + request := createAmConfigRequest(t, validConfigWithAutogen) + response := sut.RoutePostAlertingConfig(rc, request) + require.Equal(t, 202, response.Status()) + + compare(t, validConfigWithoutAutogen, configs[1].AlertmanagerConfiguration) + }) + + t.Run("provenance guard ignores autogen routes", func(t *testing.T) { + sut := createSut(t) + rc := createRequestCtxInOrg(1) + request := createAmConfigRequest(t, validConfigWithoutAutogen) + _ = sut.RoutePostAlertingConfig(rc, request) + + setRouteProvenance(t, 1, sut.mam.ProvStore) + request = createAmConfigRequest(t, validConfigWithAutogen) + request.AlertmanagerConfig.Route.Provenance = apimodels.Provenance(ngmodels.ProvenanceAPI) + response := sut.RoutePostAlertingConfig(rc, request) + require.Equal(t, 202, response.Status()) + }) + }) + + t.Run("route GET config", func(t *testing.T) { + t.Run("when admin return autogen routes", func(t *testing.T) { + sut, _ := createSutForAutogen(t) + + rc := createRequestCtxInOrg(2) + rc.SignedInUser.OrgRole = org.RoleAdmin + + response := sut.RouteGetAlertingConfig(rc) + require.Equal(t, 200, response.Status()) + + compare(t, validConfigWithAutogen, string(response.Body())) + }) + + t.Run("when not admin return no autogen routes", func(t *testing.T) { + sut, _ := createSutForAutogen(t) + + rc := createRequestCtxInOrg(2) + + response := sut.RouteGetAlertingConfig(rc) + require.Equal(t, 200, response.Status()) + + compare(t, validConfigWithoutAutogen, string(response.Body())) + }) + }) +} + func TestRouteGetAlertingConfigHistory(t *testing.T) { sut := createSut(t) @@ -633,7 +715,12 @@ func TestRouteCreateSilence(t *testing.T) { func createSut(t *testing.T) AlertmanagerSrv { t.Helper() - mam := 
createMultiOrgAlertmanager(t) + configs := map[int64]*ngmodels.AlertConfiguration{ + 1: {AlertmanagerConfiguration: validConfig, OrgID: 1}, + 2: {AlertmanagerConfiguration: validConfig, OrgID: 2}, + 3: {AlertmanagerConfiguration: brokenConfig, OrgID: 3}, + } + mam := createMultiOrgAlertmanager(t, configs) log := log.NewNopLogger() return AlertmanagerSrv{ mam: mam, @@ -653,14 +740,9 @@ func createAmConfigRequest(t *testing.T, config string) apimodels.PostableUserCo return request } -func createMultiOrgAlertmanager(t *testing.T) *notifier.MultiOrgAlertmanager { +func createMultiOrgAlertmanager(t *testing.T, configs map[int64]*ngmodels.AlertConfiguration) *notifier.MultiOrgAlertmanager { t.Helper() - configs := map[int64]*ngmodels.AlertConfiguration{ - 1: {AlertmanagerConfiguration: validConfig, OrgID: 1}, - 2: {AlertmanagerConfiguration: validConfig, OrgID: 2}, - 3: {AlertmanagerConfiguration: brokenConfig, OrgID: 3}, - } configStore := notifier.NewFakeConfigStore(t, configs) orgStore := notifier.NewFakeOrgStore(t, []int64{1, 2, 3}) provStore := ngfakes.NewFakeProvisioningStore() @@ -679,7 +761,7 @@ func createMultiOrgAlertmanager(t *testing.T) *notifier.MultiOrgAlertmanager { }, // do not poll in tests. 
} - mam, err := notifier.NewMultiOrgAlertmanager(cfg, configStore, orgStore, kvStore, provStore, decryptFn, m.GetMultiOrgAlertmanagerMetrics(), nil, log.New("testlogger"), secretsService) + mam, err := notifier.NewMultiOrgAlertmanager(cfg, configStore, orgStore, kvStore, provStore, decryptFn, m.GetMultiOrgAlertmanagerMetrics(), nil, log.New("testlogger"), secretsService, featuremgmt.WithManager(featuremgmt.FlagAlertingSimplifiedRouting)) require.NoError(t, err) err = mam.LoadAndSyncAlertmanagersForOrgs(context.Background()) require.NoError(t, err) @@ -710,6 +792,90 @@ var validConfig = `{ } ` +var validConfigWithoutAutogen = `{ + "template_files": { + "a": "template" + }, + "alertmanager_config": { + "route": { + "receiver": "some email", + "routes": [{ + "receiver": "other email", + "object_matchers": [["a", "=", "b"]] + }] + }, + "receivers": [{ + "name": "some email", + "grafana_managed_receiver_configs": [{ + "name": "some email", + "type": "email", + "settings": { + "addresses": "" + } + }] + },{ + "name": "other email", + "grafana_managed_receiver_configs": [{ + "name": "other email", + "type": "email", + "settings": { + "addresses": "" + } + }] + }] + } +} +` + +var validConfigWithAutogen = `{ + "template_files": { + "a": "template" + }, + "alertmanager_config": { + "route": { + "receiver": "some email", + "routes": [{ + "receiver": "some email", + "object_matchers": [["__grafana_autogenerated__", "=", "true"]], + "routes": [{ + "receiver": "some email", + "group_by": ["grafana_folder", "alertname"], + "object_matchers": [["__grafana_receiver__", "=", "some email"]], + "continue": false + },{ + "receiver": "other email", + "group_by": ["grafana_folder", "alertname"], + "object_matchers": [["__grafana_receiver__", "=", "other email"]], + "continue": false + }] + },{ + "receiver": "other email", + "object_matchers": [["a", "=", "b"]] + }] + }, + "receivers": [{ + "name": "some email", + "grafana_managed_receiver_configs": [{ + "name": "some email", + "type": 
"email", + "settings": { + "addresses": "" + } + }] + },{ + "name": "other email", + "grafana_managed_receiver_configs": [{ + "name": "other email", + "type": "email", + "settings": { + "addresses": "" + } + }] + }] + } +} +` + var validConfigWithSecureSetting = `{ "template_files": { "a": "template" diff --git a/pkg/services/ngalert/api/api_provisioning_test.go b/pkg/services/ngalert/api/api_provisioning_test.go index c11f76905a9..df1d266747e 100644 --- a/pkg/services/ngalert/api/api_provisioning_test.go +++ b/pkg/services/ngalert/api/api_provisioning_test.go @@ -525,17 +525,14 @@ func TestProvisioningApi(t *testing.T) { t.Run("yaml body content is the default", func(t *testing.T) { sut := createProvisioningSrvSut(t) rc := createTestRequestCtx() - insertRule(t, sut, createTestAlertRule("rule1", 1)) + rule1 := createTestAlertRule("rule1", 1) + rule1.NotificationSettings = nil + insertRule(t, sut, rule1) insertRule(t, sut, createTestAlertRule("rule2", 1)) - expectedResponse := "apiVersion: 1\ngroups:\n - orgId: 1\n name: my-cool-group\n folder" + - ": Folder Title\n interval: 1m\n rules:\n - uid: rule1\n title: rule1\n" + - " condition: A\n data:\n - refId: A\n datasourceUid" + - ": \"\"\n model:\n conditions:\n - evaluator:\n" + - " params:\n - 3\n type: gt\n operator:\n type: and\n query:\n params:\n - A\n reducer:\n type: last\n type: query\n datasource:\n type: __expr__\n uid: __expr__\n expression: 1==0\n intervalMs: 1000\n maxDataPoints: 43200\n refId: A\n type: math\n noDataState: OK\n execErrState: OK\n for: 0s\n isPaused: false\n - uid: rule2\n title: rule2\n condition: A\n data:\n - refId: A\n datasourceUid: \"\"\n model:\n conditions:\n - evaluator:\n params:\n - 3\n type: gt\n operator:\n type: and\n query:\n params:\n - A\n reducer:\n type: last\n type: query\n datasource:\n type: __expr__\n uid: __expr__\n expression: 1==0\n intervalMs: 1000\n maxDataPoints: 43200\n refId: A\n type: math\n noDataState: OK\n execErrState: OK\n for: 0s\n isPaused: 
false\n" + expectedResponse := "apiVersion: 1\ngroups:\n - orgId: 1\n name: my-cool-group\n folder: Folder Title\n interval: 1m\n rules:\n - uid: rule1\n title: rule1\n condition: A\n data:\n - refId: A\n datasourceUid: \"\"\n model:\n conditions:\n - evaluator:\n params:\n - 3\n type: gt\n operator:\n type: and\n query:\n params:\n - A\n reducer:\n type: last\n type: query\n datasource:\n type: __expr__\n uid: __expr__\n expression: 1==0\n intervalMs: 1000\n maxDataPoints: 43200\n refId: A\n type: math\n noDataState: OK\n execErrState: OK\n for: 0s\n isPaused: false\n - uid: rule2\n title: rule2\n condition: A\n data:\n - refId: A\n datasourceUid: \"\"\n model:\n conditions:\n - evaluator:\n params:\n - 3\n type: gt\n operator:\n type: and\n query:\n params:\n - A\n reducer:\n type: last\n type: query\n datasource:\n type: __expr__\n uid: __expr__\n expression: 1==0\n intervalMs: 1000\n maxDataPoints: 43200\n refId: A\n type: math\n noDataState: OK\n execErrState: OK\n for: 0s\n isPaused: false\n notification_settings:\n receiver: Test-Receiver\n group_by:\n - alertname\n - grafana_folder\n - test\n group_wait: 1s\n group_interval: 5s\n repeat_interval: 5m\n mute_time_intervals:\n - test-mute\n" response := sut.RouteGetAlertRuleGroupExport(&rc, "folder-uid", "my-cool-group") - require.Equal(t, 200, response.Status()) require.Equal(t, expectedResponse, string(response.Body())) }) @@ -543,14 +540,15 @@ func TestProvisioningApi(t *testing.T) { t.Run("json body content is as expected", func(t *testing.T) { sut := createProvisioningSrvSut(t) rc := createTestRequestCtx() - insertRule(t, sut, createTestAlertRule("rule1", 1)) + rule1 := createTestAlertRule("rule1", 1) + rule1.NotificationSettings = nil + insertRule(t, sut, rule1) insertRule(t, sut, createTestAlertRule("rule2", 1)) rc.Context.Req.Header.Add("Accept", "application/json") - expectedResponse := `{"apiVersion":1,"groups":[{"orgId":1,"name":"my-cool-group","folder":"Folder 
Title","interval":"1m","rules":[{"uid":"rule1","title":"rule1","condition":"A","data":[{"refId":"A","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"","model":{"conditions":[{"evaluator":{"params":[3],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"1==0","intervalMs":1000,"maxDataPoints":43200,"refId":"A","type":"math"}}],"noDataState":"OK","execErrState":"OK","for":"0s","isPaused":false},{"uid":"rule2","title":"rule2","condition":"A","data":[{"refId":"A","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"","model":{"conditions":[{"evaluator":{"params":[3],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"1==0","intervalMs":1000,"maxDataPoints":43200,"refId":"A","type":"math"}}],"noDataState":"OK","execErrState":"OK","for":"0s","isPaused":false}]}]}` + expectedResponse := `{"apiVersion":1,"groups":[{"orgId":1,"name":"my-cool-group","folder":"Folder 
Title","interval":"1m","rules":[{"uid":"rule1","title":"rule1","condition":"A","data":[{"refId":"A","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"","model":{"conditions":[{"evaluator":{"params":[3],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"1==0","intervalMs":1000,"maxDataPoints":43200,"refId":"A","type":"math"}}],"noDataState":"OK","execErrState":"OK","for":"0s","isPaused":false},{"uid":"rule2","title":"rule2","condition":"A","data":[{"refId":"A","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"","model":{"conditions":[{"evaluator":{"params":[3],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"1==0","intervalMs":1000,"maxDataPoints":43200,"refId":"A","type":"math"}}],"noDataState":"OK","execErrState":"OK","for":"0s","isPaused":false,"notification_settings":{"receiver":"Test-Receiver","group_by":["alertname","grafana_folder","test"],"group_wait":"1s","group_interval":"5s","repeat_interval":"5m","mute_time_intervals":["test-mute"]}}]}]}` response := sut.RouteGetAlertRuleGroupExport(&rc, "folder-uid", "my-cool-group") - require.Equal(t, 200, response.Status()) require.Equal(t, expectedResponse, string(response.Body())) }) @@ -566,7 +564,7 @@ func TestProvisioningApi(t *testing.T) { ": Folder Title\n interval: 1m\n rules:\n - uid: rule1\n title: rule1\n" + " condition: A\n data:\n - refId: A\n datasourceUid" + ": \"\"\n model:\n conditions:\n - evaluator:\n" + - " params:\n - 3\n type: gt\n operator:\n type: and\n query:\n params:\n - A\n reducer:\n type: last\n type: query\n datasource:\n type: __expr__\n uid: __expr__\n expression: 1==0\n intervalMs: 1000\n maxDataPoints: 43200\n refId: A\n type: math\n noDataState: OK\n execErrState: OK\n for: 0s\n isPaused: false\n - uid: rule2\n title: rule2\n 
condition: A\n data:\n - refId: A\n datasourceUid: \"\"\n model:\n conditions:\n - evaluator:\n params:\n - 3\n type: gt\n operator:\n type: and\n query:\n params:\n - A\n reducer:\n type: last\n type: query\n datasource:\n type: __expr__\n uid: __expr__\n expression: 1==0\n intervalMs: 1000\n maxDataPoints: 43200\n refId: A\n type: math\n noDataState: OK\n execErrState: OK\n for: 0s\n isPaused: false\n" + " params:\n - 3\n type: gt\n operator:\n type: and\n query:\n params:\n - A\n reducer:\n type: last\n type: query\n datasource:\n type: __expr__\n uid: __expr__\n expression: 1==0\n intervalMs: 1000\n maxDataPoints: 43200\n refId: A\n type: math\n noDataState: OK\n execErrState: OK\n for: 0s\n isPaused: false\n notification_settings:\n receiver: Test-Receiver\n group_by:\n - alertname\n - grafana_folder\n - test\n group_wait: 1s\n group_interval: 5s\n repeat_interval: 5m\n mute_time_intervals:\n - test-mute\n - uid: rule2\n title: rule2\n condition: A\n data:\n - refId: A\n datasourceUid: \"\"\n model:\n conditions:\n - evaluator:\n params:\n - 3\n type: gt\n operator:\n type: and\n query:\n params:\n - A\n reducer:\n type: last\n type: query\n datasource:\n type: __expr__\n uid: __expr__\n expression: 1==0\n intervalMs: 1000\n maxDataPoints: 43200\n refId: A\n type: math\n noDataState: OK\n execErrState: OK\n for: 0s\n isPaused: false\n notification_settings:\n receiver: Test-Receiver\n group_by:\n - alertname\n - grafana_folder\n - test\n group_wait: 1s\n group_interval: 5s\n repeat_interval: 5m\n mute_time_intervals:\n - test-mute\n" response := sut.RouteGetAlertRuleGroupExport(&rc, "folder-uid", "my-cool-group") @@ -585,6 +583,7 @@ func TestProvisioningApi(t *testing.T) { } rule1.NoDataState = definitions.Alerting rule1.ExecErrState = definitions.ErrorErrState + rule1.NotificationSettings = nil insertRule(t, sut, rule1) insertRule(t, sut, createTestAlertRule("rule2", 1)) @@ -641,6 +640,15 @@ func TestProvisioningApi(t *testing.T) { exec_err_state = "OK" for = 
"0s" is_paused = false + + notification_settings { + receiver = "Test-Receiver" + group_by = ["alertname", "grafana_folder", "test"] + group_wait = "1s" + group_interval = "5s" + repeat_interval = "5m" + mute_time_intervals = ["test-mute"] + } } } ` @@ -774,7 +782,7 @@ func TestProvisioningApi(t *testing.T) { rc := createTestRequestCtx() insertRule(t, sut, createTestAlertRule("rule1", 1)) - expectedResponse := `{"apiVersion":1,"groups":[{"orgId":1,"name":"my-cool-group","folder":"Folder Title","interval":"1m","rules":[{"uid":"rule1","title":"rule1","condition":"A","data":[{"refId":"A","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"","model":{"conditions":[{"evaluator":{"params":[3],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"1==0","intervalMs":1000,"maxDataPoints":43200,"refId":"A","type":"math"}}],"noDataState":"OK","execErrState":"OK","for":"0s","isPaused":false}]}]}` + expectedResponse := `{"apiVersion":1,"groups":[{"orgId":1,"name":"my-cool-group","folder":"Folder Title","interval":"1m","rules":[{"uid":"rule1","title":"rule1","condition":"A","data":[{"refId":"A","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"","model":{"conditions":[{"evaluator":{"params":[3],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"1==0","intervalMs":1000,"maxDataPoints":43200,"refId":"A","type":"math"}}],"noDataState":"OK","execErrState":"OK","for":"0s","isPaused":false,"notification_settings":{"receiver":"Test-Receiver","group_by":["alertname","grafana_folder","test"],"group_wait":"1s","group_interval":"5s","repeat_interval":"5m","mute_time_intervals":["test-mute"]}}]}]}` rc.Context.Req.Header.Add("Accept", "application/json") response := sut.RouteGetAlertRuleExport(&rc, "rule1") @@ -789,7 +797,7 @@ func 
TestProvisioningApi(t *testing.T) { insertRule(t, sut, createTestAlertRule("rule1", 1)) rc.Context.Req.Header.Add("Accept", "application/yaml") - expectedResponse := "apiVersion: 1\ngroups:\n - orgId: 1\n name: my-cool-group\n folder: Folder Title\n interval: 1m\n rules:\n - uid: rule1\n title: rule1\n condition: A\n data:\n - refId: A\n datasourceUid: \"\"\n model:\n conditions:\n - evaluator:\n params:\n - 3\n type: gt\n operator:\n type: and\n query:\n params:\n - A\n reducer:\n type: last\n type: query\n datasource:\n type: __expr__\n uid: __expr__\n expression: 1==0\n intervalMs: 1000\n maxDataPoints: 43200\n refId: A\n type: math\n noDataState: OK\n execErrState: OK\n for: 0s\n isPaused: false\n" + expectedResponse := "apiVersion: 1\ngroups:\n - orgId: 1\n name: my-cool-group\n folder: Folder Title\n interval: 1m\n rules:\n - uid: rule1\n title: rule1\n condition: A\n data:\n - refId: A\n datasourceUid: \"\"\n model:\n conditions:\n - evaluator:\n params:\n - 3\n type: gt\n operator:\n type: and\n query:\n params:\n - A\n reducer:\n type: last\n type: query\n datasource:\n type: __expr__\n uid: __expr__\n expression: 1==0\n intervalMs: 1000\n maxDataPoints: 43200\n refId: A\n type: math\n noDataState: OK\n execErrState: OK\n for: 0s\n isPaused: false\n notification_settings:\n receiver: Test-Receiver\n group_by:\n - alertname\n - grafana_folder\n - test\n group_wait: 1s\n group_interval: 5s\n repeat_interval: 5m\n mute_time_intervals:\n - test-mute\n" response := sut.RouteGetAlertRuleExport(&rc, "rule1") @@ -889,15 +897,19 @@ func TestProvisioningApi(t *testing.T) { t.Run("json body content is as expected", func(t *testing.T) { sut := createProvisioningSrvSut(t) rc := createTestRequestCtx() - insertRule(t, sut, createTestAlertRuleWithFolderAndGroup("rule1", 1, "folder-uid", "groupa")) - insertRule(t, sut, createTestAlertRuleWithFolderAndGroup("rule2", 1, "folder-uid", "groupb")) - insertRule(t, sut, createTestAlertRuleWithFolderAndGroup("rule3", 1, 
"folder-uid2", "groupb")) + rule1 := createTestAlertRuleWithFolderAndGroup("rule1", 1, "folder-uid", "groupa") + rule1.NotificationSettings = nil + rule2 := createTestAlertRuleWithFolderAndGroup("rule2", 1, "folder-uid", "groupb") + rule1.NotificationSettings = &definitions.AlertRuleNotificationSettings{Receiver: "Email"} + rule3 := createTestAlertRuleWithFolderAndGroup("rule3", 1, "folder-uid2", "groupb") + insertRule(t, sut, rule1) + insertRule(t, sut, rule2) + insertRule(t, sut, rule3) rc.Context.Req.Header.Add("Accept", "application/json") - expectedResponse := `{"apiVersion":1,"groups":[{"orgId":1,"name":"groupa","folder":"Folder Title","interval":"1m","rules":[{"uid":"rule1","title":"rule1","condition":"A","data":[{"refId":"A","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"","model":{"conditions":[{"evaluator":{"params":[3],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"1==0","intervalMs":1000,"maxDataPoints":43200,"refId":"A","type":"math"}}],"noDataState":"OK","execErrState":"OK","for":"0s","isPaused":false}]},{"orgId":1,"name":"groupb","folder":"Folder Title","interval":"1m","rules":[{"uid":"rule2","title":"rule2","condition":"A","data":[{"refId":"A","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"","model":{"conditions":[{"evaluator":{"params":[3],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"1==0","intervalMs":1000,"maxDataPoints":43200,"refId":"A","type":"math"}}],"noDataState":"OK","execErrState":"OK","for":"0s","isPaused":false}]},{"orgId":1,"name":"groupb","folder":"Folder 
Title2","interval":"1m","rules":[{"uid":"rule3","title":"rule3","condition":"A","data":[{"refId":"A","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"","model":{"conditions":[{"evaluator":{"params":[3],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"1==0","intervalMs":1000,"maxDataPoints":43200,"refId":"A","type":"math"}}],"noDataState":"OK","execErrState":"OK","for":"0s","isPaused":false}]}]}` + expectedResponse := `{"apiVersion":1,"groups":[{"orgId":1,"name":"groupa","folder":"Folder Title","interval":"1m","rules":[{"uid":"rule1","title":"rule1","condition":"A","data":[{"refId":"A","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"","model":{"conditions":[{"evaluator":{"params":[3],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"1==0","intervalMs":1000,"maxDataPoints":43200,"refId":"A","type":"math"}}],"noDataState":"OK","execErrState":"OK","for":"0s","isPaused":false,"notification_settings":{"receiver":"Email"}}]},{"orgId":1,"name":"groupb","folder":"Folder Title","interval":"1m","rules":[{"uid":"rule2","title":"rule2","condition":"A","data":[{"refId":"A","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"","model":{"conditions":[{"evaluator":{"params":[3],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"1==0","intervalMs":1000,"maxDataPoints":43200,"refId":"A","type":"math"}}],"noDataState":"OK","execErrState":"OK","for":"0s","isPaused":false,"notification_settings":{"receiver":"Test-Receiver","group_by":["alertname","grafana_folder","test"],"group_wait":"1s","group_interval":"5s","repeat_interval":"5m","mute_time_intervals":["test-mute"]}}]},{"orgId":1,"name":"groupb","folder":"Folder 
Title2","interval":"1m","rules":[{"uid":"rule3","title":"rule3","condition":"A","data":[{"refId":"A","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"","model":{"conditions":[{"evaluator":{"params":[3],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"1==0","intervalMs":1000,"maxDataPoints":43200,"refId":"A","type":"math"}}],"noDataState":"OK","execErrState":"OK","for":"0s","isPaused":false,"notification_settings":{"receiver":"Test-Receiver","group_by":["alertname","grafana_folder","test"],"group_wait":"1s","group_interval":"5s","repeat_interval":"5m","mute_time_intervals":["test-mute"]}}]}]}` response := sut.RouteGetAlertRulesExport(&rc) - require.Equal(t, 200, response.Status()) require.Equal(t, expectedResponse, string(response.Body())) }) @@ -905,15 +917,19 @@ func TestProvisioningApi(t *testing.T) { t.Run("yaml body content is as expected", func(t *testing.T) { sut := createProvisioningSrvSut(t) rc := createTestRequestCtx() - insertRule(t, sut, createTestAlertRuleWithFolderAndGroup("rule1", 1, "folder-uid", "groupa")) - insertRule(t, sut, createTestAlertRuleWithFolderAndGroup("rule2", 1, "folder-uid", "groupb")) - insertRule(t, sut, createTestAlertRuleWithFolderAndGroup("rule3", 1, "folder-uid2", "groupb")) + rule1 := createTestAlertRuleWithFolderAndGroup("rule1", 1, "folder-uid", "groupa") + rule1.NotificationSettings = nil + rule2 := createTestAlertRuleWithFolderAndGroup("rule2", 1, "folder-uid", "groupb") + rule1.NotificationSettings = &definitions.AlertRuleNotificationSettings{Receiver: "Email"} + rule3 := createTestAlertRuleWithFolderAndGroup("rule3", 1, "folder-uid2", "groupb") + insertRule(t, sut, rule1) + insertRule(t, sut, rule2) + insertRule(t, sut, rule3) rc.Context.Req.Header.Add("Accept", "application/yaml") - expectedResponse := "apiVersion: 1\ngroups:\n - orgId: 1\n name: groupa\n folder: Folder Title\n interval: 1m\n 
rules:\n - uid: rule1\n title: rule1\n condition: A\n data:\n - refId: A\n datasourceUid: \"\"\n model:\n conditions:\n - evaluator:\n params:\n - 3\n type: gt\n operator:\n type: and\n query:\n params:\n - A\n reducer:\n type: last\n type: query\n datasource:\n type: __expr__\n uid: __expr__\n expression: 1==0\n intervalMs: 1000\n maxDataPoints: 43200\n refId: A\n type: math\n noDataState: OK\n execErrState: OK\n for: 0s\n isPaused: false\n - orgId: 1\n name: groupb\n folder: Folder Title\n interval: 1m\n rules:\n - uid: rule2\n title: rule2\n condition: A\n data:\n - refId: A\n datasourceUid: \"\"\n model:\n conditions:\n - evaluator:\n params:\n - 3\n type: gt\n operator:\n type: and\n query:\n params:\n - A\n reducer:\n type: last\n type: query\n datasource:\n type: __expr__\n uid: __expr__\n expression: 1==0\n intervalMs: 1000\n maxDataPoints: 43200\n refId: A\n type: math\n noDataState: OK\n execErrState: OK\n for: 0s\n isPaused: false\n - orgId: 1\n name: groupb\n folder: Folder Title2\n interval: 1m\n rules:\n - uid: rule3\n title: rule3\n condition: A\n data:\n - refId: A\n datasourceUid: \"\"\n model:\n conditions:\n - evaluator:\n params:\n - 3\n type: gt\n operator:\n type: and\n query:\n params:\n - A\n reducer:\n type: last\n type: query\n datasource:\n type: __expr__\n uid: __expr__\n expression: 1==0\n intervalMs: 1000\n maxDataPoints: 43200\n refId: A\n type: math\n noDataState: OK\n execErrState: OK\n for: 0s\n isPaused: false\n" + expectedResponse := "apiVersion: 1\ngroups:\n - orgId: 1\n name: groupa\n folder: Folder Title\n interval: 1m\n rules:\n - uid: rule1\n title: rule1\n condition: A\n data:\n - refId: A\n datasourceUid: \"\"\n model:\n conditions:\n - evaluator:\n params:\n - 3\n type: gt\n operator:\n type: and\n query:\n params:\n - A\n reducer:\n type: last\n type: query\n datasource:\n type: __expr__\n uid: __expr__\n expression: 1==0\n intervalMs: 1000\n maxDataPoints: 43200\n refId: A\n type: math\n noDataState: OK\n execErrState: 
OK\n for: 0s\n isPaused: false\n notification_settings:\n receiver: Email\n - orgId: 1\n name: groupb\n folder: Folder Title\n interval: 1m\n rules:\n - uid: rule2\n title: rule2\n condition: A\n data:\n - refId: A\n datasourceUid: \"\"\n model:\n conditions:\n - evaluator:\n params:\n - 3\n type: gt\n operator:\n type: and\n query:\n params:\n - A\n reducer:\n type: last\n type: query\n datasource:\n type: __expr__\n uid: __expr__\n expression: 1==0\n intervalMs: 1000\n maxDataPoints: 43200\n refId: A\n type: math\n noDataState: OK\n execErrState: OK\n for: 0s\n isPaused: false\n notification_settings:\n receiver: Test-Receiver\n group_by:\n - alertname\n - grafana_folder\n - test\n group_wait: 1s\n group_interval: 5s\n repeat_interval: 5m\n mute_time_intervals:\n - test-mute\n - orgId: 1\n name: groupb\n folder: Folder Title2\n interval: 1m\n rules:\n - uid: rule3\n title: rule3\n condition: A\n data:\n - refId: A\n datasourceUid: \"\"\n model:\n conditions:\n - evaluator:\n params:\n - 3\n type: gt\n operator:\n type: and\n query:\n params:\n - A\n reducer:\n type: last\n type: query\n datasource:\n type: __expr__\n uid: __expr__\n expression: 1==0\n intervalMs: 1000\n maxDataPoints: 43200\n refId: A\n type: math\n noDataState: OK\n execErrState: OK\n for: 0s\n isPaused: false\n notification_settings:\n receiver: Test-Receiver\n group_by:\n - alertname\n - grafana_folder\n - test\n group_wait: 1s\n group_interval: 5s\n repeat_interval: 5m\n mute_time_intervals:\n - test-mute\n" response := sut.RouteGetAlertRulesExport(&rc) - require.Equal(t, 200, response.Status()) require.Equal(t, expectedResponse, string(response.Body())) }) @@ -927,10 +943,9 @@ func TestProvisioningApi(t *testing.T) { rc.Context.Req.Header.Add("Accept", "application/json") rc.Context.Req.Form.Set("folderUid", "folder-uid") - expectedResponse := `{"apiVersion":1,"groups":[{"orgId":1,"name":"groupa","folder":"Folder 
Title","interval":"1m","rules":[{"uid":"rule1","title":"rule1","condition":"A","data":[{"refId":"A","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"","model":{"conditions":[{"evaluator":{"params":[3],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"1==0","intervalMs":1000,"maxDataPoints":43200,"refId":"A","type":"math"}}],"noDataState":"OK","execErrState":"OK","for":"0s","isPaused":false}]},{"orgId":1,"name":"groupb","folder":"Folder Title","interval":"1m","rules":[{"uid":"rule2","title":"rule2","condition":"A","data":[{"refId":"A","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"","model":{"conditions":[{"evaluator":{"params":[3],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"1==0","intervalMs":1000,"maxDataPoints":43200,"refId":"A","type":"math"}}],"noDataState":"OK","execErrState":"OK","for":"0s","isPaused":false}]}]}` + expectedResponse := `{"apiVersion":1,"groups":[{"orgId":1,"name":"groupa","folder":"Folder Title","interval":"1m","rules":[{"uid":"rule1","title":"rule1","condition":"A","data":[{"refId":"A","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"","model":{"conditions":[{"evaluator":{"params":[3],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"1==0","intervalMs":1000,"maxDataPoints":43200,"refId":"A","type":"math"}}],"noDataState":"OK","execErrState":"OK","for":"0s","isPaused":false,"notification_settings":{"receiver":"Test-Receiver","group_by":["alertname","grafana_folder","test"],"group_wait":"1s","group_interval":"5s","repeat_interval":"5m","mute_time_intervals":["test-mute"]}}]},{"orgId":1,"name":"groupb","folder":"Folder 
Title","interval":"1m","rules":[{"uid":"rule2","title":"rule2","condition":"A","data":[{"refId":"A","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"","model":{"conditions":[{"evaluator":{"params":[3],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"1==0","intervalMs":1000,"maxDataPoints":43200,"refId":"A","type":"math"}}],"noDataState":"OK","execErrState":"OK","for":"0s","isPaused":false,"notification_settings":{"receiver":"Test-Receiver","group_by":["alertname","grafana_folder","test"],"group_wait":"1s","group_interval":"5s","repeat_interval":"5m","mute_time_intervals":["test-mute"]}}]}]}` response := sut.RouteGetAlertRulesExport(&rc) - require.Equal(t, 200, response.Status()) require.Equal(t, expectedResponse, string(response.Body())) }) @@ -945,10 +960,9 @@ func TestProvisioningApi(t *testing.T) { rc.Context.Req.Header.Add("Accept", "application/json") rc.Context.Req.Form.Set("folder_uid", "folder-uid") rc.Context.Req.Form.Add("folder_uid", "folder-uid2") - expectedResponse := `{"apiVersion":1,"groups":[{"orgId":1,"name":"groupa","folder":"Folder Title","interval":"1m","rules":[{"uid":"rule1","title":"rule1","condition":"A","data":[{"refId":"A","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"","model":{"conditions":[{"evaluator":{"params":[3],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"1==0","intervalMs":1000,"maxDataPoints":43200,"refId":"A","type":"math"}}],"noDataState":"OK","execErrState":"OK","for":"0s","isPaused":false}]},{"orgId":1,"name":"groupb","folder":"Folder 
Title","interval":"1m","rules":[{"uid":"rule2","title":"rule2","condition":"A","data":[{"refId":"A","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"","model":{"conditions":[{"evaluator":{"params":[3],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"1==0","intervalMs":1000,"maxDataPoints":43200,"refId":"A","type":"math"}}],"noDataState":"OK","execErrState":"OK","for":"0s","isPaused":false}]},{"orgId":1,"name":"groupb","folder":"Folder Title2","interval":"1m","rules":[{"uid":"rule3","title":"rule3","condition":"A","data":[{"refId":"A","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"","model":{"conditions":[{"evaluator":{"params":[3],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"1==0","intervalMs":1000,"maxDataPoints":43200,"refId":"A","type":"math"}}],"noDataState":"OK","execErrState":"OK","for":"0s","isPaused":false}]}]}` + expectedResponse := `{"apiVersion":1,"groups":[{"orgId":1,"name":"groupa","folder":"Folder Title","interval":"1m","rules":[{"uid":"rule1","title":"rule1","condition":"A","data":[{"refId":"A","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"","model":{"conditions":[{"evaluator":{"params":[3],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"1==0","intervalMs":1000,"maxDataPoints":43200,"refId":"A","type":"math"}}],"noDataState":"OK","execErrState":"OK","for":"0s","isPaused":false,"notification_settings":{"receiver":"Test-Receiver","group_by":["alertname","grafana_folder","test"],"group_wait":"1s","group_interval":"5s","repeat_interval":"5m","mute_time_intervals":["test-mute"]}}]},{"orgId":1,"name":"groupb","folder":"Folder 
Title","interval":"1m","rules":[{"uid":"rule2","title":"rule2","condition":"A","data":[{"refId":"A","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"","model":{"conditions":[{"evaluator":{"params":[3],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"1==0","intervalMs":1000,"maxDataPoints":43200,"refId":"A","type":"math"}}],"noDataState":"OK","execErrState":"OK","for":"0s","isPaused":false,"notification_settings":{"receiver":"Test-Receiver","group_by":["alertname","grafana_folder","test"],"group_wait":"1s","group_interval":"5s","repeat_interval":"5m","mute_time_intervals":["test-mute"]}}]},{"orgId":1,"name":"groupb","folder":"Folder Title2","interval":"1m","rules":[{"uid":"rule3","title":"rule3","condition":"A","data":[{"refId":"A","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"","model":{"conditions":[{"evaluator":{"params":[3],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"1==0","intervalMs":1000,"maxDataPoints":43200,"refId":"A","type":"math"}}],"noDataState":"OK","execErrState":"OK","for":"0s","isPaused":false,"notification_settings":{"receiver":"Test-Receiver","group_by":["alertname","grafana_folder","test"],"group_wait":"1s","group_interval":"5s","repeat_interval":"5m","mute_time_intervals":["test-mute"]}}]}]}` response := sut.RouteGetAlertRulesExport(&rc) - require.Equal(t, 200, response.Status()) require.Equal(t, expectedResponse, string(response.Body())) }) @@ -964,10 +978,9 @@ func TestProvisioningApi(t *testing.T) { rc.Context.Req.Form.Set("folderUid", "folder-uid") rc.Context.Req.Form.Set("group", "groupa") - expectedResponse := `{"apiVersion":1,"groups":[{"orgId":1,"name":"groupa","folder":"Folder 
Title","interval":"1m","rules":[{"uid":"rule1","title":"rule1","condition":"A","data":[{"refId":"A","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"","model":{"conditions":[{"evaluator":{"params":[3],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"1==0","intervalMs":1000,"maxDataPoints":43200,"refId":"A","type":"math"}}],"noDataState":"OK","execErrState":"OK","for":"0s","isPaused":false}]}]}` + expectedResponse := `{"apiVersion":1,"groups":[{"orgId":1,"name":"groupa","folder":"Folder Title","interval":"1m","rules":[{"uid":"rule1","title":"rule1","condition":"A","data":[{"refId":"A","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"","model":{"conditions":[{"evaluator":{"params":[3],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"1==0","intervalMs":1000,"maxDataPoints":43200,"refId":"A","type":"math"}}],"noDataState":"OK","execErrState":"OK","for":"0s","isPaused":false,"notification_settings":{"receiver":"Test-Receiver","group_by":["alertname","grafana_folder","test"],"group_wait":"1s","group_interval":"5s","repeat_interval":"5m","mute_time_intervals":["test-mute"]}}]}]}` response := sut.RouteGetAlertRulesExport(&rc) - require.Equal(t, 200, response.Status()) require.Equal(t, expectedResponse, string(response.Body())) @@ -1003,7 +1016,7 @@ func TestProvisioningApi(t *testing.T) { rc.Context.Req.Header.Add("Accept", "application/json") rc.Context.Req.Form.Set("ruleUid", "rule1") - expectedResponse := `{"apiVersion":1,"groups":[{"orgId":1,"name":"groupa","folder":"Folder 
Title","interval":"1m","rules":[{"uid":"rule1","title":"rule1","condition":"A","data":[{"refId":"A","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"","model":{"conditions":[{"evaluator":{"params":[3],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"1==0","intervalMs":1000,"maxDataPoints":43200,"refId":"A","type":"math"}}],"noDataState":"OK","execErrState":"OK","for":"0s","isPaused":false}]}]}` + expectedResponse := `{"apiVersion":1,"groups":[{"orgId":1,"name":"groupa","folder":"Folder Title","interval":"1m","rules":[{"uid":"rule1","title":"rule1","condition":"A","data":[{"refId":"A","relativeTimeRange":{"from":0,"to":0},"datasourceUid":"","model":{"conditions":[{"evaluator":{"params":[3],"type":"gt"},"operator":{"type":"and"},"query":{"params":["A"]},"reducer":{"type":"last"},"type":"query"}],"datasource":{"type":"__expr__","uid":"__expr__"},"expression":"1==0","intervalMs":1000,"maxDataPoints":43200,"refId":"A","type":"math"}}],"noDataState":"OK","execErrState":"OK","for":"0s","isPaused":false,"notification_settings":{"receiver":"Test-Receiver","group_by":["alertname","grafana_folder","test"],"group_wait":"1s","group_interval":"5s","repeat_interval":"5m","mute_time_intervals":["test-mute"]}}]}]}` response := sut.RouteGetAlertRulesExport(&rc) @@ -1629,10 +1642,10 @@ func createProvisioningSrvSutFromEnv(t *testing.T, env *testEnvironment) Provisi return ProvisioningSrv{ log: env.log, policies: newFakeNotificationPolicyService(), - contactPointService: provisioning.NewContactPointService(env.configs, env.secrets, env.prov, env.xact, receiverSvc, env.log), + contactPointService: provisioning.NewContactPointService(env.configs, env.secrets, env.prov, env.xact, receiverSvc, env.log, env.store), templates: provisioning.NewTemplateService(env.configs, env.prov, env.xact, env.log), muteTimings: provisioning.NewMuteTimingService(env.configs, 
env.prov, env.xact, env.log), - alertRules: provisioning.NewAlertRuleService(env.store, env.prov, env.dashboardService, env.quotas, env.xact, 60, 10, 100, env.log), + alertRules: provisioning.NewAlertRuleService(env.store, env.prov, env.dashboardService, env.quotas, env.xact, 60, 10, 100, env.log, &provisioning.NotificationSettingsValidatorProviderFake{}), } } @@ -1819,6 +1832,14 @@ func createTestAlertRule(title string, orgID int64) definitions.ProvisionedAlert For: model.Duration(60), NoDataState: definitions.OK, ExecErrState: definitions.OkErrState, + NotificationSettings: &definitions.AlertRuleNotificationSettings{ + Receiver: "Test-Receiver", + GroupBy: []string{"alertname", "grafana_folder", "test"}, + GroupWait: util.Pointer(model.Duration(1 * time.Second)), + GroupInterval: util.Pointer(model.Duration(5 * time.Second)), + RepeatInterval: util.Pointer(model.Duration(5 * time.Minute)), + MuteTimeIntervals: []string{"test-mute"}, + }, } } diff --git a/pkg/services/ngalert/api/api_ruler.go b/pkg/services/ngalert/api/api_ruler.go index f9d7a49ac16..c1688241153 100644 --- a/pkg/services/ngalert/api/api_ruler.go +++ b/pkg/services/ngalert/api/api_ruler.go @@ -16,10 +16,12 @@ import ( "github.com/grafana/grafana/pkg/services/auth/identity" contextmodel "github.com/grafana/grafana/pkg/services/contexthandler/model" "github.com/grafana/grafana/pkg/services/dashboards" + "github.com/grafana/grafana/pkg/services/featuremgmt" "github.com/grafana/grafana/pkg/services/ngalert/accesscontrol" apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" "github.com/grafana/grafana/pkg/services/ngalert/eval" ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models" + "github.com/grafana/grafana/pkg/services/ngalert/notifier" "github.com/grafana/grafana/pkg/services/ngalert/provisioning" "github.com/grafana/grafana/pkg/services/ngalert/store" "github.com/grafana/grafana/pkg/services/quota" @@ -33,6 +35,14 @@ type ConditionValidator interface { 
Validate(ctx eval.EvaluationContext, condition ngmodels.Condition) error } +type AMConfigStore interface { + GetLatestAlertmanagerConfiguration(ctx context.Context, orgID int64) (*ngmodels.AlertConfiguration, error) +} + +type AMRefresher interface { + ApplyConfig(ctx context.Context, orgId int64, dbConfig *ngmodels.AlertConfiguration) error +} + type RulerSrv struct { xactManager provisioning.TransactionManager provenanceStore provisioning.ProvisioningStore @@ -42,6 +52,10 @@ type RulerSrv struct { cfg *setting.UnifiedAlertingSettings conditionValidator ConditionValidator authz RuleAccessControlService + + amConfigStore AMConfigStore + amRefresher AMRefresher + featureManager featuremgmt.FeatureToggles } var ( @@ -283,8 +297,11 @@ func (srv RulerSrv) checkGroupLimits(group apimodels.PostableRuleGroupConfig) er // updateAlertRulesInGroup calculates changes (rules to add,update,delete), verifies that the user is authorized to do the calculated changes and updates database. // All operations are performed in a single transaction +// +//nolint:gocyclo func (srv RulerSrv) updateAlertRulesInGroup(c *contextmodel.ReqContext, groupKey ngmodels.AlertRuleGroupKey, rules []*ngmodels.AlertRuleWithOptionals) response.Response { var finalChanges *store.GroupDelta + var dbConfig *ngmodels.AlertConfiguration err := srv.xactManager.InTransaction(c.Req.Context(), func(tranCtx context.Context) error { userNamespace, id := c.SignedInUser.GetNamespacedID() logger := srv.log.New("namespace_uid", groupKey.NamespaceUID, "group", @@ -309,6 +326,24 @@ func (srv RulerSrv) updateAlertRulesInGroup(c *contextmodel.ReqContext, groupKey return err } + newOrUpdatedNotificationSettings := groupChanges.NewOrUpdatedNotificationSettings() + if len(newOrUpdatedNotificationSettings) > 0 { + dbConfig, err = srv.amConfigStore.GetLatestAlertmanagerConfiguration(c.Req.Context(), groupChanges.GroupKey.OrgID) + if err != nil { + return fmt.Errorf("failed to get latest configuration: %w", err) + } + cfg, err 
:= notifier.Load([]byte(dbConfig.AlertmanagerConfiguration)) + if err != nil { + return fmt.Errorf("failed to parse configuration: %w", err) + } + validator := notifier.NewNotificationSettingsValidator(&cfg.AlertmanagerConfig) + for _, s := range newOrUpdatedNotificationSettings { + if err := validator.Validate(s); err != nil { + return errors.Join(ngmodels.ErrAlertRuleFailedValidation, err) + } + } + } + if err := verifyProvisionedRulesNotAffected(c.Req.Context(), srv.provenanceStore, c.SignedInUser.GetOrgID(), groupChanges); err != nil { return err } @@ -392,6 +427,15 @@ func (srv RulerSrv) updateAlertRulesInGroup(c *contextmodel.ReqContext, groupKey } return ErrResp(http.StatusInternalServerError, err, "failed to update rule group") } + + if srv.featureManager.IsEnabled(c.Req.Context(), featuremgmt.FlagAlertingSimplifiedRouting) && dbConfig != nil { + // This isn't strictly necessary since the alertmanager config is periodically synced. + err := srv.amRefresher.ApplyConfig(c.Req.Context(), groupKey.OrgID, dbConfig) + if err != nil { + srv.log.Warn("Failed to refresh Alertmanager config for org after change in notification settings", "org", c.SignedInUser.GetOrgID(), "error", err) + } + } + return changesToResponse(finalChanges) } @@ -440,23 +484,25 @@ func toGettableExtendedRuleNode(r ngmodels.AlertRule, provenanceRecords map[stri if prov, exists := provenanceRecords[r.ResourceID()]; exists { provenance = prov } + gettableExtendedRuleNode := apimodels.GettableExtendedRuleNode{ GrafanaManagedAlert: &apimodels.GettableGrafanaRule{ - ID: r.ID, - OrgID: r.OrgID, - Title: r.Title, - Condition: r.Condition, - Data: ApiAlertQueriesFromAlertQueries(r.Data), - Updated: r.Updated, - IntervalSeconds: r.IntervalSeconds, - Version: r.Version, - UID: r.UID, - NamespaceUID: r.NamespaceUID, - RuleGroup: r.RuleGroup, - NoDataState: apimodels.NoDataState(r.NoDataState), - ExecErrState: apimodels.ExecutionErrorState(r.ExecErrState), - Provenance: apimodels.Provenance(provenance), 
- IsPaused: r.IsPaused, + ID: r.ID, + OrgID: r.OrgID, + Title: r.Title, + Condition: r.Condition, + Data: ApiAlertQueriesFromAlertQueries(r.Data), + Updated: r.Updated, + IntervalSeconds: r.IntervalSeconds, + Version: r.Version, + UID: r.UID, + NamespaceUID: r.NamespaceUID, + RuleGroup: r.RuleGroup, + NoDataState: apimodels.NoDataState(r.NoDataState), + ExecErrState: apimodels.ExecutionErrorState(r.ExecErrState), + Provenance: apimodels.Provenance(provenance), + IsPaused: r.IsPaused, + NotificationSettings: AlertRuleNotificationSettingsFromNotificationSettings(r.NotificationSettings), }, } forDuration := model.Duration(r.For) diff --git a/pkg/services/ngalert/api/api_ruler_test.go b/pkg/services/ngalert/api/api_ruler_test.go index 322ea7b2e9b..326fa6102b3 100644 --- a/pkg/services/ngalert/api/api_ruler_test.go +++ b/pkg/services/ngalert/api/api_ruler_test.go @@ -20,6 +20,7 @@ import ( "github.com/grafana/grafana/pkg/services/accesscontrol/acimpl" contextmodel "github.com/grafana/grafana/pkg/services/contexthandler/model" "github.com/grafana/grafana/pkg/services/datasources" + "github.com/grafana/grafana/pkg/services/featuremgmt" "github.com/grafana/grafana/pkg/services/folder" "github.com/grafana/grafana/pkg/services/ngalert/accesscontrol" apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" @@ -603,10 +604,24 @@ func createService(store *fakes.RuleStore) *RulerSrv { cfg: &setting.UnifiedAlertingSettings{ BaseInterval: 10 * time.Second, }, - authz: accesscontrol.NewRuleService(acimpl.ProvideAccessControl(setting.NewCfg())), + authz: accesscontrol.NewRuleService(acimpl.ProvideAccessControl(setting.NewCfg())), + amConfigStore: &fakeAMRefresher{}, + amRefresher: &fakeAMRefresher{}, + featureManager: &featuremgmt.FeatureManager{}, } } +type fakeAMRefresher struct { +} + +func (f *fakeAMRefresher) ApplyConfig(ctx context.Context, orgId int64, dbConfig *models.AlertConfiguration) error { + return nil +} + +func (f *fakeAMRefresher) 
GetLatestAlertmanagerConfiguration(ctx context.Context, orgID int64) (*models.AlertConfiguration, error) { + return nil, nil +} + func createRequestContext(orgID int64, params map[string]string) *contextmodel.ReqContext { defaultPerms := map[int64]map[string][]string{orgID: {datasources.ActionQuery: []string{datasources.ScopeAll}}} return createRequestContextWithPerms(orgID, defaultPerms, params) diff --git a/pkg/services/ngalert/api/api_ruler_validation.go b/pkg/services/ngalert/api/api_ruler_validation.go index 583a39718bc..0d13fd0c2c2 100644 --- a/pkg/services/ngalert/api/api_ruler_validation.go +++ b/pkg/services/ngalert/api/api_ruler_validation.go @@ -97,6 +97,13 @@ func validateRuleNode( ExecErrState: errorState, } + if ruleNode.GrafanaManagedAlert.NotificationSettings != nil { + newAlertRule.NotificationSettings, err = validateNotificationSettings(ruleNode.GrafanaManagedAlert.NotificationSettings) + if err != nil { + return nil, err + } + } + newAlertRule.For, err = validateForInterval(ruleNode) if err != nil { return nil, err @@ -104,6 +111,10 @@ func validateRuleNode( if ruleNode.ApiRuleNode != nil { newAlertRule.Annotations = ruleNode.ApiRuleNode.Annotations + err = validateLabels(ruleNode.Labels) + if err != nil { + return nil, err + } newAlertRule.Labels = ruleNode.ApiRuleNode.Labels err = newAlertRule.SetDashboardAndPanelFromAnnotations() @@ -114,6 +125,15 @@ func validateRuleNode( return &newAlertRule, nil } +func validateLabels(l map[string]string) error { + for key := range l { + if _, ok := ngmodels.LabelsUserCannotSpecify[key]; ok { + return fmt.Errorf("system reserved labels cannot be defined in the rule. 
Label %s is the reserved", key) + } + } + return nil +} + func validateCondition(condition string, queries []apimodels.AlertQuery) error { if condition == "" { return errors.New("condition cannot be empty") @@ -235,3 +255,21 @@ func validateRuleGroup( } return result, nil } + +func validateNotificationSettings(n *apimodels.AlertRuleNotificationSettings) ([]ngmodels.NotificationSettings, error) { + s := ngmodels.NotificationSettings{ + Receiver: n.Receiver, + GroupBy: n.GroupBy, + GroupWait: n.GroupWait, + GroupInterval: n.GroupInterval, + RepeatInterval: n.RepeatInterval, + MuteTimeIntervals: n.MuteTimeIntervals, + } + + if err := s.Validate(); err != nil { + return nil, fmt.Errorf("invalid notification settings: %w", err) + } + return []ngmodels.NotificationSettings{ + s, + }, nil +} diff --git a/pkg/services/ngalert/api/api_ruler_validation_test.go b/pkg/services/ngalert/api/api_ruler_validation_test.go index bddcec5e3fb..d763f9e2c85 100644 --- a/pkg/services/ngalert/api/api_ruler_validation_test.go +++ b/pkg/services/ngalert/api/api_ruler_validation_test.go @@ -793,3 +793,117 @@ func TestValidateRuleNodeIntervalFailures(t *testing.T) { }) } } + +func TestValidateRuleNodeNotificationSettings(t *testing.T) { + cfg := config(t) + + validNotificationSettings := models.NotificationSettingsGen(models.NSMuts.WithGroupBy(model.AlertNameLabel, models.FolderTitleLabel)) + + testCases := []struct { + name string + notificationSettings models.NotificationSettings + expErrorContains string + }{ + { + name: "valid notification settings", + notificationSettings: validNotificationSettings(), + }, + { + name: "missing receiver is invalid", + notificationSettings: models.CopyNotificationSettings(validNotificationSettings(), models.NSMuts.WithReceiver("")), + expErrorContains: "receiver", + }, + { + name: "group by empty is valid", + notificationSettings: models.CopyNotificationSettings(validNotificationSettings(), models.NSMuts.WithGroupBy()), + }, + { + name: "group by ... 
is valid", + notificationSettings: models.CopyNotificationSettings(validNotificationSettings(), models.NSMuts.WithGroupBy("...")), + }, + { + name: "group by with alert name and folder name labels is valid", + notificationSettings: models.CopyNotificationSettings(validNotificationSettings(), models.NSMuts.WithGroupBy(model.AlertNameLabel, models.FolderTitleLabel)), + }, + { + name: "group by missing alert name label is invalid", + notificationSettings: models.CopyNotificationSettings(validNotificationSettings(), models.NSMuts.WithGroupBy(models.FolderTitleLabel)), + expErrorContains: model.AlertNameLabel, + }, + { + name: "group by missing folder name label is invalid", + notificationSettings: models.CopyNotificationSettings(validNotificationSettings(), models.NSMuts.WithGroupBy(model.AlertNameLabel)), + expErrorContains: models.FolderTitleLabel, + }, + { + name: "group wait empty is valid", + notificationSettings: models.CopyNotificationSettings(validNotificationSettings(), models.NSMuts.WithGroupWait(nil)), + }, + { + name: "group wait positive is valid", + notificationSettings: models.CopyNotificationSettings(validNotificationSettings(), models.NSMuts.WithGroupWait(util.Pointer(1*time.Second))), + }, + { + name: "group wait negative is invalid", + notificationSettings: models.CopyNotificationSettings(validNotificationSettings(), models.NSMuts.WithGroupWait(util.Pointer(-1*time.Second))), + expErrorContains: "group wait", + }, + { + name: "group interval empty is valid", + notificationSettings: models.CopyNotificationSettings(validNotificationSettings(), models.NSMuts.WithGroupInterval(nil)), + }, + { + name: "group interval positive is valid", + notificationSettings: models.CopyNotificationSettings(validNotificationSettings(), models.NSMuts.WithGroupInterval(util.Pointer(1*time.Second))), + }, + { + name: "group interval negative is invalid", + notificationSettings: models.CopyNotificationSettings(validNotificationSettings(), 
models.NSMuts.WithGroupInterval(util.Pointer(-1*time.Second))), + expErrorContains: "group interval", + }, + { + name: "repeat interval empty is valid", + notificationSettings: models.CopyNotificationSettings(validNotificationSettings(), models.NSMuts.WithRepeatInterval(nil)), + }, + { + name: "repeat interval positive is valid", + notificationSettings: models.CopyNotificationSettings(validNotificationSettings(), models.NSMuts.WithRepeatInterval(util.Pointer(1*time.Second))), + }, + { + name: "repeat interval negative is invalid", + notificationSettings: models.CopyNotificationSettings(validNotificationSettings(), models.NSMuts.WithRepeatInterval(util.Pointer(-1*time.Second))), + expErrorContains: "repeat interval", + }, + } + + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + r := validRule() + r.GrafanaManagedAlert.NotificationSettings = AlertRuleNotificationSettingsFromNotificationSettings([]models.NotificationSettings{tt.notificationSettings}) + _, err := validateRuleNode(&r, util.GenerateShortUID(), cfg.BaseInterval*time.Duration(rand.Int63n(10)+1), rand.Int63(), randFolder(), cfg) + + if tt.expErrorContains != "" { + require.Error(t, err) + require.ErrorContains(t, err, tt.expErrorContains) + } else { + require.NoError(t, err) + } + }) + } +} + +func TestValidateRuleNodeReservedLabels(t *testing.T) { + cfg := config(t) + + for label := range models.LabelsUserCannotSpecify { + t.Run(label, func(t *testing.T) { + r := validRule() + r.ApiRuleNode.Labels = map[string]string{ + label: "true", + } + _, err := validateRuleNode(&r, util.GenerateShortUID(), cfg.BaseInterval*time.Duration(rand.Int63n(10)+1), rand.Int63(), randFolder(), cfg) + require.Error(t, err) + require.ErrorContains(t, err, label) + }) + } +} diff --git a/pkg/services/ngalert/api/api_testing.go b/pkg/services/ngalert/api/api_testing.go index ae427cbf8f9..5e54dc8869c 100644 --- a/pkg/services/ngalert/api/api_testing.go +++ b/pkg/services/ngalert/api/api_testing.go @@ -111,7 
+111,7 @@ func (srv TestingApiSrv) RouteTestGrafanaRuleConfig(c *contextmodel.ReqContext, now, rule, results, - state.GetRuleExtraLabels(rule, folder.Fullpath, includeFolder), + state.GetRuleExtraLabels(log.New("testing"), rule, folder.Fullpath, includeFolder), ) alerts := make([]*amv2.PostableAlert, 0, len(transitions)) diff --git a/pkg/services/ngalert/api/compat.go b/pkg/services/ngalert/api/compat.go index d27980d0968..8f671b11341 100644 --- a/pkg/services/ngalert/api/compat.go +++ b/pkg/services/ngalert/api/compat.go @@ -15,43 +15,45 @@ import ( // AlertRuleFromProvisionedAlertRule converts definitions.ProvisionedAlertRule to models.AlertRule func AlertRuleFromProvisionedAlertRule(a definitions.ProvisionedAlertRule) (models.AlertRule, error) { return models.AlertRule{ - ID: a.ID, - UID: a.UID, - OrgID: a.OrgID, - NamespaceUID: a.FolderUID, - RuleGroup: a.RuleGroup, - Title: a.Title, - Condition: a.Condition, - Data: AlertQueriesFromApiAlertQueries(a.Data), - Updated: a.Updated, - NoDataState: models.NoDataState(a.NoDataState), // TODO there must be a validation - ExecErrState: models.ExecutionErrorState(a.ExecErrState), // TODO there must be a validation - For: time.Duration(a.For), - Annotations: a.Annotations, - Labels: a.Labels, - IsPaused: a.IsPaused, + ID: a.ID, + UID: a.UID, + OrgID: a.OrgID, + NamespaceUID: a.FolderUID, + RuleGroup: a.RuleGroup, + Title: a.Title, + Condition: a.Condition, + Data: AlertQueriesFromApiAlertQueries(a.Data), + Updated: a.Updated, + NoDataState: models.NoDataState(a.NoDataState), // TODO there must be a validation + ExecErrState: models.ExecutionErrorState(a.ExecErrState), // TODO there must be a validation + For: time.Duration(a.For), + Annotations: a.Annotations, + Labels: a.Labels, + IsPaused: a.IsPaused, + NotificationSettings: NotificationSettingsFromAlertRuleNotificationSettings(a.NotificationSettings), }, nil } // ProvisionedAlertRuleFromAlertRule converts models.AlertRule to definitions.ProvisionedAlertRule and sets 
provided provenance status func ProvisionedAlertRuleFromAlertRule(rule models.AlertRule, provenance models.Provenance) definitions.ProvisionedAlertRule { return definitions.ProvisionedAlertRule{ - ID: rule.ID, - UID: rule.UID, - OrgID: rule.OrgID, - FolderUID: rule.NamespaceUID, - RuleGroup: rule.RuleGroup, - Title: rule.Title, - For: model.Duration(rule.For), - Condition: rule.Condition, - Data: ApiAlertQueriesFromAlertQueries(rule.Data), - Updated: rule.Updated, - NoDataState: definitions.NoDataState(rule.NoDataState), // TODO there may be a validation - ExecErrState: definitions.ExecutionErrorState(rule.ExecErrState), // TODO there may be a validation - Annotations: rule.Annotations, - Labels: rule.Labels, - Provenance: definitions.Provenance(provenance), // TODO validate enum conversion? - IsPaused: rule.IsPaused, + ID: rule.ID, + UID: rule.UID, + OrgID: rule.OrgID, + FolderUID: rule.NamespaceUID, + RuleGroup: rule.RuleGroup, + Title: rule.Title, + For: model.Duration(rule.For), + Condition: rule.Condition, + Data: ApiAlertQueriesFromAlertQueries(rule.Data), + Updated: rule.Updated, + NoDataState: definitions.NoDataState(rule.NoDataState), // TODO there may be a validation + ExecErrState: definitions.ExecutionErrorState(rule.ExecErrState), // TODO there may be a validation + Annotations: rule.Annotations, + Labels: rule.Labels, + Provenance: definitions.Provenance(provenance), // TODO validate enum conversion? 
+ IsPaused: rule.IsPaused, + NotificationSettings: AlertRuleNotificationSettingsFromNotificationSettings(rule.NotificationSettings), } } @@ -175,16 +177,17 @@ func AlertRuleExportFromAlertRule(rule models.AlertRule) (definitions.AlertRuleE } result := definitions.AlertRuleExport{ - UID: rule.UID, - Title: rule.Title, - For: model.Duration(rule.For), - Condition: rule.Condition, - Data: data, - DashboardUID: rule.DashboardUID, - PanelID: rule.PanelID, - NoDataState: definitions.NoDataState(rule.NoDataState), - ExecErrState: definitions.ExecutionErrorState(rule.ExecErrState), - IsPaused: rule.IsPaused, + UID: rule.UID, + Title: rule.Title, + For: model.Duration(rule.For), + Condition: rule.Condition, + Data: data, + DashboardUID: rule.DashboardUID, + PanelID: rule.PanelID, + NoDataState: definitions.NoDataState(rule.NoDataState), + ExecErrState: definitions.ExecutionErrorState(rule.ExecErrState), + IsPaused: rule.IsPaused, + NotificationSettings: AlertRuleNotificationSettingsExportFromNotificationSettings(rule.NotificationSettings), } if rule.For.Seconds() > 0 { result.ForString = util.Pointer(model.Duration(rule.For).String()) @@ -373,3 +376,61 @@ func MuteTimingIntervalToMuteTimeIntervalHclExport(m definitions.MuteTimeInterva err = j.Unmarshal(mdata, &result) return result, err } + +// AlertRuleNotificationSettingsFromNotificationSettings converts []models.NotificationSettings to definitions.AlertRuleNotificationSettings +func AlertRuleNotificationSettingsFromNotificationSettings(ns []models.NotificationSettings) *definitions.AlertRuleNotificationSettings { + if len(ns) == 0 { + return nil + } + m := ns[0] + return &definitions.AlertRuleNotificationSettings{ + Receiver: m.Receiver, + GroupBy: m.GroupBy, + GroupWait: m.GroupWait, + GroupInterval: m.GroupInterval, + RepeatInterval: m.RepeatInterval, + MuteTimeIntervals: m.MuteTimeIntervals, + } +} + +// AlertRuleNotificationSettingsExportFromNotificationSettings converts []models.NotificationSettings to
definitions.AlertRuleNotificationSettingsExport +func AlertRuleNotificationSettingsExportFromNotificationSettings(ns []models.NotificationSettings) *definitions.AlertRuleNotificationSettingsExport { + if len(ns) == 0 { + return nil + } + m := ns[0] + + toStringIfNotNil := func(d *model.Duration) *string { + if d == nil { + return nil + } + s := d.String() + return &s + } + + return &definitions.AlertRuleNotificationSettingsExport{ + Receiver: m.Receiver, + GroupBy: m.GroupBy, + GroupWait: toStringIfNotNil(m.GroupWait), + GroupInterval: toStringIfNotNil(m.GroupInterval), + RepeatInterval: toStringIfNotNil(m.RepeatInterval), + MuteTimeIntervals: m.MuteTimeIntervals, + } +} + +// NotificationSettingsFromAlertRuleNotificationSettings converts definitions.AlertRuleNotificationSettings to []models.NotificationSettings +func NotificationSettingsFromAlertRuleNotificationSettings(ns *definitions.AlertRuleNotificationSettings) []models.NotificationSettings { + if ns == nil { + return nil + } + return []models.NotificationSettings{ + { + Receiver: ns.Receiver, + GroupBy: ns.GroupBy, + GroupWait: ns.GroupWait, + GroupInterval: ns.GroupInterval, + RepeatInterval: ns.RepeatInterval, + MuteTimeIntervals: ns.MuteTimeIntervals, + }, + } +} diff --git a/pkg/services/ngalert/api/test-data/post-rulegroup-101-export.hcl b/pkg/services/ngalert/api/test-data/post-rulegroup-101-export.hcl index 33f84709de6..2cfb1ba5b0e 100644 --- a/pkg/services/ngalert/api/test-data/post-rulegroup-101-export.hcl +++ b/pkg/services/ngalert/api/test-data/post-rulegroup-101-export.hcl @@ -77,5 +77,14 @@ resource "grafana_rule_group" "rule_group_0000" { no_data_state = "NoData" exec_err_state = "Alerting" is_paused = false + + notification_settings { + receiver = "Test-Receiver" + group_by = ["alertname", "grafana_folder", "test"] + group_wait = "1s" + group_interval = "5s" + repeat_interval = "5m" + mute_time_intervals = ["test-mute"] + } } } diff --git 
a/pkg/services/ngalert/api/test-data/post-rulegroup-101-export.json b/pkg/services/ngalert/api/test-data/post-rulegroup-101-export.json index 2feec5bab39..6af0194cb8a 100644 --- a/pkg/services/ngalert/api/test-data/post-rulegroup-101-export.json +++ b/pkg/services/ngalert/api/test-data/post-rulegroup-101-export.json @@ -109,7 +109,15 @@ "noDataState": "NoData", "execErrState": "Alerting", "for": "0s", - "isPaused": false + "isPaused": false, + "notification_settings":{ + "receiver":"Test-Receiver", + "group_by":["alertname","grafana_folder","test"], + "group_wait":"1s", + "group_interval":"5s", + "repeat_interval":"5m", + "mute_time_intervals":["test-mute"] + } } ] } diff --git a/pkg/services/ngalert/api/test-data/post-rulegroup-101-export.yaml b/pkg/services/ngalert/api/test-data/post-rulegroup-101-export.yaml index 9f87a264fc2..f91d13ec635 100644 --- a/pkg/services/ngalert/api/test-data/post-rulegroup-101-export.yaml +++ b/pkg/services/ngalert/api/test-data/post-rulegroup-101-export.yaml @@ -83,3 +83,14 @@ groups: execErrState: Alerting for: 0s isPaused: false + notification_settings: + receiver: Test-Receiver + group_by: + - alertname + - grafana_folder + - test + group_wait: 1s + group_interval: 5s + repeat_interval: 5m + mute_time_intervals: + - test-mute diff --git a/pkg/services/ngalert/api/test-data/post-rulegroup-101.json b/pkg/services/ngalert/api/test-data/post-rulegroup-101.json index 61f7c165289..2871b1f5787 100644 --- a/pkg/services/ngalert/api/test-data/post-rulegroup-101.json +++ b/pkg/services/ngalert/api/test-data/post-rulegroup-101.json @@ -109,7 +109,15 @@ } ], "no_data_state": "NoData", - "exec_err_state": "Alerting" + "exec_err_state": "Alerting", + "notification_settings":{ + "receiver":"Test-Receiver", + "group_by":["alertname","grafana_folder","test"], + "group_wait":"1s", + "group_interval":"5s", + "repeat_interval":"5m", + "mute_time_intervals":["test-mute"] + } } } ] diff --git 
a/pkg/services/ngalert/api/tooling/definitions/alertmanager.go b/pkg/services/ngalert/api/tooling/definitions/alertmanager.go index 2013c5daea2..26b540e4017 100644 --- a/pkg/services/ngalert/api/tooling/definitions/alertmanager.go +++ b/pkg/services/ngalert/api/tooling/definitions/alertmanager.go @@ -735,6 +735,18 @@ type GettableApiAlertingConfig struct { Receivers []*GettableApiReceiver `yaml:"receivers,omitempty" json:"receivers,omitempty"` } +func (c *GettableApiAlertingConfig) GetReceivers() []*GettableApiReceiver { + return c.Receivers +} + +func (c *GettableApiAlertingConfig) GetMuteTimeIntervals() []config.MuteTimeInterval { + return c.MuteTimeIntervals +} + +func (c *GettableApiAlertingConfig) GetRoute() *Route { + return c.Route +} + func (c *GettableApiAlertingConfig) UnmarshalJSON(b []byte) error { type plain GettableApiAlertingConfig if err := json.Unmarshal(b, (*plain)(c)); err != nil { @@ -960,6 +972,18 @@ type PostableApiAlertingConfig struct { Receivers []*PostableApiReceiver `yaml:"receivers,omitempty" json:"receivers,omitempty"` } +func (c *PostableApiAlertingConfig) GetReceivers() []*PostableApiReceiver { + return c.Receivers +} + +func (c *PostableApiAlertingConfig) GetMuteTimeIntervals() []config.MuteTimeInterval { + return c.MuteTimeIntervals +} + +func (c *PostableApiAlertingConfig) GetRoute() *Route { + return c.Route +} + func (c *PostableApiAlertingConfig) UnmarshalJSON(b []byte) error { type plain PostableApiAlertingConfig if err := json.Unmarshal(b, (*plain)(c)); err != nil { @@ -1047,6 +1071,12 @@ func AllReceivers(route *config.Route) (res []string) { if route == nil { return res } + // TODO: Consider removing this check when new resource-specific AM APIs are implemented. + // Skip autogenerated routes. This helps cover the case where an admin POSTs the autogenerated route back to us. + // For example, when deleting a contact point that is unused but still referenced in the autogenerated route. 
+ if isAutogeneratedRoot(route) { + return nil + } if route.Receiver != "" { res = append(res, route.Receiver) @@ -1058,6 +1088,14 @@ func AllReceivers(route *config.Route) (res []string) { return res } +// autogeneratedRouteLabel a label name used to distinguish alerts that are supposed to be handled by the autogenerated policy. Only expected value is `true`. +const autogeneratedRouteLabel = "__grafana_autogenerated__" + +// isAutogeneratedRoot returns true if the route is the root of an autogenerated route. +func isAutogeneratedRoot(route *config.Route) bool { + return len(route.Matchers) == 1 && route.Matchers[0].Name == autogeneratedRouteLabel +} + type RawMessage json.RawMessage // This type alias adds YAML marshaling to the json.RawMessage. // MarshalJSON returns m as the JSON encoding of m. @@ -1195,6 +1233,10 @@ func (r *GettableApiReceiver) Type() ReceiverType { return AlertmanagerReceiverType } +func (r *GettableApiReceiver) GetName() string { + return r.Receiver.Name +} + type PostableApiReceiver struct { config.Receiver `yaml:",inline"` PostableGrafanaReceivers `yaml:",inline"` @@ -1263,6 +1305,10 @@ func (r *PostableApiReceiver) Type() ReceiverType { return AlertmanagerReceiverType } +func (r *PostableApiReceiver) GetName() string { + return r.Receiver.Name +} + type GettableGrafanaReceivers struct { GrafanaManagedReceivers []*GettableGrafanaReceiver `yaml:"grafana_managed_receiver_configs,omitempty" json:"grafana_managed_receiver_configs,omitempty"` } diff --git a/pkg/services/ngalert/api/tooling/definitions/alertmanager_test.go b/pkg/services/ngalert/api/tooling/definitions/alertmanager_test.go index 224c484fd40..c549eca7109 100644 --- a/pkg/services/ngalert/api/tooling/definitions/alertmanager_test.go +++ b/pkg/services/ngalert/api/tooling/definitions/alertmanager_test.go @@ -8,6 +8,7 @@ import ( "testing" "github.com/prometheus/alertmanager/config" + "github.com/prometheus/alertmanager/pkg/labels" "github.com/prometheus/common/model" 
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -340,6 +341,78 @@ func Test_ApiAlertingConfig_Marshaling(t *testing.T) { }, err: true, }, + { + desc: "success undefined am receiver in autogenerated route is ignored", + input: PostableApiAlertingConfig{ + Config: Config{ + Route: &Route{ + Receiver: "am", + Routes: []*Route{ + { + Matchers: config.Matchers{ + { + Name: autogeneratedRouteLabel, + Type: labels.MatchEqual, + Value: "true", + }, + }, + Routes: []*Route{ + { + Receiver: "unmentioned", + }, + }, + }, + }, + }, + }, + Receivers: []*PostableApiReceiver{ + { + Receiver: config.Receiver{ + Name: "am", + EmailConfigs: []*config.EmailConfig{{}}, + }, + }, + }, + }, + err: false, + }, + { + desc: "success undefined graf receiver in autogenerated route is ignored", + input: PostableApiAlertingConfig{ + Config: Config{ + Route: &Route{ + Receiver: "graf", + Routes: []*Route{ + { + Matchers: config.Matchers{ + { + Name: autogeneratedRouteLabel, + Type: labels.MatchEqual, + Value: "true", + }, + }, + Routes: []*Route{ + { + Receiver: "unmentioned", + }, + }, + }, + }, + }, + }, + Receivers: []*PostableApiReceiver{ + { + Receiver: config.Receiver{ + Name: "graf", + }, + PostableGrafanaReceivers: PostableGrafanaReceivers{ + GrafanaManagedReceivers: []*PostableGrafanaReceiver{{}}, + }, + }, + }, + }, + err: false, + }, } { t.Run(tc.desc, func(t *testing.T) { encoded, err := json.Marshal(tc.input) diff --git a/pkg/services/ngalert/api/tooling/definitions/alertmanager_validation.go b/pkg/services/ngalert/api/tooling/definitions/alertmanager_validation.go index 276f30ee2eb..9bec82b6d79 100644 --- a/pkg/services/ngalert/api/tooling/definitions/alertmanager_validation.go +++ b/pkg/services/ngalert/api/tooling/definitions/alertmanager_validation.go @@ -13,12 +13,15 @@ import ( "gopkg.in/yaml.v3" ) +// groupByAll is a special value defined by alertmanager that can be used in a Route's GroupBy field to aggregate by all possible labels. 
+const groupByAll = "..." + // Validate normalizes a possibly nested Route r, and returns errors if r is invalid. func (r *Route) validateChild() error { r.GroupBy = nil r.GroupByAll = false for _, l := range r.GroupByStr { - if l == "..." { + if l == groupByAll { r.GroupByAll = true } else { r.GroupBy = append(r.GroupBy, model.LabelName(l)) diff --git a/pkg/services/ngalert/api/tooling/definitions/cortex-ruler.go b/pkg/services/ngalert/api/tooling/definitions/cortex-ruler.go index 98e2837921e..21e091d9cdf 100644 --- a/pkg/services/ngalert/api/tooling/definitions/cortex-ruler.go +++ b/pkg/services/ngalert/api/tooling/definitions/cortex-ruler.go @@ -406,34 +406,80 @@ const ( ErrorErrState ExecutionErrorState = "Error" ) +// swagger:model +type AlertRuleNotificationSettings struct { + // Name of the receiver to send notifications to. + // required: true + // example: grafana-default-email + Receiver string `json:"receiver"` + + // Optional settings + + // Override the labels by which incoming alerts are grouped together. For example, multiple alerts coming in for + // cluster=A and alertname=LatencyHigh would be batched into a single group. To aggregate by all possible labels + // use the special value '...' as the sole label name. + // This effectively disables aggregation entirely, passing through all alerts as-is. This is unlikely to be what + // you want, unless you have a very low alert volume or your upstream notification system performs its own grouping. + // Must include 'alertname' and 'grafana_folder' if not using '...'. + // default: ["alertname", "grafana_folder"] + // example: ["alertname", "grafana_folder", "cluster"] + GroupBy []string `json:"group_by,omitempty"` + + // Override how long to initially wait to send a notification for a group of alerts. Allows to wait for an + // inhibiting alert to arrive or collect more initial alerts for the same group. (Usually ~0s to few minutes.)
+ // example: 30s + GroupWait *model.Duration `json:"group_wait,omitempty"` + + // Override how long to wait before sending a notification about new alerts that are added to a group of alerts for + // which an initial notification has already been sent. (Usually ~5m or more.) + // example: 5m + GroupInterval *model.Duration `json:"group_interval,omitempty"` + + // Override how long to wait before sending a notification again if it has already been sent successfully for an + // alert. (Usually ~3h or more). + // Note that this parameter is implicitly bound by Alertmanager's `--data.retention` configuration flag. + // Notifications will be resent after either repeat_interval or the data retention period have passed, whichever + // occurs first. `repeat_interval` should not be less than `group_interval`. + // example: 4h + RepeatInterval *model.Duration `json:"repeat_interval,omitempty"` + + // Override the times when notifications should be muted. These must match the name of a mute time interval defined + // in the alertmanager configuration mute_time_intervals section. When muted it will not send any notifications, but + // otherwise acts normally. 
+ // example: ["maintenance"] + MuteTimeIntervals []string `json:"mute_time_intervals,omitempty"` +} + // swagger:model type PostableGrafanaRule struct { - Title string `json:"title" yaml:"title"` - Condition string `json:"condition" yaml:"condition"` - Data []AlertQuery `json:"data" yaml:"data"` - UID string `json:"uid" yaml:"uid"` - NoDataState NoDataState `json:"no_data_state" yaml:"no_data_state"` - ExecErrState ExecutionErrorState `json:"exec_err_state" yaml:"exec_err_state"` - IsPaused *bool `json:"is_paused" yaml:"is_paused"` + Title string `json:"title" yaml:"title"` + Condition string `json:"condition" yaml:"condition"` + Data []AlertQuery `json:"data" yaml:"data"` + UID string `json:"uid" yaml:"uid"` + NoDataState NoDataState `json:"no_data_state" yaml:"no_data_state"` + ExecErrState ExecutionErrorState `json:"exec_err_state" yaml:"exec_err_state"` + IsPaused *bool `json:"is_paused" yaml:"is_paused"` + NotificationSettings *AlertRuleNotificationSettings `json:"notification_settings" yaml:"notification_settings"` } // swagger:model type GettableGrafanaRule struct { - ID int64 `json:"id" yaml:"id"` - OrgID int64 `json:"orgId" yaml:"orgId"` - Title string `json:"title" yaml:"title"` - Condition string `json:"condition" yaml:"condition"` - Data []AlertQuery `json:"data" yaml:"data"` - Updated time.Time `json:"updated" yaml:"updated"` - IntervalSeconds int64 `json:"intervalSeconds" yaml:"intervalSeconds"` - Version int64 `json:"version" yaml:"version"` - UID string `json:"uid" yaml:"uid"` - NamespaceUID string `json:"namespace_uid" yaml:"namespace_uid"` - RuleGroup string `json:"rule_group" yaml:"rule_group"` - NoDataState NoDataState `json:"no_data_state" yaml:"no_data_state"` - ExecErrState ExecutionErrorState `json:"exec_err_state" yaml:"exec_err_state"` - Provenance Provenance `json:"provenance,omitempty" yaml:"provenance,omitempty"` - IsPaused bool `json:"is_paused" yaml:"is_paused"` + ID int64 `json:"id" yaml:"id"` + OrgID int64 `json:"orgId" 
yaml:"orgId"` + Title string `json:"title" yaml:"title"` + Condition string `json:"condition" yaml:"condition"` + Data []AlertQuery `json:"data" yaml:"data"` + Updated time.Time `json:"updated" yaml:"updated"` + IntervalSeconds int64 `json:"intervalSeconds" yaml:"intervalSeconds"` + Version int64 `json:"version" yaml:"version"` + UID string `json:"uid" yaml:"uid"` + NamespaceUID string `json:"namespace_uid" yaml:"namespace_uid"` + RuleGroup string `json:"rule_group" yaml:"rule_group"` + NoDataState NoDataState `json:"no_data_state" yaml:"no_data_state"` + ExecErrState ExecutionErrorState `json:"exec_err_state" yaml:"exec_err_state"` + Provenance Provenance `json:"provenance,omitempty" yaml:"provenance,omitempty"` + IsPaused bool `json:"is_paused" yaml:"is_paused"` + NotificationSettings *AlertRuleNotificationSettings `json:"notification_settings,omitempty" yaml:"notification_settings,omitempty"` } // AlertQuery represents a single query associated with an alert definition. diff --git a/pkg/services/ngalert/api/tooling/definitions/provisioning_alert_rules.go b/pkg/services/ngalert/api/tooling/definitions/provisioning_alert_rules.go index 1471a7c7ade..bc1a1d40e16 100644 --- a/pkg/services/ngalert/api/tooling/definitions/provisioning_alert_rules.go +++ b/pkg/services/ngalert/api/tooling/definitions/provisioning_alert_rules.go @@ -156,6 +156,8 @@ type ProvisionedAlertRule struct { Provenance Provenance `json:"provenance,omitempty"` // example: false IsPaused bool `json:"isPaused"` + // example: {"receiver":"email","group_by":["alertname","grafana_folder","cluster"],"group_wait":"30s","group_interval":"1m","repeat_interval":"4d","mute_time_intervals":["Weekends","Holidays"]} + NotificationSettings *AlertRuleNotificationSettings `json:"notification_settings"` } // swagger:route GET /v1/provisioning/folder/{FolderUID}/rule-groups/{Group} provisioning stable RouteGetAlertRuleGroup @@ -246,10 +248,11 @@ type AlertRuleExport struct { // ForString is used to: // - Only export 
the for field for HCL if it is non-zero. // - Format the Prometheus model.Duration type properly for HCL. - ForString *string `json:"-" yaml:"-" hcl:"for"` - Annotations *map[string]string `json:"annotations,omitempty" yaml:"annotations,omitempty" hcl:"annotations"` - Labels *map[string]string `json:"labels,omitempty" yaml:"labels,omitempty" hcl:"labels"` - IsPaused bool `json:"isPaused" yaml:"isPaused" hcl:"is_paused"` + ForString *string `json:"-" yaml:"-" hcl:"for"` + Annotations *map[string]string `json:"annotations,omitempty" yaml:"annotations,omitempty" hcl:"annotations"` + Labels *map[string]string `json:"labels,omitempty" yaml:"labels,omitempty" hcl:"labels"` + IsPaused bool `json:"isPaused" yaml:"isPaused" hcl:"is_paused"` + NotificationSettings *AlertRuleNotificationSettingsExport `json:"notification_settings,omitempty" yaml:"notification_settings,omitempty" hcl:"notification_settings,block"` } // AlertQueryExport is the provisioned export of models.AlertQuery. @@ -266,3 +269,14 @@ type RelativeTimeRangeExport struct { FromSeconds int64 `json:"from" yaml:"from" hcl:"from"` ToSeconds int64 `json:"to" yaml:"to" hcl:"to"` } + +// AlertRuleNotificationSettingsExport is the provisioned export of models.NotificationSettings. 
+type AlertRuleNotificationSettingsExport struct { + Receiver string `yaml:"receiver,omitempty" json:"receiver,omitempty" hcl:"receiver"` + + GroupBy []string `yaml:"group_by,omitempty" json:"group_by,omitempty" hcl:"group_by"` + GroupWait *string `yaml:"group_wait,omitempty" json:"group_wait,omitempty" hcl:"group_wait,optional"` + GroupInterval *string `yaml:"group_interval,omitempty" json:"group_interval,omitempty" hcl:"group_interval,optional"` + RepeatInterval *string `yaml:"repeat_interval,omitempty" json:"repeat_interval,omitempty" hcl:"repeat_interval,optional"` + MuteTimeIntervals []string `yaml:"mute_time_intervals,omitempty" json:"mute_time_intervals,omitempty" hcl:"mute_time_intervals"` +} diff --git a/pkg/services/ngalert/models/alert_rule.go b/pkg/services/ngalert/models/alert_rule.go index f93eb3a5484..c73c21b04bf 100644 --- a/pkg/services/ngalert/models/alert_rule.go +++ b/pkg/services/ngalert/models/alert_rule.go @@ -120,6 +120,14 @@ const ( MigratedAlertIdAnnotation = "__alertId__" // MigratedMessageAnnotation is created during legacy migration to store the migrated alert message. MigratedMessageAnnotation = "message" + + // AutogeneratedRouteLabel is a label name used to distinguish alerts that are supposed to be handled by the autogenerated policy. The only expected value is `true`. + AutogeneratedRouteLabel = "__grafana_autogenerated__" + // AutogeneratedRouteReceiverNameLabel is a label name that contains the name of the receiver that should be used to send notifications for the alert. + AutogeneratedRouteReceiverNameLabel = "__grafana_receiver__" + // AutogeneratedRouteSettingsHashLabel is a label name that contains the hash of the notification settings that will be used to send notifications for the alert. + // This should uniquely identify the notification settings (group_by, group_wait, group_interval, repeat_interval, mute_time_intervals) for the alert.
+ AutogeneratedRouteSettingsHashLabel = "__grafana_route_settings_hash__" ) const ( @@ -142,6 +150,13 @@ var ( PanelIDAnnotation: {}, alertingModels.ImageTokenAnnotation: {}, } + + // LabelsUserCannotSpecify are labels that the user cannot specify when creating an alert rule. + LabelsUserCannotSpecify = map[string]struct{}{ + AutogeneratedRouteLabel: {}, + AutogeneratedRouteReceiverNameLabel: {}, + AutogeneratedRouteSettingsHashLabel: {}, + } ) // AlertRuleGroup is the base model for a rule group in unified alerting. @@ -217,10 +232,11 @@ type AlertRule struct { ExecErrState ExecutionErrorState // ideally this field should have been apimodels.ApiDuration // but this is currently not possible because of circular dependencies - For time.Duration - Annotations map[string]string - Labels map[string]string - IsPaused bool + For time.Duration + Annotations map[string]string + Labels map[string]string + IsPaused bool + NotificationSettings []NotificationSettings `xorm:"notification_settings"` // we use slice to workaround xorm mapping that does not serialize a struct to JSON unless it's a slice } // AlertRuleWithOptionals This is to avoid having to pass in additional arguments deep in the call stack. Alert rule @@ -314,13 +330,19 @@ func (alertRule *AlertRule) GetEvalCondition() Condition { // Diff calculates diff between two alert rules. Returns nil if two rules are equal. 
Otherwise, returns cmputil.DiffReport func (alertRule *AlertRule) Diff(rule *AlertRule, ignore ...string) cmputil.DiffReport { var reporter cmputil.DiffReporter - ops := make([]cmp.Option, 0, 5) + ops := make([]cmp.Option, 0, 6) // json.RawMessage is a slice of bytes and therefore cmp's default behavior is to compare it by byte, which is not really useful var jsonCmp = cmp.Transformer("", func(in json.RawMessage) string { return string(in) }) - ops = append(ops, cmp.Reporter(&reporter), cmpopts.IgnoreFields(AlertQuery{}, "modelProps"), jsonCmp, cmpopts.EquateEmpty()) + ops = append( + ops, + cmp.Reporter(&reporter), + cmpopts.IgnoreFields(AlertQuery{}, "modelProps"), + jsonCmp, + cmpopts.EquateEmpty(), + ) if len(ignore) > 0 { ops = append(ops, cmpopts.IgnoreFields(AlertRule{}, ignore...)) @@ -467,6 +489,23 @@ func (alertRule *AlertRule) ValidateAlertRule(cfg setting.UnifiedAlertingSetting if alertRule.For < 0 { return fmt.Errorf("%w: field `for` cannot be negative", ErrAlertRuleFailedValidation) } + + if len(alertRule.Labels) > 0 { + for label := range alertRule.Labels { + if _, ok := LabelsUserCannotSpecify[label]; ok { + return fmt.Errorf("%w: system reserved label %s cannot be defined", ErrAlertRuleFailedValidation, label) + } + } + } + + if len(alertRule.NotificationSettings) > 0 { + if len(alertRule.NotificationSettings) != 1 { + return fmt.Errorf("%w: only one notification settings entry is allowed", ErrAlertRuleFailedValidation) + } + if err := alertRule.NotificationSettings[0].Validate(); err != nil { + return errors.Join(ErrAlertRuleFailedValidation, fmt.Errorf("invalid notification settings: %w", err)) + } + } return nil } @@ -510,10 +549,11 @@ type AlertRuleVersion struct { ExecErrState ExecutionErrorState // ideally this field should have been apimodels.ApiDuration // but this is currently not possible because of circular dependencies - For time.Duration - Annotations map[string]string - Labels map[string]string - IsPaused bool + For time.Duration + 
Annotations map[string]string + Labels map[string]string + IsPaused bool + NotificationSettings []NotificationSettings `xorm:"notification_settings"` // we use slice to workaround xorm mapping that does not serialize a struct to JSON unless it's a slice } // GetAlertRuleByUIDQuery is the query for retrieving/deleting an alert rule by UID and organisation ID. @@ -539,6 +579,8 @@ type ListAlertRulesQuery struct { // to return just those for a dashboard and panel. DashboardUID string PanelID int64 + + ReceiverName string } // CountAlertRulesQuery is the query for counting alert rules diff --git a/pkg/services/ngalert/models/alert_rule_test.go b/pkg/services/ngalert/models/alert_rule_test.go index b8c75c2ab3d..32836ba4d67 100644 --- a/pkg/services/ngalert/models/alert_rule_test.go +++ b/pkg/services/ngalert/models/alert_rule_test.go @@ -4,17 +4,21 @@ import ( "encoding/json" "fmt" "math/rand" + "reflect" "sort" "strings" "testing" "time" "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/prometheus/common/model" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "gopkg.in/yaml.v3" "github.com/grafana/grafana/pkg/util" + "github.com/grafana/grafana/pkg/util/cmputil" ) func TestSortAlertRulesByGroupKeyAndIndex(t *testing.T) { @@ -384,7 +388,7 @@ func TestDiff(t *testing.T) { rule1 := AlertRuleGen()() rule2 := AlertRuleGen()() - diffs := rule1.Diff(rule2, "Data", "Annotations", "Labels") // these fields will be tested separately + diffs := rule1.Diff(rule2, "Data", "Annotations", "Labels", "NotificationSettings") // these fields will be tested separately difCnt := 0 if rule1.ID != rule2.ID { @@ -678,6 +682,117 @@ func TestDiff(t *testing.T) { } }) }) + + t.Run("should detect changes in NotificationSettings", func(t *testing.T) { + rule1 := AlertRuleGen()() + + baseSettings := NotificationSettingsGen(NSMuts.WithGroupBy("test1", "test2"))() + rule1.NotificationSettings = []NotificationSettings{baseSettings} + + 
addTime := func(d *model.Duration, duration time.Duration) *time.Duration { + dur := time.Duration(*d) + dur += duration + return &dur + } + + testCases := []struct { + name string + notificationSettings NotificationSettings + diffs cmputil.DiffReport + }{ + { + name: "should detect changes in Receiver", + notificationSettings: CopyNotificationSettings(baseSettings, NSMuts.WithReceiver(baseSettings.Receiver+"-modified")), + diffs: []cmputil.Diff{ + { + Path: "NotificationSettings[0].Receiver", + Left: reflect.ValueOf(baseSettings.Receiver), + Right: reflect.ValueOf(baseSettings.Receiver + "-modified"), + }, + }, + }, + { + name: "should detect changes in GroupWait", + notificationSettings: CopyNotificationSettings(baseSettings, NSMuts.WithGroupWait(addTime(baseSettings.GroupWait, 1*time.Second))), + diffs: []cmputil.Diff{ + { + Path: "NotificationSettings[0].GroupWait", + Left: reflect.ValueOf(*baseSettings.GroupWait), + Right: reflect.ValueOf(model.Duration(*addTime(baseSettings.GroupWait, 1*time.Second))), + }, + }, + }, + { + name: "should detect changes in GroupInterval", + notificationSettings: CopyNotificationSettings(baseSettings, NSMuts.WithGroupInterval(addTime(baseSettings.GroupInterval, 1*time.Second))), + diffs: []cmputil.Diff{ + { + Path: "NotificationSettings[0].GroupInterval", + Left: reflect.ValueOf(*baseSettings.GroupInterval), + Right: reflect.ValueOf(model.Duration(*addTime(baseSettings.GroupInterval, 1*time.Second))), + }, + }, + }, + { + name: "should detect changes in RepeatInterval", + notificationSettings: CopyNotificationSettings(baseSettings, NSMuts.WithRepeatInterval(addTime(baseSettings.RepeatInterval, 1*time.Second))), + diffs: []cmputil.Diff{ + { + Path: "NotificationSettings[0].RepeatInterval", + Left: reflect.ValueOf(*baseSettings.RepeatInterval), + Right: reflect.ValueOf(model.Duration(*addTime(baseSettings.RepeatInterval, 1*time.Second))), + }, + }, + }, + { + name: "should detect changes in GroupBy", + notificationSettings: 
CopyNotificationSettings(baseSettings, NSMuts.WithGroupBy(baseSettings.GroupBy[0]+"-modified", baseSettings.GroupBy[1]+"-modified")), + diffs: []cmputil.Diff{ + { + Path: "NotificationSettings[0].GroupBy[0]", + Left: reflect.ValueOf(baseSettings.GroupBy[0]), + Right: reflect.ValueOf(baseSettings.GroupBy[0] + "-modified"), + }, + { + Path: "NotificationSettings[0].GroupBy[1]", + Left: reflect.ValueOf(baseSettings.GroupBy[1]), + Right: reflect.ValueOf(baseSettings.GroupBy[1] + "-modified"), + }, + }, + }, + { + name: "should detect changes in MuteTimeIntervals", + notificationSettings: CopyNotificationSettings(baseSettings, NSMuts.WithMuteTimeIntervals(baseSettings.MuteTimeIntervals[0]+"-modified", baseSettings.MuteTimeIntervals[1]+"-modified")), + diffs: []cmputil.Diff{ + { + Path: "NotificationSettings[0].MuteTimeIntervals[0]", + Left: reflect.ValueOf(baseSettings.MuteTimeIntervals[0]), + Right: reflect.ValueOf(baseSettings.MuteTimeIntervals[0] + "-modified"), + }, + { + Path: "NotificationSettings[0].MuteTimeIntervals[1]", + Left: reflect.ValueOf(baseSettings.MuteTimeIntervals[1]), + Right: reflect.ValueOf(baseSettings.MuteTimeIntervals[1] + "-modified"), + }, + }, + }, + } + + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + rule2 := CopyRule(rule1) + rule2.NotificationSettings = []NotificationSettings{tt.notificationSettings} + diffs := rule1.Diff(rule2) + + cOpt := []cmp.Option{ + cmpopts.IgnoreUnexported(cmputil.Diff{}), + } + if !cmp.Equal(diffs, tt.diffs, cOpt...) 
{ + t.Errorf("Unexpected Diffs: %v", cmp.Diff(diffs, tt.diffs, cOpt...)) + } + }) + } + }) } func TestSortByGroupIndex(t *testing.T) { diff --git a/pkg/services/ngalert/models/notifications.go b/pkg/services/ngalert/models/notifications.go new file mode 100644 index 00000000000..3271eb6d0ba --- /dev/null +++ b/pkg/services/ngalert/models/notifications.go @@ -0,0 +1,167 @@ +package models + +import ( + "encoding/binary" + "errors" + "fmt" + "hash/fnv" + "slices" + "unsafe" + + "github.com/grafana/grafana-plugin-sdk-go/data" + "github.com/prometheus/common/model" +) + +// groupByAll is a special value defined by alertmanager that can be used in a Route's GroupBy field to aggregate by all possible labels. +const groupByAll = "..." + +type ListNotificationSettingsQuery struct { + OrgID int64 + ReceiverName string +} + +// NotificationSettings represents the settings for sending notifications for a single AlertRule. It is used to +// automatically generate labels and an associated matching route containing the given settings. +type NotificationSettings struct { + Receiver string `json:"receiver"` + + GroupBy []string `json:"group_by,omitempty"` + GroupWait *model.Duration `json:"group_wait,omitempty"` + GroupInterval *model.Duration `json:"group_interval,omitempty"` + RepeatInterval *model.Duration `json:"repeat_interval,omitempty"` + MuteTimeIntervals []string `json:"mute_time_intervals,omitempty"` +} + +// Validate checks if the NotificationSettings object is valid. +// It returns an error if any of the validation checks fail. +// The receiver must be specified. +// If GroupBy is not empty, it must contain both model.AlertNameLabel and FolderTitleLabel or the special label '...'. +// GroupWait, GroupInterval, RepeatInterval must not be negative when set (a zero duration is accepted).
+func (s *NotificationSettings) Validate() error { + if s.Receiver == "" { + return errors.New("receiver must be specified") + } + if len(s.GroupBy) > 0 { + alertName, folderTitle := false, false + for _, lbl := range s.GroupBy { + if lbl == groupByAll { + alertName, folderTitle = true, true + break + } + if lbl == model.AlertNameLabel { + alertName = true + } + if lbl == FolderTitleLabel { + folderTitle = true + } + } + if !alertName || !folderTitle { + return fmt.Errorf("group by override must contain two required labels: '%s' and '%s' or '...' (group by all)", model.AlertNameLabel, FolderTitleLabel) + } + } + if s.GroupWait != nil && *s.GroupWait < 0 { + return errors.New("group wait must be a positive duration") + } + if s.GroupInterval != nil && *s.GroupInterval < 0 { + return errors.New("group interval must be a positive duration") + } + if s.RepeatInterval != nil && *s.RepeatInterval < 0 { + return errors.New("repeat interval must be a positive duration") + } + return nil +} + +// ToLabels converts the NotificationSettings into data.Labels. When added to an AlertRule these labels ensure it will +// match an autogenerated route with the correct settings. 
+// Labels returned: +// - AutogeneratedRouteLabel: "true" +// - AutogeneratedRouteReceiverNameLabel: Receiver +// - AutogeneratedRouteSettingsHashLabel: Fingerprint (if the NotificationSettings are not all default) +func (s *NotificationSettings) ToLabels() data.Labels { + result := make(data.Labels, 3) + result[AutogeneratedRouteLabel] = "true" + result[AutogeneratedRouteReceiverNameLabel] = s.Receiver + if !s.IsAllDefault() { + result[AutogeneratedRouteSettingsHashLabel] = s.Fingerprint().String() + } + return result +} + +func (s *NotificationSettings) Equals(other *NotificationSettings) bool { + durationEqual := func(d1, d2 *model.Duration) bool { + if d1 == nil || d2 == nil { + return d1 == d2 + } + return *d1 == *d2 + } + if s == nil || other == nil { + return s == nil && other == nil + } + if s.Receiver != other.Receiver { + return false + } + if !durationEqual(s.GroupWait, other.GroupWait) { + return false + } + if !durationEqual(s.GroupInterval, other.GroupInterval) { + return false + } + if !durationEqual(s.RepeatInterval, other.RepeatInterval) { + return false + } + if !slices.Equal(s.MuteTimeIntervals, other.MuteTimeIntervals) { + return false + } + sGr := s.GroupBy + oGr := other.GroupBy + return slices.Equal(sGr, oGr) +} + +// IsAllDefault checks if the NotificationSettings object has all default values for optional fields (all except Receiver). +func (s *NotificationSettings) IsAllDefault() bool { + return len(s.GroupBy) == 0 && s.GroupWait == nil && s.GroupInterval == nil && s.RepeatInterval == nil && len(s.MuteTimeIntervals) == 0 +} + +// NewDefaultNotificationSettings creates a new default NotificationSettings with the specified receiver. +func NewDefaultNotificationSettings(receiver string) NotificationSettings { + return NotificationSettings{ + Receiver: receiver, + } +} + +// Fingerprint calculates a hash value to uniquely identify a NotificationSettings by its attributes.
+// The hash is calculated by concatenating the strings and durations of the NotificationSettings attributes +// and using an invalid UTF-8 sequence as a separator. +func (s *NotificationSettings) Fingerprint() data.Fingerprint { + h := fnv.New64() + tmp := make([]byte, 8) + + writeString := func(s string) { + // save on extra slice allocation when string is converted to bytes. + _, _ = h.Write(unsafe.Slice(unsafe.StringData(s), len(s))) //nolint:gosec + // ignore errors returned by Write method because fnv never returns them. + _, _ = h.Write([]byte{255}) // use an invalid utf-8 sequence as separator + } + writeDuration := func(d *model.Duration) { + if d == nil { + _, _ = h.Write([]byte{255}) + } else { + binary.LittleEndian.PutUint64(tmp, uint64(*d)) + _, _ = h.Write(tmp) + _, _ = h.Write([]byte{255}) + } + } + + writeString(s.Receiver) + // TODO: Should we sort the group by labels? + for _, gb := range s.GroupBy { + writeString(gb) + } + writeDuration(s.GroupWait) + writeDuration(s.GroupInterval) + writeDuration(s.RepeatInterval) + for _, interval := range s.MuteTimeIntervals { + writeString(interval) + } + return data.Fingerprint(h.Sum64()) +} diff --git a/pkg/services/ngalert/models/notifications_test.go b/pkg/services/ngalert/models/notifications_test.go new file mode 100644 index 00000000000..34cd1b034c7 --- /dev/null +++ b/pkg/services/ngalert/models/notifications_test.go @@ -0,0 +1,145 @@ +package models + +import ( + "testing" + "time" + + "github.com/grafana/grafana-plugin-sdk-go/data" + "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" + + "github.com/grafana/grafana/pkg/util" +) + +func TestValidate(t *testing.T) { + validNotificationSettings := NotificationSettingsGen(NSMuts.WithGroupBy(model.AlertNameLabel, FolderTitleLabel)) + + testCases := []struct { + name string + notificationSettings NotificationSettings + expErrorContains string + }{ + { + name: "valid notification settings", + notificationSettings: 
validNotificationSettings(), + }, + { + name: "missing receiver is invalid", + notificationSettings: CopyNotificationSettings(validNotificationSettings(), NSMuts.WithReceiver("")), + expErrorContains: "receiver", + }, + { + name: "group by empty is valid", + notificationSettings: CopyNotificationSettings(validNotificationSettings(), NSMuts.WithGroupBy()), + }, + { + name: "group by ... is valid", + notificationSettings: CopyNotificationSettings(validNotificationSettings(), NSMuts.WithGroupBy("...")), + }, + { + name: "group by with alert name and folder name labels is valid", + notificationSettings: CopyNotificationSettings(validNotificationSettings(), NSMuts.WithGroupBy(model.AlertNameLabel, FolderTitleLabel)), + }, + { + name: "group by missing alert name label is invalid", + notificationSettings: CopyNotificationSettings(validNotificationSettings(), NSMuts.WithGroupBy(FolderTitleLabel)), + expErrorContains: model.AlertNameLabel, + }, + { + name: "group by missing folder name label is invalid", + notificationSettings: CopyNotificationSettings(validNotificationSettings(), NSMuts.WithGroupBy(model.AlertNameLabel)), + expErrorContains: FolderTitleLabel, + }, + { + name: "group wait empty is valid", + notificationSettings: CopyNotificationSettings(validNotificationSettings(), NSMuts.WithGroupWait(nil)), + }, + { + name: "group wait positive is valid", + notificationSettings: CopyNotificationSettings(validNotificationSettings(), NSMuts.WithGroupWait(util.Pointer(1*time.Second))), + }, + { + name: "group wait negative is invalid", + notificationSettings: CopyNotificationSettings(validNotificationSettings(), NSMuts.WithGroupWait(util.Pointer(-1*time.Second))), + expErrorContains: "group wait", + }, + { + name: "group interval empty is valid", + notificationSettings: CopyNotificationSettings(validNotificationSettings(), NSMuts.WithGroupInterval(nil)), + }, + { + name: "group interval positive is valid", + notificationSettings: 
CopyNotificationSettings(validNotificationSettings(), NSMuts.WithGroupInterval(util.Pointer(1*time.Second))), + }, + { + name: "group interval negative is invalid", + notificationSettings: CopyNotificationSettings(validNotificationSettings(), NSMuts.WithGroupInterval(util.Pointer(-1*time.Second))), + expErrorContains: "group interval", + }, + { + name: "repeat interval empty is valid", + notificationSettings: CopyNotificationSettings(validNotificationSettings(), NSMuts.WithRepeatInterval(nil)), + }, + { + name: "repeat interval positive is valid", + notificationSettings: CopyNotificationSettings(validNotificationSettings(), NSMuts.WithRepeatInterval(util.Pointer(1*time.Second))), + }, + { + name: "repeat interval negative is invalid", + notificationSettings: CopyNotificationSettings(validNotificationSettings(), NSMuts.WithRepeatInterval(util.Pointer(-1*time.Second))), + expErrorContains: "repeat interval", + }, + } + + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + err := tt.notificationSettings.Validate() + if tt.expErrorContains != "" { + require.Error(t, err) + require.ErrorContains(t, err, tt.expErrorContains) + } else { + require.NoError(t, err) + } + }) + } +} + +func TestNotificationSettingsLabels(t *testing.T) { + testCases := []struct { + name string + notificationSettings NotificationSettings + labels data.Labels + }{ + { + name: "default notification settings", + notificationSettings: NewDefaultNotificationSettings("receiver name"), + labels: data.Labels{ + AutogeneratedRouteLabel: "true", + AutogeneratedRouteReceiverNameLabel: "receiver name", + }, + }, + { + name: "custom notification settings", + notificationSettings: NotificationSettings{ + Receiver: "receiver name", + GroupBy: []string{"label1", "label2"}, + GroupWait: util.Pointer(model.Duration(1 * time.Minute)), + GroupInterval: util.Pointer(model.Duration(2 * time.Minute)), + RepeatInterval: util.Pointer(model.Duration(3 * time.Minute)), + MuteTimeIntervals: 
[]string{"maintenance1", "maintenance2"}, + }, + labels: data.Labels{ + AutogeneratedRouteLabel: "true", + AutogeneratedRouteReceiverNameLabel: "receiver name", + AutogeneratedRouteSettingsHashLabel: "f0e23250cefc4a31", + }, + }, + } + + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + labels := tt.notificationSettings.ToLabels() + require.Equal(t, tt.labels, labels) + }) + } +} diff --git a/pkg/services/ngalert/models/testing.go b/pkg/services/ngalert/models/testing.go index 50ee3e4f60b..71ffd9312a9 100644 --- a/pkg/services/ngalert/models/testing.go +++ b/pkg/services/ngalert/models/testing.go @@ -11,6 +11,7 @@ import ( "github.com/google/uuid" "github.com/grafana/grafana-plugin-sdk-go/data" + "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "github.com/grafana/grafana/pkg/expr" @@ -64,26 +65,32 @@ func AlertRuleGen(mutators ...AlertRuleMutator) func() *AlertRule { panelID = &p } + var ns []NotificationSettings + if rand.Int63()%2 == 0 { + ns = append(ns, NotificationSettingsGen()()) + } + rule := &AlertRule{ - ID: rand.Int63n(1500), - OrgID: rand.Int63n(1500) + 1, // Prevent OrgID=0 as this does not pass alert rule validation. - Title: "TEST-ALERT-" + util.GenerateShortUID(), - Condition: "A", - Data: []AlertQuery{GenerateAlertQuery()}, - Updated: time.Now().Add(-time.Duration(rand.Intn(100) + 1)), - IntervalSeconds: rand.Int63n(60) + 1, - Version: rand.Int63n(1500), // Don't generate a rule ID too big for postgres - UID: util.GenerateShortUID(), - NamespaceUID: util.GenerateShortUID(), - DashboardUID: dashUID, - PanelID: panelID, - RuleGroup: "TEST-GROUP-" + util.GenerateShortUID(), - RuleGroupIndex: rand.Intn(1500), - NoDataState: randNoDataState(), - ExecErrState: randErrState(), - For: forInterval, - Annotations: annotations, - Labels: labels, + ID: rand.Int63n(1500), + OrgID: rand.Int63n(1500) + 1, // Prevent OrgID=0 as this does not pass alert rule validation. 
+ Title: "TEST-ALERT-" + util.GenerateShortUID(), + Condition: "A", + Data: []AlertQuery{GenerateAlertQuery()}, + Updated: time.Now().Add(-time.Duration(rand.Intn(100) + 1)), + IntervalSeconds: rand.Int63n(60) + 1, + Version: rand.Int63n(1500), // Don't generate a rule ID too big for postgres + UID: util.GenerateShortUID(), + NamespaceUID: util.GenerateShortUID(), + DashboardUID: dashUID, + PanelID: panelID, + RuleGroup: "TEST-GROUP-" + util.GenerateShortUID(), + RuleGroupIndex: rand.Intn(1500), + NoDataState: randNoDataState(), + ExecErrState: randErrState(), + For: forInterval, + Annotations: annotations, + Labels: labels, + NotificationSettings: ns, } for _, mutator := range mutators { @@ -266,6 +273,20 @@ func WithUniqueUID(knownUids *sync.Map) AlertRuleMutator { } } +func WithUniqueTitle(knownTitles *sync.Map) AlertRuleMutator { + return func(rule *AlertRule) { + title := rule.Title + for { + _, ok := knownTitles.LoadOrStore(title, struct{}{}) + if !ok { + rule.Title = title + return + } + title = uuid.NewString() + } + } +} + func WithQuery(query ...AlertQuery) AlertRuleMutator { return func(rule *AlertRule) { rule.Data = query @@ -283,6 +304,18 @@ func WithGroupKey(groupKey AlertRuleGroupKey) AlertRuleMutator { } } +func WithNotificationSettingsGen(ns func() NotificationSettings) AlertRuleMutator { + return func(rule *AlertRule) { + rule.NotificationSettings = []NotificationSettings{ns()} + } +} + +func WithNoNotificationSettings() AlertRuleMutator { + return func(rule *AlertRule) { + rule.NotificationSettings = nil + } +} + func GenerateAlertLabels(count int, prefix string) data.Labels { labels := make(data.Labels, count) for i := 0; i < count; i++ { @@ -412,6 +445,10 @@ func CopyRule(r *AlertRule) *AlertRule { } } + for _, s := range r.NotificationSettings { + result.NotificationSettings = append(result.NotificationSettings, CopyNotificationSettings(s)) + } + return &result } @@ -601,3 +638,108 @@ func AlertInstanceGen(mutators ...AlertInstanceMutator) 
*AlertInstance { } return instance } + +type Mutator[T any] func(*T) + +// CopyNotificationSettings creates a deep copy of NotificationSettings. +func CopyNotificationSettings(ns NotificationSettings, mutators ...Mutator[NotificationSettings]) NotificationSettings { + c := NotificationSettings{ + Receiver: ns.Receiver, + } + if ns.GroupWait != nil { + c.GroupWait = util.Pointer(*ns.GroupWait) + } + if ns.GroupInterval != nil { + c.GroupInterval = util.Pointer(*ns.GroupInterval) + } + if ns.RepeatInterval != nil { + c.RepeatInterval = util.Pointer(*ns.RepeatInterval) + } + if ns.GroupBy != nil { + c.GroupBy = make([]string, len(ns.GroupBy)) + copy(c.GroupBy, ns.GroupBy) + } + if ns.MuteTimeIntervals != nil { + c.MuteTimeIntervals = make([]string, len(ns.MuteTimeIntervals)) + copy(c.MuteTimeIntervals, ns.MuteTimeIntervals) + } + for _, mutator := range mutators { + mutator(&c) + } + return c +} + +// NotificationSettingsGen generates NotificationSettings using a base and mutators. +func NotificationSettingsGen(mutators ...Mutator[NotificationSettings]) func() NotificationSettings { + return func() NotificationSettings { + c := NotificationSettings{ + Receiver: util.GenerateShortUID(), + GroupBy: []string{model.AlertNameLabel, FolderTitleLabel, util.GenerateShortUID()}, + GroupWait: util.Pointer(model.Duration(time.Duration(rand.Intn(100)+1) * time.Second)), + GroupInterval: util.Pointer(model.Duration(time.Duration(rand.Intn(100)+1) * time.Second)), + RepeatInterval: util.Pointer(model.Duration(time.Duration(rand.Intn(100)+1) * time.Second)), + MuteTimeIntervals: []string{util.GenerateShortUID(), util.GenerateShortUID()}, + } + for _, mutator := range mutators { + mutator(&c) + } + return c + } +} + +var ( + NSMuts = NotificationSettingsMutators{} +) + +type NotificationSettingsMutators struct{} + +func (n NotificationSettingsMutators) WithReceiver(receiver string) Mutator[NotificationSettings] { + return func(ns *NotificationSettings) { + ns.Receiver = receiver + } 
+} + +func (n NotificationSettingsMutators) WithGroupWait(groupWait *time.Duration) Mutator[NotificationSettings] { + return func(ns *NotificationSettings) { + if groupWait == nil { + ns.GroupWait = nil + return + } + dur := model.Duration(*groupWait) + ns.GroupWait = &dur + } +} + +func (n NotificationSettingsMutators) WithGroupInterval(groupInterval *time.Duration) Mutator[NotificationSettings] { + return func(ns *NotificationSettings) { + if groupInterval == nil { + ns.GroupInterval = nil + return + } + dur := model.Duration(*groupInterval) + ns.GroupInterval = &dur + } +} + +func (n NotificationSettingsMutators) WithRepeatInterval(repeatInterval *time.Duration) Mutator[NotificationSettings] { + return func(ns *NotificationSettings) { + if repeatInterval == nil { + ns.RepeatInterval = nil + return + } + dur := model.Duration(*repeatInterval) + ns.RepeatInterval = &dur + } +} + +func (n NotificationSettingsMutators) WithGroupBy(groupBy ...string) Mutator[NotificationSettings] { + return func(ns *NotificationSettings) { + ns.GroupBy = groupBy + } +} + +func (n NotificationSettingsMutators) WithMuteTimeIntervals(muteTimeIntervals ...string) Mutator[NotificationSettings] { + return func(ns *NotificationSettings) { + ns.MuteTimeIntervals = muteTimeIntervals + } +} diff --git a/pkg/services/ngalert/ngalert.go b/pkg/services/ngalert/ngalert.go index 8ea504f0001..d5ee1ff59a1 100644 --- a/pkg/services/ngalert/ngalert.go +++ b/pkg/services/ngalert/ngalert.go @@ -232,7 +232,7 @@ func (ng *AlertNG) init() error { decryptFn := ng.SecretsService.GetDecryptedValue multiOrgMetrics := ng.Metrics.GetMultiOrgAlertmanagerMetrics() - moa, err := notifier.NewMultiOrgAlertmanager(ng.Cfg, ng.store, ng.store, ng.KVStore, ng.store, decryptFn, multiOrgMetrics, ng.NotificationService, moaLogger, ng.SecretsService, overrides...) 
+ moa, err := notifier.NewMultiOrgAlertmanager(ng.Cfg, ng.store, ng.store, ng.KVStore, ng.store, decryptFn, multiOrgMetrics, ng.NotificationService, moaLogger, ng.SecretsService, ng.FeatureToggles, overrides...) if err != nil { return err } @@ -326,13 +326,13 @@ func (ng *AlertNG) init() error { // Provisioning policyService := provisioning.NewNotificationPolicyService(ng.store, ng.store, ng.store, ng.Cfg.UnifiedAlerting, ng.Log) - contactPointService := provisioning.NewContactPointService(ng.store, ng.SecretsService, ng.store, ng.store, receiverService, ng.Log) + contactPointService := provisioning.NewContactPointService(ng.store, ng.SecretsService, ng.store, ng.store, receiverService, ng.Log, ng.store) templateService := provisioning.NewTemplateService(ng.store, ng.store, ng.store, ng.Log) muteTimingService := provisioning.NewMuteTimingService(ng.store, ng.store, ng.store, ng.Log) alertRuleService := provisioning.NewAlertRuleService(ng.store, ng.store, ng.dashboardService, ng.QuotaService, ng.store, int64(ng.Cfg.UnifiedAlerting.DefaultRuleEvaluationInterval.Seconds()), int64(ng.Cfg.UnifiedAlerting.BaseInterval.Seconds()), - ng.Cfg.UnifiedAlerting.RulesPerRuleGroupLimit, ng.Log) + ng.Cfg.UnifiedAlerting.RulesPerRuleGroupLimit, ng.Log, notifier.NewNotificationSettingsValidationService(ng.store)) ng.api = &api.API{ Cfg: ng.Cfg, diff --git a/pkg/services/ngalert/notifier/alertmanager.go b/pkg/services/ngalert/notifier/alertmanager.go index 980a66905cf..3f0dffb038f 100644 --- a/pkg/services/ngalert/notifier/alertmanager.go +++ b/pkg/services/ngalert/notifier/alertmanager.go @@ -43,6 +43,7 @@ var silenceMaintenanceInterval = 15 * time.Minute type AlertingStore interface { store.AlertingStore store.ImageStore + autogenRuleStore } type alertmanager struct { @@ -57,6 +58,8 @@ type alertmanager struct { decryptFn alertingNotify.GetDecryptedValueFn orgID int64 + + withAutogen bool } // maintenanceOptions represent the options for components that need maintenance on a 
frequency within the Alertmanager. @@ -86,7 +89,7 @@ func (m maintenanceOptions) MaintenanceFunc(state alertingNotify.State) (int64, func NewAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store AlertingStore, kvStore kvstore.KVStore, peer alertingNotify.ClusterPeer, decryptFn alertingNotify.GetDecryptedValueFn, ns notifications.Service, - m *metrics.Alertmanager) (*alertmanager, error) { + m *metrics.Alertmanager, withAutogen bool) (*alertmanager, error) { workingPath := filepath.Join(cfg.DataPath, workingDir, strconv.Itoa(int(orgID))) fileStore := NewFileStore(orgID, kvStore, workingPath) @@ -144,6 +147,9 @@ func NewAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store A decryptFn: decryptFn, fileStore: fileStore, logger: l, + + // TODO: Preferably, logic around autogen would be outside of the specific alertmanager implementation so that remote alertmanager will get it for free. + withAutogen: withAutogen, } return am, nil @@ -180,11 +186,17 @@ func (am *alertmanager) SaveAndApplyDefaultConfig(ctx context.Context) error { } err = am.Store.SaveAlertmanagerConfigurationWithCallback(ctx, cmd, func() error { - _, err := am.applyConfig(cfg) + if am.withAutogen { + err := AddAutogenConfig(ctx, am.logger, am.Store, am.orgID, &cfg.AlertmanagerConfig, true) + if err != nil { + return err + } + } + _, err = am.applyConfig(cfg) return err }) if err != nil { - outerErr = nil + outerErr = err return } }) @@ -195,6 +207,9 @@ func (am *alertmanager) SaveAndApplyDefaultConfig(ctx context.Context) error { // SaveAndApplyConfig saves the configuration the database and applies the configuration to the Alertmanager. // It rollbacks the save if we fail to apply the configuration. func (am *alertmanager) SaveAndApplyConfig(ctx context.Context, cfg *apimodels.PostableUserConfig) error { + // Remove autogenerated config from the user config before saving it, may not be necessary as we already remove + // the autogenerated config before provenance guard. 
However, this is low impact and a good safety net. + RemoveAutogenConfigIfExists(cfg.AlertmanagerConfig.Route) rawConfig, err := json.Marshal(&cfg) if err != nil { return fmt.Errorf("failed to serialize to the Alertmanager configuration: %w", err) @@ -210,7 +225,14 @@ func (am *alertmanager) SaveAndApplyConfig(ctx context.Context, cfg *apimodels.P } err = am.Store.SaveAlertmanagerConfigurationWithCallback(ctx, cmd, func() error { - _, err := am.applyConfig(cfg) + if am.withAutogen { + err := AddAutogenConfig(ctx, am.logger, am.Store, am.orgID, &cfg.AlertmanagerConfig, false) + if err != nil { + return err + } + } + + _, err = am.applyConfig(cfg) return err }) if err != nil { @@ -232,6 +254,17 @@ func (am *alertmanager) ApplyConfig(ctx context.Context, dbCfg *ngmodels.AlertCo var outerErr error am.Base.WithLock(func() { + if am.withAutogen { + err := AddAutogenConfig(ctx, am.logger, am.Store, am.orgID, &cfg.AlertmanagerConfig, true) + if err != nil { + outerErr = err + return + } + } + // Note: Adding the autogen config here causes alert_configuration_history to update last_applied more often. + // Since we will now update last_applied when autogen changes even if the user-created config remains the same. + // To fix this however, the local alertmanager needs to be able to tell the difference between user-created and + // autogen config, which may introduce cross-cutting complexity. 
if err := am.applyAndMarkConfig(ctx, dbCfg.ConfigurationHash, cfg); err != nil { outerErr = fmt.Errorf("unable to apply configuration: %w", err) return diff --git a/pkg/services/ngalert/notifier/alertmanager_config.go b/pkg/services/ngalert/notifier/alertmanager_config.go index cf500c86419..32ae92baea2 100644 --- a/pkg/services/ngalert/notifier/alertmanager_config.go +++ b/pkg/services/ngalert/notifier/alertmanager_config.go @@ -8,6 +8,7 @@ import ( "github.com/go-openapi/strfmt" + "github.com/grafana/grafana/pkg/services/featuremgmt" "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" "github.com/grafana/grafana/pkg/services/ngalert/models" "github.com/grafana/grafana/pkg/services/ngalert/store" @@ -34,13 +35,48 @@ type configurationStore interface { GetLatestAlertmanagerConfiguration(ctx context.Context, orgID int64) (*models.AlertConfiguration, error) } -func (moa *MultiOrgAlertmanager) GetAlertmanagerConfiguration(ctx context.Context, org int64) (definitions.GettableUserConfig, error) { +// ApplyConfig will apply the given alertmanager configuration for a given org. +// Can be used to force regeneration of autogenerated routes. +func (moa *MultiOrgAlertmanager) ApplyConfig(ctx context.Context, orgId int64, dbConfig *models.AlertConfiguration) error { + am, err := moa.AlertmanagerFor(orgId) + if err != nil { + // It's okay if the alertmanager isn't ready yet, we're changing its config anyway. + if !errors.Is(err, ErrAlertmanagerNotReady) { + return err + } + } + + err = am.ApplyConfig(ctx, dbConfig) + if err != nil { + return fmt.Errorf("failed to apply configuration: %w", err) + } + return nil +} + +// GetAlertmanagerConfiguration returns the latest alertmanager configuration for a given org. +// If withAutogen is true, the configuration will be augmented with autogenerated routes. 
+func (moa *MultiOrgAlertmanager) GetAlertmanagerConfiguration(ctx context.Context, org int64, withAutogen bool) (definitions.GettableUserConfig, error) { amConfig, err := moa.configStore.GetLatestAlertmanagerConfiguration(ctx, org) if err != nil { return definitions.GettableUserConfig{}, fmt.Errorf("failed to get latest configuration: %w", err) } - return moa.gettableUserConfigFromAMConfigString(ctx, org, amConfig.AlertmanagerConfiguration) + cfg, err := moa.gettableUserConfigFromAMConfigString(ctx, org, amConfig.AlertmanagerConfiguration) + if err != nil { + return definitions.GettableUserConfig{}, err + } + + if moa.featureManager.IsEnabled(ctx, featuremgmt.FlagAlertingSimplifiedRouting) && withAutogen { + // We validate the notification settings in a similar way to when we POST. + // Otherwise, broken settings (e.g. a receiver that doesn't exist) will cause the config returned here to be + // different than the config currently in-use. + // TODO: Preferably, we'd be getting the config directly from the in-memory AM so adding the autogen config would not be necessary. 
+ err := AddAutogenConfig(ctx, moa.logger, moa.configStore, org, &cfg.AlertmanagerConfig, true) + if err != nil { + return definitions.GettableUserConfig{}, err + } + } + return cfg, nil } // ActivateHistoricalConfiguration will set the current alertmanager configuration to a previous value based on the provided @@ -108,6 +144,7 @@ func (moa *MultiOrgAlertmanager) gettableUserConfigFromAMConfigString(ctx contex if err != nil { return definitions.GettableUserConfig{}, fmt.Errorf("failed to unmarshal alertmanager configuration: %w", err) } + result := definitions.GettableUserConfig{ TemplateFiles: cfg.TemplateFiles, AlertmanagerConfig: definitions.GettableApiAlertingConfig{ @@ -155,7 +192,7 @@ func (moa *MultiOrgAlertmanager) gettableUserConfigFromAMConfigString(ctx contex return result, nil } -func (moa *MultiOrgAlertmanager) ApplyAlertmanagerConfiguration(ctx context.Context, org int64, config definitions.PostableUserConfig) error { +func (moa *MultiOrgAlertmanager) SaveAndApplyAlertmanagerConfiguration(ctx context.Context, org int64, config definitions.PostableUserConfig) error { // We cannot add this validation to PostableUserConfig as that struct is used for both // Grafana Alertmanager (where inhibition rules are not supported) and External Alertmanagers // (including Mimir) where inhibition rules are supported. 
diff --git a/pkg/services/ngalert/notifier/alertmanager_test.go b/pkg/services/ngalert/notifier/alertmanager_test.go index 17e3af804e7..19e21b1c9ea 100644 --- a/pkg/services/ngalert/notifier/alertmanager_test.go +++ b/pkg/services/ngalert/notifier/alertmanager_test.go @@ -46,7 +46,7 @@ func setupAMTest(t *testing.T) *alertmanager { kvStore := fakes.NewFakeKVStore(t) secretsService := secretsManager.SetupTestService(t, database.ProvideSecretsStore(sqlStore)) decryptFn := secretsService.GetDecryptedValue - am, err := NewAlertmanager(context.Background(), 1, cfg, s, kvStore, &NilPeer{}, decryptFn, nil, m) + am, err := NewAlertmanager(context.Background(), 1, cfg, s, kvStore, &NilPeer{}, decryptFn, nil, m, false) require.NoError(t, err) return am } diff --git a/pkg/services/ngalert/notifier/autogen_alertmanager.go b/pkg/services/ngalert/notifier/autogen_alertmanager.go new file mode 100644 index 00000000000..af5de7947a5 --- /dev/null +++ b/pkg/services/ngalert/notifier/autogen_alertmanager.go @@ -0,0 +1,185 @@ +package notifier + +import ( + "context" + "errors" + "fmt" + "slices" + + "github.com/grafana/grafana-plugin-sdk-go/data" + "github.com/prometheus/alertmanager/pkg/labels" + "github.com/prometheus/common/model" + "golang.org/x/exp/maps" + + "github.com/grafana/grafana/pkg/infra/log" + "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" + "github.com/grafana/grafana/pkg/services/ngalert/models" +) + +type autogenRuleStore interface { + ListNotificationSettings(ctx context.Context, q models.ListNotificationSettingsQuery) (map[models.AlertRuleKey][]models.NotificationSettings, error) +} + +// AddAutogenConfig creates the autogenerated configuration and adds it to the given apiAlertingConfig. +// If skipInvalid is true, then invalid notification settings are skipped, otherwise an error is returned. 
+func AddAutogenConfig[R receiver](ctx context.Context, logger log.Logger, store autogenRuleStore, orgId int64, cfg apiAlertingConfig[R], skipInvalid bool) error { + autogenRoute, err := newAutogeneratedRoute(ctx, logger, store, orgId, cfg, skipInvalid) + if err != nil { + return err + } + + err = autogenRoute.addToRoute(cfg.GetRoute()) + if err != nil { + return err + } + + return nil +} + +// newAutogeneratedRoute creates a new autogenerated route based on the notification settings for the given org. +// cfg is used to construct the settings validator and to ensure we create a dedicated route for each receiver. +// skipInvalid is used to skip invalid settings instead of returning an error. +func newAutogeneratedRoute[R receiver](ctx context.Context, logger log.Logger, store autogenRuleStore, orgId int64, cfg apiAlertingConfig[R], skipInvalid bool) (autogeneratedRoute, error) { + settings, err := store.ListNotificationSettings(ctx, models.ListNotificationSettingsQuery{OrgID: orgId}) + if err != nil { + return autogeneratedRoute{}, fmt.Errorf("failed to list alert rules: %w", err) + } + + notificationSettings := make(map[data.Fingerprint]models.NotificationSettings) + // Add a default notification setting for each contact point. This is to ensure that we always have a route for each + // contact point even if no rules are using it. This will prevent race conditions between AM sync and rule sync. + for _, receiver := range cfg.GetReceivers() { + setting := models.NewDefaultNotificationSettings(receiver.GetName()) + fp := setting.Fingerprint() + notificationSettings[fp] = setting + } + + validator := NewNotificationSettingsValidator(cfg) + for ruleKey, ruleSettings := range settings { + for _, setting := range ruleSettings { + // TODO we should register these errors and somehow present them to the users or make sure the config is always valid. + if err = validator.Validate(setting); err != nil { + if skipInvalid { + logger.Error("Rule notification settings are invalid.
Skipping", append(ruleKey.LogContext(), "error", err)...) + continue + } + return autogeneratedRoute{}, fmt.Errorf("invalid notification settings for rule %s: %w", ruleKey.UID, err) + } + fp := setting.Fingerprint() + // Keep only unique settings. + if _, ok := notificationSettings[fp]; ok { + continue + } + notificationSettings[fp] = setting + } + } + if len(notificationSettings) == 0 { + return autogeneratedRoute{}, nil + } + newAutogenRoute, err := generateRouteFromSettings(cfg.GetRoute().Receiver, notificationSettings) + if err != nil { + return autogeneratedRoute{}, fmt.Errorf("failed to create autogenerated route: %w", err) + } + return newAutogenRoute, nil +} + +type autogeneratedRoute struct { + Route *definitions.Route +} + +// generateRouteFromSettings generates the autogenerated route for the given settings. The route is a tree of 3 layers: +// 1. with matcher by label models.AutogeneratedRouteLabel equals 'true'. +// 2. with matcher by receiver name. +// 3. with matcher by unique combination of optional settings. It is created only if there are optional settings. +func generateRouteFromSettings(defaultReceiver string, settings map[data.Fingerprint]models.NotificationSettings) (autogeneratedRoute, error) { + keys := maps.Keys(settings) + // sort keys to make sure that the order of the generated routes is stable + slices.Sort(keys) + + rootMatcher, err := labels.NewMatcher(labels.MatchEqual, models.AutogeneratedRouteLabel, "true") + if err != nil { + return autogeneratedRoute{}, err + } + + autoGenRoot := &definitions.Route{ + Receiver: defaultReceiver, + ObjectMatchers: definitions.ObjectMatchers{rootMatcher}, + Continue: false, // We explicitly don't continue toward user-created routes if this matches. 
+ } + + receiverRoutes := make(map[string]*definitions.Route) + for _, fingerprint := range keys { + s := settings[fingerprint] + receiverRoute, ok := receiverRoutes[s.Receiver] + if !ok { + contactMatcher, err := labels.NewMatcher(labels.MatchEqual, models.AutogeneratedRouteReceiverNameLabel, s.Receiver) + if err != nil { + return autogeneratedRoute{}, err + } + receiverRoute = &definitions.Route{ + Receiver: s.Receiver, + ObjectMatchers: definitions.ObjectMatchers{contactMatcher}, + Continue: false, + // Since we'll have many rules from different folders using this policy, we ensure it has these necessary groupings. + GroupByStr: []string{models.FolderTitleLabel, model.AlertNameLabel}, + } + receiverRoutes[s.Receiver] = receiverRoute + autoGenRoot.Routes = append(autoGenRoot.Routes, receiverRoute) + } + + // Do not create hash specific route if all group settings such as mute timings, group_wait, group_interval, etc are default + if s.IsAllDefault() { + continue + } + settingMatcher, err := labels.NewMatcher(labels.MatchEqual, models.AutogeneratedRouteSettingsHashLabel, fingerprint.String()) + if err != nil { + return autogeneratedRoute{}, err + } + receiverRoute.Routes = append(receiverRoute.Routes, &definitions.Route{ + Receiver: s.Receiver, + ObjectMatchers: definitions.ObjectMatchers{settingMatcher}, + Continue: false, // Only a single setting-specific route should match. + + GroupByStr: s.GroupBy, // Note: in order to pass validation at least FolderTitleLabel and AlertNameLabel are always included. + MuteTimeIntervals: s.MuteTimeIntervals, + GroupWait: s.GroupWait, + GroupInterval: s.GroupInterval, + RepeatInterval: s.RepeatInterval, + }) + } + + return autogeneratedRoute{ + Route: autoGenRoot, + }, nil +} + +// addToRoute adds this autogenerated route to the given route as the first top-level route under the root. 
+func (ar *autogeneratedRoute) addToRoute(route *definitions.Route) error { + if route == nil { + return errors.New("route does not exist") + } + if ar == nil || ar.Route == nil { + return nil + } + // Combine autogenerated route with the user-created route. + ar.Route.Receiver = route.Receiver + + // Remove existing autogenerated route if it exists. + RemoveAutogenConfigIfExists(route) + + route.Routes = append([]*definitions.Route{ar.Route}, route.Routes...) + return nil +} + +// RemoveAutogenConfigIfExists removes all top-level autogenerated routes from the provided route. +// If no autogenerated routes exist, this function does nothing. +func RemoveAutogenConfigIfExists(route *definitions.Route) { + route.Routes = slices.DeleteFunc(route.Routes, func(route *definitions.Route) bool { + return isAutogeneratedRoot(route) + }) +} + +// isAutogeneratedRoot returns true if the route is the root of an autogenerated route. +func isAutogeneratedRoot(route *definitions.Route) bool { + return len(route.ObjectMatchers) == 1 && route.ObjectMatchers[0].Name == models.AutogeneratedRouteLabel +} diff --git a/pkg/services/ngalert/notifier/autogen_alertmanager_test.go b/pkg/services/ngalert/notifier/autogen_alertmanager_test.go new file mode 100644 index 00000000000..6ac486dbeb3 --- /dev/null +++ b/pkg/services/ngalert/notifier/autogen_alertmanager_test.go @@ -0,0 +1,238 @@ +package notifier + +import ( + "context" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/prometheus/alertmanager/config" + "github.com/prometheus/alertmanager/pkg/labels" + "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" + + "github.com/grafana/grafana/pkg/infra/log/logtest" + "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" + "github.com/grafana/grafana/pkg/services/ngalert/models" + "github.com/grafana/grafana/pkg/util" +) + +func TestAddAutogenConfig(t *testing.T) { + rootRoute := func() 
*definitions.Route { + return &definitions.Route{ + Receiver: "default", + } + } + configGen := func(receivers []string, muteIntervals []string) *definitions.PostableApiAlertingConfig { + cfg := &definitions.PostableApiAlertingConfig{ + Config: definitions.Config{ + Route: rootRoute(), + }, + } + for _, receiver := range receivers { + cfg.Receivers = append(cfg.Receivers, &definitions.PostableApiReceiver{ + Receiver: config.Receiver{ + Name: receiver, + }, + }) + } + for _, muteInterval := range muteIntervals { + cfg.MuteTimeIntervals = append(cfg.MuteTimeIntervals, config.MuteTimeInterval{ + Name: muteInterval, + }) + } + return cfg + } + + withChildRoutes := func(route *definitions.Route, children ...*definitions.Route) *definitions.Route { + route.Routes = append(route.Routes, children...) + return route + } + + matcher := func(key, val string) definitions.ObjectMatchers { + m, err := labels.NewMatcher(labels.MatchEqual, key, val) + require.NoError(t, err) + return definitions.ObjectMatchers{m} + } + + basicContactRoute := func(receiver string) *definitions.Route { + return &definitions.Route{ + Receiver: receiver, + ObjectMatchers: matcher(models.AutogeneratedRouteReceiverNameLabel, receiver), + GroupByStr: []string{models.FolderTitleLabel, model.AlertNameLabel}, + } + } + + testCases := []struct { + name string + existingConfig *definitions.PostableApiAlertingConfig + storeSettings []models.NotificationSettings + skipInvalid bool + expRoute *definitions.Route + expErrorContains string + }{ + { + name: "no settings or receivers, no change", + existingConfig: configGen(nil, nil), + storeSettings: []models.NotificationSettings{}, + expRoute: rootRoute(), + }, + { + name: "no settings but some receivers, add default routes for receivers", + existingConfig: configGen([]string{"receiver1", "receiver2", "receiver3"}, nil), + storeSettings: []models.NotificationSettings{}, + expRoute: withChildRoutes(rootRoute(), &definitions.Route{ + Receiver: "default", + 
ObjectMatchers: matcher(models.AutogeneratedRouteLabel, "true"), + Routes: []*definitions.Route{ + basicContactRoute("receiver1"), + basicContactRoute("receiver3"), + basicContactRoute("receiver2"), + }, + }), + }, + { + name: "settings with no custom options, add default routes only", + existingConfig: configGen([]string{"receiver1", "receiver2", "receiver3"}, nil), + storeSettings: []models.NotificationSettings{models.NewDefaultNotificationSettings("receiver1"), models.NewDefaultNotificationSettings("receiver2")}, + expRoute: withChildRoutes(rootRoute(), &definitions.Route{ + Receiver: "default", + ObjectMatchers: matcher(models.AutogeneratedRouteLabel, "true"), + Routes: []*definitions.Route{ + basicContactRoute("receiver1"), + basicContactRoute("receiver3"), + basicContactRoute("receiver2"), + }, + }), + }, + { + name: "settings with custom options, add option-specific routes", + existingConfig: configGen([]string{"receiver1", "receiver2", "receiver3", "receiver4", "receiver5"}, []string{"maintenance"}), + storeSettings: []models.NotificationSettings{ + models.CopyNotificationSettings(models.NewDefaultNotificationSettings("receiver1"), models.NSMuts.WithGroupInterval(util.Pointer(1*time.Minute))), + models.CopyNotificationSettings(models.NewDefaultNotificationSettings("receiver2"), models.NSMuts.WithGroupWait(util.Pointer(2*time.Minute))), + models.CopyNotificationSettings(models.NewDefaultNotificationSettings("receiver3"), models.NSMuts.WithRepeatInterval(util.Pointer(3*time.Minute))), + models.CopyNotificationSettings(models.NewDefaultNotificationSettings("receiver4"), models.NSMuts.WithGroupBy(model.AlertNameLabel, models.FolderTitleLabel, "custom")), + models.CopyNotificationSettings(models.NewDefaultNotificationSettings("receiver5"), models.NSMuts.WithMuteTimeIntervals("maintenance")), + { + Receiver: "receiver1", + GroupBy: []string{model.AlertNameLabel, models.FolderTitleLabel, "custom"}, + GroupInterval: util.Pointer(model.Duration(1 * time.Minute)), + 
GroupWait: util.Pointer(model.Duration(2 * time.Minute)), + RepeatInterval: util.Pointer(model.Duration(3 * time.Minute)), + MuteTimeIntervals: []string{"maintenance"}, + }, + }, + expRoute: withChildRoutes(rootRoute(), &definitions.Route{ + Receiver: "default", + ObjectMatchers: matcher(models.AutogeneratedRouteLabel, "true"), + Routes: []*definitions.Route{ + withChildRoutes(basicContactRoute("receiver5"), &definitions.Route{ + Receiver: "receiver5", + ObjectMatchers: matcher(models.AutogeneratedRouteSettingsHashLabel, "030d6474aec0b553"), + MuteTimeIntervals: []string{"maintenance"}, + }), + withChildRoutes(basicContactRoute("receiver1"), &definitions.Route{ + Receiver: "receiver1", + ObjectMatchers: matcher(models.AutogeneratedRouteSettingsHashLabel, "dde34b8127e68f31"), + GroupInterval: util.Pointer(model.Duration(1 * time.Minute)), + }, &definitions.Route{ + Receiver: "receiver1", + ObjectMatchers: matcher(models.AutogeneratedRouteSettingsHashLabel, "ed4038c5d6733607"), + GroupByStr: []string{model.AlertNameLabel, models.FolderTitleLabel, "custom"}, + GroupInterval: util.Pointer(model.Duration(1 * time.Minute)), + GroupWait: util.Pointer(model.Duration(2 * time.Minute)), + RepeatInterval: util.Pointer(model.Duration(3 * time.Minute)), + MuteTimeIntervals: []string{"maintenance"}, + }), + withChildRoutes(basicContactRoute("receiver2"), &definitions.Route{ + Receiver: "receiver2", + ObjectMatchers: matcher(models.AutogeneratedRouteSettingsHashLabel, "27e1d1717c9ef621"), + GroupWait: util.Pointer(model.Duration(2 * time.Minute)), + }), + withChildRoutes(basicContactRoute("receiver4"), &definitions.Route{ + Receiver: "receiver4", + ObjectMatchers: matcher(models.AutogeneratedRouteSettingsHashLabel, "5e5ab8d592b12e86"), + GroupByStr: []string{model.AlertNameLabel, models.FolderTitleLabel, "custom"}, + }), + withChildRoutes(basicContactRoute("receiver3"), &definitions.Route{ + Receiver: "receiver3", + ObjectMatchers: 
matcher(models.AutogeneratedRouteSettingsHashLabel, "9e282ef0193d830a"), + RepeatInterval: util.Pointer(model.Duration(3 * time.Minute)), + }), + }, + }), + }, + { + name: "when skipInvalid=true, invalid settings are skipped", + existingConfig: configGen([]string{"receiver1", "receiver2", "receiver3"}, nil), + storeSettings: []models.NotificationSettings{ + models.NewDefaultNotificationSettings("receiverA"), // Doesn't exist. + models.CopyNotificationSettings(models.NewDefaultNotificationSettings("receiver1"), models.NSMuts.WithMuteTimeIntervals("maintenance")), // Doesn't exist. + models.CopyNotificationSettings(models.NewDefaultNotificationSettings("receiver2"), models.NSMuts.WithGroupWait(util.Pointer(-2*time.Minute))), // Negative. + }, + skipInvalid: true, + expRoute: withChildRoutes(rootRoute(), &definitions.Route{ + Receiver: "default", + ObjectMatchers: matcher(models.AutogeneratedRouteLabel, "true"), + Routes: []*definitions.Route{ + basicContactRoute("receiver1"), + basicContactRoute("receiver3"), + basicContactRoute("receiver2"), + }, + }), + }, + { + name: "when skipInvalid=false, invalid receiver throws error", + existingConfig: configGen([]string{"receiver1", "receiver2", "receiver3"}, nil), + storeSettings: []models.NotificationSettings{models.NewDefaultNotificationSettings("receiverA")}, + skipInvalid: false, + expErrorContains: "receiverA", + }, + { + name: "when skipInvalid=false, invalid settings throws error", + existingConfig: configGen([]string{"receiver1", "receiver2", "receiver3"}, nil), + storeSettings: []models.NotificationSettings{models.CopyNotificationSettings(models.NewDefaultNotificationSettings("receiver1"), models.NSMuts.WithMuteTimeIntervals("maintenance"))}, + skipInvalid: false, + expErrorContains: "maintenance", + }, + { + name: "when skipInvalid=false, invalid settings throws error", + existingConfig: configGen([]string{"receiver1", "receiver2", "receiver3"}, nil), + storeSettings: 
[]models.NotificationSettings{models.CopyNotificationSettings(models.NewDefaultNotificationSettings("receiver2"), models.NSMuts.WithGroupWait(util.Pointer(-2*time.Minute)))}, + skipInvalid: false, + expErrorContains: "group wait", + }, + } + + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + orgId := int64(1) + store := &fakeConfigStore{ + notificationSettings: make(map[int64]map[models.AlertRuleKey][]models.NotificationSettings), + } + store.notificationSettings[orgId] = make(map[models.AlertRuleKey][]models.NotificationSettings) + + for _, setting := range tt.storeSettings { + store.notificationSettings[orgId][models.AlertRuleKey{OrgID: orgId, UID: util.GenerateShortUID()}] = []models.NotificationSettings{setting} + } + + err := AddAutogenConfig(context.Background(), &logtest.Fake{}, store, orgId, tt.existingConfig, tt.skipInvalid) + if tt.expErrorContains != "" { + require.Error(t, err) + require.ErrorContains(t, err, tt.expErrorContains) + return + } else { + require.NoError(t, err) + } + + cOpt := []cmp.Option{ + cmpopts.IgnoreUnexported(definitions.Route{}, labels.Matcher{}), + } + if !cmp.Equal(tt.expRoute, tt.existingConfig.Route, cOpt...) 
{ + t.Errorf("Unexpected Route: %v", cmp.Diff(tt.expRoute, tt.existingConfig.Route, cOpt...)) + } + }) + } +} diff --git a/pkg/services/ngalert/notifier/multiorg_alertmanager.go b/pkg/services/ngalert/notifier/multiorg_alertmanager.go index 34394fec23d..89ffe28a845 100644 --- a/pkg/services/ngalert/notifier/multiorg_alertmanager.go +++ b/pkg/services/ngalert/notifier/multiorg_alertmanager.go @@ -16,6 +16,7 @@ import ( "github.com/grafana/grafana/pkg/infra/kvstore" "github.com/grafana/grafana/pkg/infra/log" + "github.com/grafana/grafana/pkg/services/featuremgmt" apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" "github.com/grafana/grafana/pkg/services/ngalert/metrics" "github.com/grafana/grafana/pkg/services/ngalert/models" @@ -67,8 +68,9 @@ type MultiOrgAlertmanager struct { alertmanagersMtx sync.RWMutex alertmanagers map[int64]Alertmanager - settings *setting.Cfg - logger log.Logger + settings *setting.Cfg + featureManager featuremgmt.FeatureToggles + logger log.Logger // clusterPeer represents the clustering peers of Alertmanagers between Grafana instances. 
peer alertingNotify.ClusterPeer @@ -95,24 +97,35 @@ func WithAlertmanagerOverride(f func(OrgAlertmanagerFactory) OrgAlertmanagerFact } } -func NewMultiOrgAlertmanager(cfg *setting.Cfg, configStore AlertingStore, orgStore store.OrgStore, - kvStore kvstore.KVStore, provStore provisioningStore, decryptFn alertingNotify.GetDecryptedValueFn, - m *metrics.MultiOrgAlertmanager, ns notifications.Service, l log.Logger, s secrets.Service, opts ...Option, +func NewMultiOrgAlertmanager( + cfg *setting.Cfg, + configStore AlertingStore, + orgStore store.OrgStore, + kvStore kvstore.KVStore, + provStore provisioningStore, + decryptFn alertingNotify.GetDecryptedValueFn, + m *metrics.MultiOrgAlertmanager, + ns notifications.Service, + l log.Logger, + s secrets.Service, + featureManager featuremgmt.FeatureToggles, + opts ...Option, ) (*MultiOrgAlertmanager, error) { moa := &MultiOrgAlertmanager{ Crypto: NewCrypto(s, configStore, l), ProvStore: provStore, - logger: l, - settings: cfg, - alertmanagers: map[int64]Alertmanager{}, - configStore: configStore, - orgStore: orgStore, - kvStore: kvStore, - decryptFn: decryptFn, - metrics: m, - ns: ns, - peer: &NilPeer{}, + logger: l, + settings: cfg, + featureManager: featureManager, + alertmanagers: map[int64]Alertmanager{}, + configStore: configStore, + orgStore: orgStore, + kvStore: kvStore, + decryptFn: decryptFn, + metrics: m, + ns: ns, + peer: &NilPeer{}, } if err := moa.setupClustering(cfg); err != nil { @@ -122,7 +135,7 @@ func NewMultiOrgAlertmanager(cfg *setting.Cfg, configStore AlertingStore, orgSto // Set up the default per tenant Alertmanager factory. 
moa.factory = func(ctx context.Context, orgID int64) (Alertmanager, error) { m := metrics.NewAlertmanagerMetrics(moa.metrics.GetOrCreateOrgRegistry(orgID)) - return NewAlertmanager(ctx, orgID, moa.settings, moa.configStore, moa.kvStore, moa.peer, moa.decryptFn, moa.ns, m) + return NewAlertmanager(ctx, orgID, moa.settings, moa.configStore, moa.kvStore, moa.peer, moa.decryptFn, moa.ns, m, featureManager.IsEnabled(ctx, featuremgmt.FlagAlertingSimplifiedRouting)) } for _, opt := range opts { diff --git a/pkg/services/ngalert/notifier/multiorg_alertmanager_remote_test.go b/pkg/services/ngalert/notifier/multiorg_alertmanager_remote_test.go index 827173630c0..603cfb7e654 100644 --- a/pkg/services/ngalert/notifier/multiorg_alertmanager_remote_test.go +++ b/pkg/services/ngalert/notifier/multiorg_alertmanager_remote_test.go @@ -9,6 +9,7 @@ import ( "time" "github.com/grafana/grafana/pkg/infra/log" + "github.com/grafana/grafana/pkg/services/featuremgmt" "github.com/grafana/grafana/pkg/services/ngalert/metrics" "github.com/grafana/grafana/pkg/services/ngalert/models" "github.com/grafana/grafana/pkg/services/ngalert/notifier" @@ -98,6 +99,7 @@ func TestMultiorgAlertmanager_RemoteSecondaryMode(t *testing.T) { nil, nopLogger, secretsService, + &featuremgmt.FeatureManager{}, override, ) require.NoError(t, err) diff --git a/pkg/services/ngalert/notifier/multiorg_alertmanager_test.go b/pkg/services/ngalert/notifier/multiorg_alertmanager_test.go index 0698e5701be..569c3a4579c 100644 --- a/pkg/services/ngalert/notifier/multiorg_alertmanager_test.go +++ b/pkg/services/ngalert/notifier/multiorg_alertmanager_test.go @@ -15,6 +15,7 @@ import ( "github.com/stretchr/testify/require" "github.com/grafana/grafana/pkg/infra/log" + "github.com/grafana/grafana/pkg/services/featuremgmt" "github.com/grafana/grafana/pkg/services/ngalert/metrics" "github.com/grafana/grafana/pkg/services/ngalert/models" "github.com/grafana/grafana/pkg/services/ngalert/store" @@ -45,7 +46,7 @@ func 
TestMultiOrgAlertmanager_SyncAlertmanagersForOrgs(t *testing.T) { DisabledOrgs: map[int64]struct{}{5: {}}, }, // do not poll in tests. } - mam, err := NewMultiOrgAlertmanager(cfg, configStore, orgStore, kvStore, provStore, decryptFn, m.GetMultiOrgAlertmanagerMetrics(), nil, log.New("testlogger"), secretsService) + mam, err := NewMultiOrgAlertmanager(cfg, configStore, orgStore, kvStore, provStore, decryptFn, m.GetMultiOrgAlertmanagerMetrics(), nil, log.New("testlogger"), secretsService, &featuremgmt.FeatureManager{}) require.NoError(t, err) ctx := context.Background() @@ -178,7 +179,7 @@ func TestMultiOrgAlertmanager_SyncAlertmanagersForOrgsWithFailures(t *testing.T) DefaultConfiguration: setting.GetAlertmanagerDefaultConfiguration(), }, // do not poll in tests. } - mam, err := NewMultiOrgAlertmanager(cfg, configStore, orgStore, kvStore, provStore, decryptFn, m.GetMultiOrgAlertmanagerMetrics(), nil, log.New("testlogger"), secretsService) + mam, err := NewMultiOrgAlertmanager(cfg, configStore, orgStore, kvStore, provStore, decryptFn, m.GetMultiOrgAlertmanagerMetrics(), nil, log.New("testlogger"), secretsService, &featuremgmt.FeatureManager{}) require.NoError(t, err) ctx := context.Background() @@ -265,7 +266,7 @@ func TestMultiOrgAlertmanager_AlertmanagerFor(t *testing.T) { decryptFn := secretsService.GetDecryptedValue reg := prometheus.NewPedanticRegistry() m := metrics.NewNGAlert(reg) - mam, err := NewMultiOrgAlertmanager(cfg, configStore, orgStore, kvStore, provStore, decryptFn, m.GetMultiOrgAlertmanagerMetrics(), nil, log.New("testlogger"), secretsService) + mam, err := NewMultiOrgAlertmanager(cfg, configStore, orgStore, kvStore, provStore, decryptFn, m.GetMultiOrgAlertmanagerMetrics(), nil, log.New("testlogger"), secretsService, &featuremgmt.FeatureManager{}) require.NoError(t, err) ctx := context.Background() @@ -317,7 +318,7 @@ func TestMultiOrgAlertmanager_ActivateHistoricalConfiguration(t *testing.T) { decryptFn := secretsService.GetDecryptedValue reg := 
prometheus.NewPedanticRegistry() m := metrics.NewNGAlert(reg) - mam, err := NewMultiOrgAlertmanager(cfg, configStore, orgStore, kvStore, provStore, decryptFn, m.GetMultiOrgAlertmanagerMetrics(), nil, log.New("testlogger"), secretsService) + mam, err := NewMultiOrgAlertmanager(cfg, configStore, orgStore, kvStore, provStore, decryptFn, m.GetMultiOrgAlertmanagerMetrics(), nil, log.New("testlogger"), secretsService, &featuremgmt.FeatureManager{}) require.NoError(t, err) ctx := context.Background() diff --git a/pkg/services/ngalert/notifier/testing.go b/pkg/services/ngalert/notifier/testing.go index 999c5b24c0c..6e9d30fd2a7 100644 --- a/pkg/services/ngalert/notifier/testing.go +++ b/pkg/services/ngalert/notifier/testing.go @@ -19,6 +19,35 @@ type fakeConfigStore struct { // historicConfigs stores configs by orgID. historicConfigs map[int64][]*models.HistoricAlertConfiguration + + // notificationSettings stores notification settings by orgID. + notificationSettings map[int64]map[models.AlertRuleKey][]models.NotificationSettings +} + +func (f *fakeConfigStore) ListNotificationSettings(ctx context.Context, q models.ListNotificationSettingsQuery) (map[models.AlertRuleKey][]models.NotificationSettings, error) { + settings, ok := f.notificationSettings[q.OrgID] + if !ok { + return nil, nil + } + if q.ReceiverName != "" { + filteredSettings := make(map[models.AlertRuleKey][]models.NotificationSettings) + for key, notificationSettings := range settings { + // Current semantics is that we only keep entries where any of the settings match the receiver name. + var found bool + for _, setting := range notificationSettings { + if q.ReceiverName == setting.Receiver { + found = true + break + } + } + if found { + filteredSettings[key] = notificationSettings + } + } + return filteredSettings, nil + } + + return settings, nil } // Saves the image or returns an error. 
@@ -199,3 +228,10 @@ type fakeState struct { func (fs *fakeState) MarshalBinary() ([]byte, error) { return []byte(fs.data), nil } + +type NoValidation struct { +} + +func (n NoValidation) Validate(_ models.NotificationSettings) error { + return nil +} diff --git a/pkg/services/ngalert/notifier/validation.go b/pkg/services/ngalert/notifier/validation.go new file mode 100644 index 00000000000..1f2cad0b01a --- /dev/null +++ b/pkg/services/ngalert/notifier/validation.go @@ -0,0 +1,132 @@ +package notifier + +import ( + "context" + "errors" + "fmt" + "sync" + + "github.com/grafana/grafana/pkg/infra/log" + "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" + "github.com/grafana/grafana/pkg/services/ngalert/models" + "github.com/grafana/grafana/pkg/services/ngalert/store" + "github.com/prometheus/alertmanager/config" +) + +// NotificationSettingsValidator validates NotificationSettings against the current Alertmanager configuration +type NotificationSettingsValidator interface { + Validate(s models.NotificationSettings) error +} + +// staticValidator is a NotificationSettingsValidator that uses static pre-fetched values for available receivers and mute timings. +type staticValidator struct { + availableReceivers map[string]struct{} + availableMuteTimings map[string]struct{} +} + +// apiAlertingConfig contains the methods required to validate NotificationSettings and create autogen routes. +type apiAlertingConfig[R receiver] interface { + GetReceivers() []R + GetMuteTimeIntervals() []config.MuteTimeInterval + GetRoute() *definitions.Route +} + +type receiver interface { + GetName() string +} + +// NewNotificationSettingsValidator creates a new NotificationSettingsValidator from the given apiAlertingConfig. 
+func NewNotificationSettingsValidator[R receiver](am apiAlertingConfig[R]) NotificationSettingsValidator { + availableReceivers := make(map[string]struct{}) + for _, receiver := range am.GetReceivers() { + availableReceivers[receiver.GetName()] = struct{}{} + } + + availableMuteTimings := make(map[string]struct{}) + for _, interval := range am.GetMuteTimeIntervals() { + availableMuteTimings[interval.Name] = struct{}{} + } + + return staticValidator{ + availableReceivers: availableReceivers, + availableMuteTimings: availableMuteTimings, + } +} + +// Validate checks that models.NotificationSettings is valid and references existing receivers and mute timings. +func (n staticValidator) Validate(settings models.NotificationSettings) error { + if err := settings.Validate(); err != nil { + return err + } + var errs []error + if _, ok := n.availableReceivers[settings.Receiver]; !ok { + errs = append(errs, fmt.Errorf("receiver '%s' does not exist", settings.Receiver)) + } + for _, interval := range settings.MuteTimeIntervals { + if _, ok := n.availableMuteTimings[interval]; !ok { + errs = append(errs, fmt.Errorf("mute time interval '%s' does not exist", interval)) + } + } + return errors.Join(errs...) +} + +// NotificationSettingsValidatorProvider provides a NotificationSettingsValidator for a given orgID. +type NotificationSettingsValidatorProvider interface { + Validator(ctx context.Context, orgID int64) (NotificationSettingsValidator, error) +} + +// notificationSettingsValidationService provides a new NotificationSettingsValidator for a given orgID by loading the latest Alertmanager configuration. 
+type notificationSettingsValidationService struct { + store store.AlertingStore +} + +func NewNotificationSettingsValidationService(store store.AlertingStore) NotificationSettingsValidatorProvider { + return &notificationSettingsValidationService{ + store: store, + } +} + +// Validator returns a NotificationSettingsValidator using the alertmanager configuration from the given orgID. +func (v *notificationSettingsValidationService) Validator(ctx context.Context, orgID int64) (NotificationSettingsValidator, error) { + rawCfg, err := v.store.GetLatestAlertmanagerConfiguration(ctx, orgID) + if err != nil { + return staticValidator{}, err + } + cfg, err := Load([]byte(rawCfg.AlertmanagerConfiguration)) + if err != nil { + return staticValidator{}, err + } + log.New("ngalert.notifier.validator").FromContext(ctx).Debug("Create validator from Alertmanager configuration", "hash", rawCfg.ConfigurationHash) + return NewNotificationSettingsValidator(&cfg.AlertmanagerConfig), nil +} + +type cachedNotificationSettingsValidationService struct { + srv NotificationSettingsValidatorProvider + mtx sync.Mutex + validators map[int64]NotificationSettingsValidator +} + +func NewCachedNotificationSettingsValidationService(store store.AlertingStore) NotificationSettingsValidatorProvider { + return &cachedNotificationSettingsValidationService{ + srv: NewNotificationSettingsValidationService(store), + mtx: sync.Mutex{}, + validators: map[int64]NotificationSettingsValidator{}, + } +} + +// Validator returns a NotificationSettingsValidator using the alertmanager configuration from the given orgID. 
+func (v *cachedNotificationSettingsValidationService) Validator(ctx context.Context, orgID int64) (NotificationSettingsValidator, error) { + v.mtx.Lock() + defer v.mtx.Unlock() + + result, ok := v.validators[orgID] + if !ok { + vd, err := v.srv.Validator(ctx, orgID) + if err != nil { + return nil, err + } + v.validators[orgID] = vd + result = vd + } + return result, nil +} diff --git a/pkg/services/ngalert/provisioning/alert_rules.go b/pkg/services/ngalert/provisioning/alert_rules.go index c83eee732ab..bf7e93b3a1a 100644 --- a/pkg/services/ngalert/provisioning/alert_rules.go +++ b/pkg/services/ngalert/provisioning/alert_rules.go @@ -9,11 +9,16 @@ import ( "github.com/grafana/grafana/pkg/infra/log" "github.com/grafana/grafana/pkg/services/dashboards" "github.com/grafana/grafana/pkg/services/ngalert/models" + "github.com/grafana/grafana/pkg/services/ngalert/notifier" "github.com/grafana/grafana/pkg/services/ngalert/store" "github.com/grafana/grafana/pkg/services/quota" "github.com/grafana/grafana/pkg/util" ) +type NotificationSettingsValidatorProvider interface { + Validator(ctx context.Context, orgID int64) (notifier.NotificationSettingsValidator, error) +} + type AlertRuleService struct { defaultIntervalSeconds int64 baseIntervalSeconds int64 @@ -24,6 +29,7 @@ type AlertRuleService struct { quotas QuotaChecker xact TransactionManager log log.Logger + nsValidatorProvider NotificationSettingsValidatorProvider } func NewAlertRuleService(ruleStore RuleStore, @@ -34,7 +40,9 @@ func NewAlertRuleService(ruleStore RuleStore, defaultIntervalSeconds int64, baseIntervalSeconds int64, rulesPerRuleGroupLimit int64, - log log.Logger) *AlertRuleService { + log log.Logger, + ns NotificationSettingsValidatorProvider, +) *AlertRuleService { return &AlertRuleService{ defaultIntervalSeconds: defaultIntervalSeconds, baseIntervalSeconds: baseIntervalSeconds, @@ -45,6 +53,7 @@ func NewAlertRuleService(ruleStore RuleStore, quotas: quotas, xact: xact, log: log, + nsValidatorProvider: ns, 
} } @@ -137,6 +146,17 @@ func (service *AlertRuleService) CreateAlertRule(ctx context.Context, rule model return models.AlertRule{}, err } rule.Updated = time.Now() + if len(rule.NotificationSettings) > 0 { + validator, err := service.nsValidatorProvider.Validator(ctx, rule.OrgID) + if err != nil { + return models.AlertRule{}, err + } + for _, setting := range rule.NotificationSettings { + if err := validator.Validate(setting); err != nil { + return models.AlertRule{}, err + } + } + } err = service.xact.InTransaction(ctx, func(ctx context.Context) error { ids, err := service.ruleStore.InsertAlertRules(ctx, []models.AlertRule{ rule, @@ -231,6 +251,32 @@ func (service *AlertRuleService) ReplaceRuleGroup(ctx context.Context, orgID int return err } + delta, err := service.calcDelta(ctx, orgID, group) + if err != nil { + return err + } + + if len(delta.New) == 0 && len(delta.Update) == 0 && len(delta.Delete) == 0 { + return nil + } + + newOrUpdatedNotificationSettings := delta.NewOrUpdatedNotificationSettings() + if len(newOrUpdatedNotificationSettings) > 0 { + validator, err := service.nsValidatorProvider.Validator(ctx, delta.GroupKey.OrgID) + if err != nil { + return err + } + for _, s := range newOrUpdatedNotificationSettings { + if err := validator.Validate(s); err != nil { + return errors.Join(models.ErrAlertRuleFailedValidation, err) + } + } + } + + return service.persistDelta(ctx, orgID, delta, userID, provenance) +} + +func (service *AlertRuleService) calcDelta(ctx context.Context, orgID int64, group models.AlertRuleGroup) (*store.GroupDelta, error) { // If the provided request did not provide the rules list at all, treat it as though it does not wish to change rules. // This is done for backwards compatibility. Requests which specify only the interval must update only the interval. 
if group.Rules == nil { @@ -241,7 +287,7 @@ func (service *AlertRuleService) ReplaceRuleGroup(ctx context.Context, orgID int } ruleList, err := service.ruleStore.ListAlertRules(ctx, &listRulesQuery) if err != nil { - return fmt.Errorf("failed to list alert rules: %w", err) + return nil, fmt.Errorf("failed to list alert rules: %w", err) } group.Rules = make([]models.AlertRule, 0, len(ruleList)) for _, r := range ruleList { @@ -252,7 +298,7 @@ func (service *AlertRuleService) ReplaceRuleGroup(ctx context.Context, orgID int } if err := service.checkGroupLimits(group); err != nil { - return fmt.Errorf("write rejected due to exceeded limits: %w", err) + return nil, fmt.Errorf("write rejected due to exceeded limits: %w", err) } key := models.AlertRuleGroupKey{ @@ -264,22 +310,20 @@ func (service *AlertRuleService) ReplaceRuleGroup(ctx context.Context, orgID int group = *syncGroupRuleFields(&group, orgID) for i := range group.Rules { if err := group.Rules[i].SetDashboardAndPanelFromAnnotations(); err != nil { - return err + return nil, err } rules = append(rules, &models.AlertRuleWithOptionals{AlertRule: group.Rules[i], HasPause: true}) } delta, err := store.CalculateChanges(ctx, service.ruleStore, key, rules) if err != nil { - return fmt.Errorf("failed to calculate diff for alert rules: %w", err) + return nil, fmt.Errorf("failed to calculate diff for alert rules: %w", err) } // Refresh all calculated fields across all rules. - delta = store.UpdateCalculatedRuleFields(delta) - - if len(delta.New) == 0 && len(delta.Update) == 0 && len(delta.Delete) == 0 { - return nil - } + return store.UpdateCalculatedRuleFields(delta), nil +} +func (service *AlertRuleService) persistDelta(ctx context.Context, orgID int64, delta *store.GroupDelta, userID int64, provenance models.Provenance) error { return service.xact.InTransaction(ctx, func(ctx context.Context) error { // Delete first as this could prevent future unique constraint violations. 
if len(delta.Delete) > 0 { @@ -314,7 +358,7 @@ func (service *AlertRuleService) ReplaceRuleGroup(ctx context.Context, orgID int New: *update.New, }) } - if err = service.ruleStore.UpdateAlertRules(ctx, updates); err != nil { + if err := service.ruleStore.UpdateAlertRules(ctx, updates); err != nil { return fmt.Errorf("failed to update alert rules: %w", err) } for _, update := range delta.Update { @@ -336,7 +380,7 @@ func (service *AlertRuleService) ReplaceRuleGroup(ctx context.Context, orgID int } } - if err = service.checkLimitsTransactionCtx(ctx, orgID, userID); err != nil { + if err := service.checkLimitsTransactionCtx(ctx, orgID, userID); err != nil { return err } @@ -353,6 +397,17 @@ func (service *AlertRuleService) UpdateAlertRule(ctx context.Context, rule model if storedProvenance != provenance && storedProvenance != models.ProvenanceNone { return models.AlertRule{}, fmt.Errorf("cannot change provenance from '%s' to '%s'", storedProvenance, provenance) } + if len(rule.NotificationSettings) > 0 { + validator, err := service.nsValidatorProvider.Validator(ctx, rule.OrgID) + if err != nil { + return models.AlertRule{}, err + } + for _, setting := range rule.NotificationSettings { + if err := validator.Validate(setting); err != nil { + return models.AlertRule{}, err + } + } + } rule.Updated = time.Now() rule.ID = storedRule.ID rule.IntervalSeconds = storedRule.IntervalSeconds diff --git a/pkg/services/ngalert/provisioning/contactpoints.go b/pkg/services/ngalert/provisioning/contactpoints.go index 92142fb363c..c9886e97cb2 100644 --- a/pkg/services/ngalert/provisioning/contactpoints.go +++ b/pkg/services/ngalert/provisioning/contactpoints.go @@ -22,13 +22,19 @@ import ( "github.com/grafana/grafana/pkg/util" ) +type AlertRuleNotificationSettingsStore interface { + RenameReceiverInNotificationSettings(ctx context.Context, orgID int64, oldReceiver, newReceiver string) (int, error) + ListNotificationSettings(ctx context.Context, q models.ListNotificationSettingsQuery) 
(map[models.AlertRuleKey][]models.NotificationSettings, error) +} + type ContactPointService struct { - configStore *alertmanagerConfigStoreImpl - encryptionService secrets.Service - provenanceStore ProvisioningStore - xact TransactionManager - receiverService receiverService - log log.Logger + configStore *alertmanagerConfigStoreImpl + encryptionService secrets.Service + provenanceStore ProvisioningStore + notificationSettingsStore AlertRuleNotificationSettingsStore + xact TransactionManager + receiverService receiverService + log log.Logger } type receiverService interface { @@ -36,16 +42,18 @@ type receiverService interface { } func NewContactPointService(store AMConfigStore, encryptionService secrets.Service, - provenanceStore ProvisioningStore, xact TransactionManager, receiverService receiverService, log log.Logger) *ContactPointService { + provenanceStore ProvisioningStore, xact TransactionManager, receiverService receiverService, log log.Logger, + nsStore AlertRuleNotificationSettingsStore) *ContactPointService { return &ContactPointService{ configStore: &alertmanagerConfigStoreImpl{ store: store, }, - receiverService: receiverService, - encryptionService: encryptionService, - provenanceStore: provenanceStore, - xact: xact, - log: log, + receiverService: receiverService, + encryptionService: encryptionService, + provenanceStore: provenanceStore, + xact: xact, + log: log, + notificationSettingsStore: nsStore, } } @@ -277,7 +285,7 @@ func (ecp *ContactPointService) UpdateContactPoint(ctx context.Context, orgID in return err } - configModified := stitchReceiver(revision.cfg, mergedReceiver) + configModified, renamedReceiver := stitchReceiver(revision.cfg, mergedReceiver) if !configModified { return fmt.Errorf("contact point with uid '%s' not found", mergedReceiver.UID) } @@ -286,6 +294,15 @@ func (ecp *ContactPointService) UpdateContactPoint(ctx context.Context, orgID in if err := ecp.configStore.Save(ctx, revision, orgID); err != nil { return err } + if 
renamedReceiver != "" && renamedReceiver != mergedReceiver.Name { + affected, err := ecp.notificationSettingsStore.RenameReceiverInNotificationSettings(ctx, orgID, renamedReceiver, mergedReceiver.Name) + if err != nil { + return err + } + if affected > 0 { + ecp.log.Info("Renamed receiver in notification settings", "oldName", renamedReceiver, "newName", mergedReceiver.Name, "affectedSettings", affected) + } + } return ecp.provenanceStore.SetProvenance(ctx, &contactPoint, orgID, provenance) }) if err != nil { @@ -325,6 +342,21 @@ func (ecp *ContactPointService) DeleteContactPoint(ctx context.Context, orgID in } return ecp.xact.InTransaction(ctx, func(ctx context.Context) error { + if fullRemoval { + used, err := ecp.notificationSettingsStore.ListNotificationSettings(ctx, models.ListNotificationSettingsQuery{OrgID: orgID, ReceiverName: name}) + if err != nil { + return fmt.Errorf("failed to query alert rules for reference to the contact point '%s': %w", name, err) + } + if len(used) > 0 { + uids := make([]string, 0, len(used)) + for key := range used { + uids = append(uids, key.UID) + } + ecp.log.Error("Cannot delete contact point because it is used in rule's notification settings", "receiverName", name, "rulesUid", strings.Join(uids, ",")) + return fmt.Errorf("contact point '%s' is currently used in notification settings by one or many alert rules", name) + } + } + if err := ecp.configStore.Save(ctx, revision, orgID); err != nil { return err } @@ -383,10 +415,12 @@ func (ecp *ContactPointService) encryptValue(value string) (string, error) { // stitchReceiver modifies a receiver, target, in an alertmanager configStore. It modifies the given configStore in-place. // Returns true if the configStore was altered in any way, and false otherwise. 
-func stitchReceiver(cfg *apimodels.PostableUserConfig, target *apimodels.PostableGrafanaReceiver) bool { +// If integration was moved to another group and it was the last in the previous group, the second parameter contains the name of the old group that is gone +func stitchReceiver(cfg *apimodels.PostableUserConfig, target *apimodels.PostableGrafanaReceiver) (bool, string) { // Algorithm to fix up receivers. Receivers are very complex and depend heavily on internal consistency. // All receivers in a given receiver group have the same name. We must maintain this across renames. configModified := false + renamedReceiver := "" groupLoop: for groupIdx, receiverGroup := range cfg.AlertmanagerConfig.Receivers { // Does the current group contain the grafana receiver we're interested in? @@ -411,6 +445,7 @@ groupLoop: replaceReferences(receiverGroup.Name, target.Name, cfg.AlertmanagerConfig.Route) receiverGroup.Name = target.Name receiverGroup.GrafanaManagedReceivers[i] = target + renamedReceiver = receiverGroup.Name } // Otherwise, we only want to rename the receiver we are touching... NOT all of them. 
@@ -452,7 +487,7 @@ groupLoop: } } - return configModified + return configModified, renamedReceiver } func replaceReferences(oldName, newName string, routes ...*apimodels.Route) { diff --git a/pkg/services/ngalert/provisioning/contactpoints_test.go b/pkg/services/ngalert/provisioning/contactpoints_test.go index fb46846630b..995b9bb6c47 100644 --- a/pkg/services/ngalert/provisioning/contactpoints_test.go +++ b/pkg/services/ngalert/provisioning/contactpoints_test.go @@ -8,6 +8,7 @@ import ( "testing" "github.com/prometheus/alertmanager/config" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/grafana/grafana/pkg/components/simplejson" @@ -405,11 +406,12 @@ func createEncryptedConfig(t *testing.T, secretService secrets.Service) string { func TestStitchReceivers(t *testing.T) { type testCase struct { - name string - initial *definitions.PostableUserConfig - new *definitions.PostableGrafanaReceiver - expModified bool - expCfg definitions.PostableApiAlertingConfig + name string + initial *definitions.PostableUserConfig + new *definitions.PostableGrafanaReceiver + expModified bool + expCfg definitions.PostableApiAlertingConfig + expRenamedReceiver string } cases := []testCase{ @@ -489,7 +491,8 @@ func TestStitchReceivers(t *testing.T) { Name: "new-receiver", Type: "slack", }, - expModified: true, + expModified: true, + expRenamedReceiver: "new-receiver", expCfg: definitions.PostableApiAlertingConfig{ Config: definitions.Config{ Route: &definitions.Route{ @@ -1090,7 +1093,8 @@ func TestStitchReceivers(t *testing.T) { Name: "receiver-1", Type: "slack", }, - expModified: true, + expModified: true, + expRenamedReceiver: "receiver-1", expCfg: definitions.PostableApiAlertingConfig{ Config: definitions.Config{ Route: &definitions.Route{ @@ -1142,8 +1146,12 @@ func TestStitchReceivers(t *testing.T) { cfg = c.initial } - modified := stitchReceiver(cfg, c.new) - + modified, renamedReceiver := stitchReceiver(cfg, c.new) + if 
c.expRenamedReceiver != "" { + assert.Equal(t, c.expRenamedReceiver, renamedReceiver) + } else { + assert.Empty(t, renamedReceiver) + } require.Equal(t, c.expModified, modified) require.Equal(t, c.expCfg, cfg.AlertmanagerConfig) }) diff --git a/pkg/services/ngalert/provisioning/testing.go b/pkg/services/ngalert/provisioning/testing.go index fa36058e99d..921da0498a8 100644 --- a/pkg/services/ngalert/provisioning/testing.go +++ b/pkg/services/ngalert/provisioning/testing.go @@ -8,6 +8,7 @@ import ( mock "github.com/stretchr/testify/mock" "github.com/grafana/grafana/pkg/services/ngalert/models" + "github.com/grafana/grafana/pkg/services/ngalert/notifier" ) const defaultAlertmanagerConfigJSON = ` @@ -139,3 +140,10 @@ func (a *alertmanagerConfigStoreFake) Save(ctx context.Context, revision *cfgRev } return nil } + +type NotificationSettingsValidatorProviderFake struct { +} + +func (n *NotificationSettingsValidatorProviderFake) Validator(ctx context.Context, orgID int64) (notifier.NotificationSettingsValidator, error) { + return notifier.NoValidation{}, nil +} diff --git a/pkg/services/ngalert/schedule/registry.go b/pkg/services/ngalert/schedule/registry.go index 24c030cc7ab..ad7f403ad35 100644 --- a/pkg/services/ngalert/schedule/registry.go +++ b/pkg/services/ngalert/schedule/registry.go @@ -336,6 +336,11 @@ func (r ruleWithFolder) Fingerprint() fingerprint { writeInt(0) } + for _, setting := range rule.NotificationSettings { + binary.LittleEndian.PutUint64(tmp, uint64(setting.Fingerprint())) + writeBytes(tmp) + } + // fields that do not affect the state. 
// TODO consider removing fields below from the fingerprint writeInt(rule.ID) diff --git a/pkg/services/ngalert/schedule/registry_test.go b/pkg/services/ngalert/schedule/registry_test.go index 7aa080a7ce9..f812bf7a9de 100644 --- a/pkg/services/ngalert/schedule/registry_test.go +++ b/pkg/services/ngalert/schedule/registry_test.go @@ -415,6 +415,9 @@ func TestRuleWithFolderFingerprint(t *testing.T) { "key-label": "value-label", }, IsPaused: false, + NotificationSettings: []models.NotificationSettings{ + models.NotificationSettingsGen()(), + }, } r2 := &models.AlertRule{ ID: 2, @@ -450,6 +453,9 @@ func TestRuleWithFolderFingerprint(t *testing.T) { "key-label": "value-label23", }, IsPaused: true, + NotificationSettings: []models.NotificationSettings{ + models.NotificationSettingsGen()(), + }, } excludedFields := map[string]struct{}{ diff --git a/pkg/services/ngalert/schedule/schedule.go b/pkg/services/ngalert/schedule/schedule.go index d6a87775eac..500bd278d6e 100644 --- a/pkg/services/ngalert/schedule/schedule.go +++ b/pkg/services/ngalert/schedule/schedule.go @@ -450,7 +450,7 @@ func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key ngmodels.AlertR e.scheduledAt, e.rule, results, - state.GetRuleExtraLabels(e.rule, e.folderTitle, !sch.disableGrafanaFolder), + state.GetRuleExtraLabels(logger, e.rule, e.folderTitle, !sch.disableGrafanaFolder), ) processDuration.Observe(sch.clock.Now().Sub(start).Seconds()) diff --git a/pkg/services/ngalert/sender/router_test.go b/pkg/services/ngalert/sender/router_test.go index 505a4f038bb..d3b88880f9a 100644 --- a/pkg/services/ngalert/sender/router_test.go +++ b/pkg/services/ngalert/sender/router_test.go @@ -19,6 +19,7 @@ import ( "github.com/grafana/grafana/pkg/infra/log/logtest" "github.com/grafana/grafana/pkg/services/datasources" fake_ds "github.com/grafana/grafana/pkg/services/datasources/fakes" + "github.com/grafana/grafana/pkg/services/featuremgmt" 
"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" "github.com/grafana/grafana/pkg/services/ngalert/metrics" "github.com/grafana/grafana/pkg/services/ngalert/models" @@ -410,7 +411,7 @@ func createMultiOrgAlertmanager(t *testing.T, orgs []int64) *notifier.MultiOrgAl m := metrics.NewNGAlert(registry) secretsService := secretsManager.SetupTestService(t, fake_secrets.NewFakeSecretsStore()) decryptFn := secretsService.GetDecryptedValue - moa, err := notifier.NewMultiOrgAlertmanager(cfg, cfgStore, orgStore, kvStore, fakes.NewFakeProvisioningStore(), decryptFn, m.GetMultiOrgAlertmanagerMetrics(), nil, log.New("testlogger"), secretsService) + moa, err := notifier.NewMultiOrgAlertmanager(cfg, cfgStore, orgStore, kvStore, fakes.NewFakeProvisioningStore(), decryptFn, m.GetMultiOrgAlertmanagerMetrics(), nil, log.New("testlogger"), secretsService, &featuremgmt.FeatureManager{}) require.NoError(t, err) require.NoError(t, moa.LoadAndSyncAlertmanagersForOrgs(context.Background())) require.Eventually(t, func() bool { diff --git a/pkg/services/ngalert/state/cache.go b/pkg/services/ngalert/state/cache.go index d9bfbca4dda..ae82381310e 100644 --- a/pkg/services/ngalert/state/cache.go +++ b/pkg/services/ngalert/state/cache.go @@ -121,9 +121,36 @@ func (rs *ruleStates) getOrAdd(stateCandidate State) *State { } func calculateState(ctx context.Context, log log.Logger, alertRule *ngModels.AlertRule, result eval.Result, extraLabels data.Labels, externalURL *url.URL) State { + var reserved []string + resultLabels := result.Instance + if len(resultLabels) > 0 { + for key := range ngModels.LabelsUserCannotSpecify { + if value, ok := resultLabels[key]; ok { + if reserved == nil { // make a copy of labels if we are going to modify it + resultLabels = result.Instance.Copy() + } + reserved = append(reserved, key) + delete(resultLabels, key) + // we cannot delete the reserved label completely because it can cause alert instances to collide (when this label is only unique 
across results) + // so we just rename it to something that does not collide with reserved labels + newKey := strings.TrimSuffix(strings.TrimPrefix(key, "__"), "__") + if _, ok = resultLabels[newKey]; newKey == "" || newKey == key || ok { // in the case if in the future the LabelsUserCannotSpecify contains labels that do not have double underscore + newKey = key + "_user" + } + if _, ok = resultLabels[newKey]; !ok { // if it still collides with another existing label, we just drop the label + resultLabels[newKey] = value + } else { + log.Warn("Result contains reserved label, and, after renaming, a new label collides with an existing one. Removing the label completely", "deletedLabel", key, "renamedLabel", newKey) + } + } + } + if len(reserved) > 0 { + log.Debug("Found collision of result labels and system reserved. Renamed labels with suffix '_user'", "renamedLabels", strings.Join(reserved, ",")) + } + } // Merge both the extra labels and the labels from the evaluation into a common set // of labels that can be expanded in custom labels and annotations. - templateData := template.NewData(mergeLabels(extraLabels, result.Instance), result) + templateData := template.NewData(mergeLabels(extraLabels, resultLabels), result) // For now, do nothing with these errors as they are already logged in expand. // In the future, we want to show these errors to the user somehow. @@ -139,7 +166,7 @@ func calculateState(ctx context.Context, log log.Logger, alertRule *ngModels.Ale } } - lbs := make(data.Labels, len(extraLabels)+len(labels)+len(result.Instance)) + lbs := make(data.Labels, len(extraLabels)+len(labels)+len(resultLabels)) dupes := make(data.Labels) for key, val := range extraLabels { lbs[key] = val @@ -159,7 +186,7 @@ func calculateState(ctx context.Context, log log.Logger, alertRule *ngModels.Ale log.Warn("Rule declares one or many reserved labels. 
Those rules labels will be ignored", "labels", dupes) } dupes = make(data.Labels) - for key, val := range result.Instance { + for key, val := range resultLabels { _, ok := lbs[key] // if duplicate labels exist, reserved or alert rule label will take precedence if ok { @@ -190,7 +217,7 @@ func calculateState(ctx context.Context, log log.Logger, alertRule *ngModels.Ale Values: values, StartsAt: result.EvaluatedAt, EndsAt: result.EvaluatedAt, - ResultFingerprint: result.Instance.Fingerprint(), + ResultFingerprint: result.Instance.Fingerprint(), // remember original result fingerprint } return newState } diff --git a/pkg/services/ngalert/state/cache_test.go b/pkg/services/ngalert/state/cache_test.go index 95e81513b38..58a0adbfafd 100644 --- a/pkg/services/ngalert/state/cache_test.go +++ b/pkg/services/ngalert/state/cache_test.go @@ -8,6 +8,7 @@ import ( "testing" "time" + "github.com/google/uuid" "github.com/grafana/grafana-plugin-sdk-go/data" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -263,6 +264,61 @@ func Test_getOrCreate(t *testing.T) { state := c.getOrCreate(context.Background(), l, rule, result, nil, url) assert.Equal(t, map[string]float64{"B0": 1, "B1": 2}, state.Values) }) + + t.Run("when result labels collide with system labels from LabelsUserCannotSpecify", func(t *testing.T) { + result := eval.Result{ + Instance: models.GenerateAlertLabels(5, "result-"), + } + m := models.LabelsUserCannotSpecify + t.Cleanup(func() { + models.LabelsUserCannotSpecify = m + }) + + models.LabelsUserCannotSpecify = map[string]struct{}{ + "__label1__": {}, + "label2__": {}, + "__label3": {}, + "label4": {}, + } + result.Instance["__label1__"] = uuid.NewString() + result.Instance["label2__"] = uuid.NewString() + result.Instance["__label3"] = uuid.NewString() + result.Instance["label4"] = uuid.NewString() + + rule := generateRule() + + state := c.getOrCreate(context.Background(), l, rule, result, nil, url) + + for key := range 
models.LabelsUserCannotSpecify { + assert.NotContains(t, state.Labels, key) + } + assert.Contains(t, state.Labels, "label1") + assert.Equal(t, state.Labels["label1"], result.Instance["__label1__"]) + + assert.Contains(t, state.Labels, "label2") + assert.Equal(t, state.Labels["label2"], result.Instance["label2__"]) + + assert.Contains(t, state.Labels, "label3") + assert.Equal(t, state.Labels["label3"], result.Instance["__label3"]) + + assert.Contains(t, state.Labels, "label4_user") + assert.Equal(t, state.Labels["label4_user"], result.Instance["label4"]) + + t.Run("should drop label if renamed collides with existing", func(t *testing.T) { + result.Instance["label1"] = uuid.NewString() + result.Instance["label1_user"] = uuid.NewString() + result.Instance["label4_user"] = uuid.NewString() + + state = c.getOrCreate(context.Background(), l, rule, result, nil, url) + assert.NotContains(t, state.Labels, "__label1__") + assert.Contains(t, state.Labels, "label1") + assert.Equal(t, state.Labels["label1"], result.Instance["label1"]) + assert.Equal(t, state.Labels["label1_user"], result.Instance["label1_user"]) + + assert.NotContains(t, state.Labels, "label4") + assert.Equal(t, state.Labels["label4_user"], result.Instance["label4_user"]) + }) + }) } func Test_mergeLabels(t *testing.T) { diff --git a/pkg/services/ngalert/state/state.go b/pkg/services/ngalert/state/state.go index 147d49c8ab8..6a1f3b2bab1 100644 --- a/pkg/services/ngalert/state/state.go +++ b/pkg/services/ngalert/state/state.go @@ -2,6 +2,7 @@ package state import ( "context" + "encoding/json" "errors" "fmt" "math" @@ -504,7 +505,7 @@ func ParseFormattedState(stateStr string) (eval.State, string, error) { } // GetRuleExtraLabels returns a map of built-in labels that should be added to an alert before it is sent to the Alertmanager or its state is cached. 
-func GetRuleExtraLabels(rule *models.AlertRule, folderTitle string, includeFolder bool) map[string]string { +func GetRuleExtraLabels(l log.Logger, rule *models.AlertRule, folderTitle string, includeFolder bool) map[string]string { extraLabels := make(map[string]string, 4) extraLabels[alertingModels.NamespaceUIDLabel] = rule.NamespaceUID @@ -514,5 +515,15 @@ func GetRuleExtraLabels(rule *models.AlertRule, folderTitle string, includeFolde if includeFolder { extraLabels[models.FolderTitleLabel] = folderTitle } + + if len(rule.NotificationSettings) > 0 { + // Notification settings are defined as a slice to workaround xorm behavior. + // Any items past the first should not exist so we ignore them. + if len(rule.NotificationSettings) > 1 { + ignored, _ := json.Marshal(rule.NotificationSettings[1:]) + l.Error("Detected multiple notification settings, which is not supported. Only the first will be applied", "ignored_settings", string(ignored)) + } + return mergeLabels(extraLabels, rule.NotificationSettings[0].ToLabels()) + } return extraLabels } diff --git a/pkg/services/ngalert/state/state_test.go b/pkg/services/ngalert/state/state_test.go index fa1cef5ee5f..870f0b1d87d 100644 --- a/pkg/services/ngalert/state/state_test.go +++ b/pkg/services/ngalert/state/state_test.go @@ -10,9 +10,13 @@ import ( "github.com/benbjohnson/clock" "github.com/golang/mock/gomock" + "github.com/google/uuid" + "github.com/grafana/alerting/models" + "github.com/prometheus/common/model" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/grafana/grafana/pkg/infra/log" "github.com/grafana/grafana/pkg/services/ngalert/eval" ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models" "github.com/grafana/grafana/pkg/services/screenshot" @@ -689,3 +693,80 @@ func TestParseFormattedState(t *testing.T) { require.Error(t, err) }) } + +func TestGetRuleExtraLabels(t *testing.T) { + logger := log.New() + + rule := ngmodels.AlertRuleGen()() + 
rule.NotificationSettings = nil + folderTitle := uuid.NewString() + + ns := ngmodels.NotificationSettings{ + Receiver: "Test", + GroupBy: []string{"alertname"}, + GroupWait: util.Pointer(model.Duration(1 * time.Second)), + } + + testCases := map[string]struct { + rule *ngmodels.AlertRule + includeFolder bool + expected map[string]string + }{ + "no_folder_no_notification": { + rule: ngmodels.CopyRule(rule), + includeFolder: false, + expected: map[string]string{ + models.NamespaceUIDLabel: rule.NamespaceUID, + model.AlertNameLabel: rule.Title, + models.RuleUIDLabel: rule.UID, + }, + }, + "with_folder_no_notification": { + rule: ngmodels.CopyRule(rule), + includeFolder: true, + expected: map[string]string{ + models.NamespaceUIDLabel: rule.NamespaceUID, + model.AlertNameLabel: rule.Title, + models.RuleUIDLabel: rule.UID, + models.FolderTitleLabel: folderTitle, + }, + }, + "with_notification": { + rule: func() *ngmodels.AlertRule { + r := ngmodels.CopyRule(rule) + r.NotificationSettings = []ngmodels.NotificationSettings{ns} + return r + }(), + expected: map[string]string{ + models.NamespaceUIDLabel: rule.NamespaceUID, + model.AlertNameLabel: rule.Title, + models.RuleUIDLabel: rule.UID, + ngmodels.AutogeneratedRouteLabel: "true", + ngmodels.AutogeneratedRouteReceiverNameLabel: ns.Receiver, + ngmodels.AutogeneratedRouteSettingsHashLabel: ns.Fingerprint().String(), + }, + }, + "ignore_multiple_notifications": { + rule: func() *ngmodels.AlertRule { + r := ngmodels.CopyRule(rule) + r.NotificationSettings = []ngmodels.NotificationSettings{ns, ngmodels.NotificationSettingsGen()(), ngmodels.NotificationSettingsGen()()} + return r + }(), + expected: map[string]string{ + models.NamespaceUIDLabel: rule.NamespaceUID, + model.AlertNameLabel: rule.Title, + models.RuleUIDLabel: rule.UID, + ngmodels.AutogeneratedRouteLabel: "true", + ngmodels.AutogeneratedRouteReceiverNameLabel: ns.Receiver, + ngmodels.AutogeneratedRouteSettingsHashLabel: ns.Fingerprint().String(), + }, + }, + } + + 
for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + result := GetRuleExtraLabels(logger, tc.rule, folderTitle, tc.includeFolder) + require.Equal(t, tc.expected, result) + }) + } +} diff --git a/pkg/services/ngalert/store/alert_rule.go b/pkg/services/ngalert/store/alert_rule.go index 71c97b6db78..c8d95a4108d 100644 --- a/pkg/services/ngalert/store/alert_rule.go +++ b/pkg/services/ngalert/store/alert_rule.go @@ -2,12 +2,14 @@ package store import ( "context" + "encoding/json" "errors" "fmt" "strings" "github.com/google/uuid" "golang.org/x/exp/maps" + "golang.org/x/exp/slices" "github.com/grafana/grafana/pkg/infra/db" "github.com/grafana/grafana/pkg/services/accesscontrol" @@ -18,8 +20,10 @@ import ( ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models" "github.com/grafana/grafana/pkg/services/org" "github.com/grafana/grafana/pkg/services/sqlstore" + "github.com/grafana/grafana/pkg/services/sqlstore/migrator" "github.com/grafana/grafana/pkg/services/store/entity" "github.com/grafana/grafana/pkg/util" + "xorm.io/xorm" ) // AlertRuleMaxTitleLength is the maximum length of the alert rule title @@ -141,22 +145,23 @@ func (st DBstore) InsertAlertRules(ctx context.Context, rules []ngmodels.AlertRu } newRules = append(newRules, r) ruleVersions = append(ruleVersions, ngmodels.AlertRuleVersion{ - RuleUID: r.UID, - RuleOrgID: r.OrgID, - RuleNamespaceUID: r.NamespaceUID, - RuleGroup: r.RuleGroup, - ParentVersion: 0, - Version: r.Version, - Created: r.Updated, - Condition: r.Condition, - Title: r.Title, - Data: r.Data, - IntervalSeconds: r.IntervalSeconds, - NoDataState: r.NoDataState, - ExecErrState: r.ExecErrState, - For: r.For, - Annotations: r.Annotations, - Labels: r.Labels, + RuleUID: r.UID, + RuleOrgID: r.OrgID, + RuleNamespaceUID: r.NamespaceUID, + RuleGroup: r.RuleGroup, + ParentVersion: 0, + Version: r.Version, + Created: r.Updated, + Condition: r.Condition, + Title: r.Title, + Data: r.Data, + IntervalSeconds: r.IntervalSeconds, + 
NoDataState: r.NoDataState, + ExecErrState: r.ExecErrState, + For: r.For, + Annotations: r.Annotations, + Labels: r.Labels, + NotificationSettings: r.NotificationSettings, }) } if len(newRules) > 0 { @@ -216,23 +221,24 @@ func (st DBstore) UpdateAlertRules(ctx context.Context, rules []ngmodels.UpdateR } parentVersion = r.Existing.Version ruleVersions = append(ruleVersions, ngmodels.AlertRuleVersion{ - RuleOrgID: r.New.OrgID, - RuleUID: r.New.UID, - RuleNamespaceUID: r.New.NamespaceUID, - RuleGroup: r.New.RuleGroup, - RuleGroupIndex: r.New.RuleGroupIndex, - ParentVersion: parentVersion, - Version: r.New.Version + 1, - Created: r.New.Updated, - Condition: r.New.Condition, - Title: r.New.Title, - Data: r.New.Data, - IntervalSeconds: r.New.IntervalSeconds, - NoDataState: r.New.NoDataState, - ExecErrState: r.New.ExecErrState, - For: r.New.For, - Annotations: r.New.Annotations, - Labels: r.New.Labels, + RuleOrgID: r.New.OrgID, + RuleUID: r.New.UID, + RuleNamespaceUID: r.New.NamespaceUID, + RuleGroup: r.New.RuleGroup, + RuleGroupIndex: r.New.RuleGroupIndex, + ParentVersion: parentVersion, + Version: r.New.Version + 1, + Created: r.New.Updated, + Condition: r.New.Condition, + Title: r.New.Title, + Data: r.New.Data, + IntervalSeconds: r.New.IntervalSeconds, + NoDataState: r.New.NoDataState, + ExecErrState: r.New.ExecErrState, + For: r.New.For, + Annotations: r.New.Annotations, + Labels: r.New.Labels, + NotificationSettings: r.New.NotificationSettings, }) } if len(ruleVersions) > 0 { @@ -365,6 +371,13 @@ func (st DBstore) ListAlertRules(ctx context.Context, query *ngmodels.ListAlertR q = q.Where("rule_group = ?", query.RuleGroup) } + if query.ReceiverName != "" { + q, err = st.filterByReceiverName(query.ReceiverName, q) + if err != nil { + return err + } + } + q = q.Asc("namespace_uid", "rule_group", "rule_group_idx", "id") alertRules := make([]*ngmodels.AlertRule, 0) @@ -385,6 +398,13 @@ func (st DBstore) ListAlertRules(ctx context.Context, query *ngmodels.ListAlertR 
st.Logger.Error("Invalid rule found in DB store, ignoring it", "func", "ListAlertRules", "error", err) continue } + if query.ReceiverName != "" { // remove false-positive hits from the result + if !slices.ContainsFunc(rule.NotificationSettings, func(settings ngmodels.NotificationSettings) bool { + return settings.Receiver == query.ReceiverName + }) { + continue + } + } alertRules = append(alertRules, rule) } @@ -648,3 +668,91 @@ func (st DBstore) validateAlertRule(alertRule ngmodels.AlertRule) error { return nil } + +// ListNotificationSettings fetches all notification settings for given organization +func (st DBstore) ListNotificationSettings(ctx context.Context, q ngmodels.ListNotificationSettingsQuery) (map[ngmodels.AlertRuleKey][]ngmodels.NotificationSettings, error) { + var rules []ngmodels.AlertRule + err := st.SQLStore.WithDbSession(ctx, func(sess *db.Session) error { + query := sess.Table(ngmodels.AlertRule{}).Select("uid, notification_settings").Where("org_id = ?", q.OrgID) + if q.ReceiverName != "" { + var err error + query, err = st.filterByReceiverName(q.ReceiverName, query) + if err != nil { + return err + } + } else { + query = query.And("notification_settings IS NOT NULL AND notification_settings <> 'null'") + } + return query.Find(&rules) + }) + if err != nil { + return nil, err + } + result := make(map[ngmodels.AlertRuleKey][]ngmodels.NotificationSettings, len(rules)) + for _, rule := range rules { + var ns []ngmodels.NotificationSettings + if q.ReceiverName != "" { // if filter by receiver name is specified, perform fine filtering on client to avoid false-positives + for _, setting := range rule.NotificationSettings { + if q.ReceiverName == setting.Receiver { // currently, there can be only one setting. 
If in future there are more, we will return all settings of a rule that has a setting with receiver + ns = rule.NotificationSettings + break + } + } + } else { + ns = rule.NotificationSettings + } + if len(ns) > 0 { + key := ngmodels.AlertRuleKey{ + OrgID: q.OrgID, + UID: rule.UID, + } + result[key] = rule.NotificationSettings + } + } + return result, nil +} + +func (st DBstore) filterByReceiverName(receiver string, sess *xorm.Session) (*xorm.Session, error) { + if receiver == "" { + return sess, nil + } + // marshall string according to JSON rules so we follow escaping rules. + b, err := json.Marshal(receiver) + if err != nil { + return nil, fmt.Errorf("failed to marshall receiver name query: %w", err) + } + var search = string(b) + if st.SQLStore.GetDialect().DriverName() != migrator.SQLite { + // this escapes escaped double quote (\") to \\\" + search = strings.ReplaceAll(strings.ReplaceAll(search, `\`, `\\`), `"`, `\"`) + } + return sess.And(fmt.Sprintf("notification_settings %s ?", st.SQLStore.GetDialect().LikeStr()), "%"+search+"%"), nil +} + +func (st DBstore) RenameReceiverInNotificationSettings(ctx context.Context, orgID int64, oldReceiver, newReceiver string) (int, error) { + // fetch entire rules because Update method requires it because it copies rules to version table + rules, err := st.ListAlertRules(ctx, &ngmodels.ListAlertRulesQuery{ + OrgID: orgID, + ReceiverName: oldReceiver, + }) + if err != nil { + return 0, err + } + if len(rules) == 0 { + return 0, nil + } + var updates []ngmodels.UpdateRule + for _, rule := range rules { + r := ngmodels.CopyRule(rule) + for idx := range r.NotificationSettings { + if r.NotificationSettings[idx].Receiver == oldReceiver { + r.NotificationSettings[idx].Receiver = newReceiver + } + } + updates = append(updates, ngmodels.UpdateRule{ + Existing: rule, + New: *r, + }) + } + return len(updates), st.UpdateAlertRules(ctx, updates) +} diff --git a/pkg/services/ngalert/store/alert_rule_test.go 
b/pkg/services/ngalert/store/alert_rule_test.go index 844173297d6..ca0e0765b92 100644 --- a/pkg/services/ngalert/store/alert_rule_test.go +++ b/pkg/services/ngalert/store/alert_rule_test.go @@ -5,10 +5,12 @@ import ( "errors" "fmt" "strings" + "sync" "testing" "time" "github.com/google/uuid" + "github.com/stretchr/testify/assert" "github.com/grafana/grafana/pkg/bus" "github.com/grafana/grafana/pkg/infra/log" @@ -650,6 +652,168 @@ func TestIntegrationInsertAlertRules(t *testing.T) { require.ErrorContains(t, err, deref[0].NamespaceUID) } +func TestIntegrationAlertRulesNotificationSettings(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + sqlStore := db.InitTestDB(t) + cfg := setting.NewCfg() + cfg.UnifiedAlerting.BaseInterval = 1 * time.Second + store := &DBstore{ + SQLStore: sqlStore, + FolderService: setupFolderService(t, sqlStore, cfg, featuremgmt.WithFeatures()), + Logger: log.New("test-dbstore"), + Cfg: cfg.UnifiedAlerting, + } + + uniqueUids := &sync.Map{} + receiverName := "receiver\"-" + uuid.NewString() + rules := models.GenerateAlertRules(3, models.AlertRuleGen(models.WithOrgID(1), withIntervalMatching(store.Cfg.BaseInterval), models.WithUniqueUID(uniqueUids))) + receiveRules := models.GenerateAlertRules(3, + models.AlertRuleGen( + models.WithOrgID(1), + withIntervalMatching(store.Cfg.BaseInterval), + models.WithUniqueUID(uniqueUids), + models.WithNotificationSettingsGen(models.NotificationSettingsGen(models.NSMuts.WithReceiver(receiverName))))) + noise := models.GenerateAlertRules(3, + models.AlertRuleGen( + models.WithOrgID(1), + withIntervalMatching(store.Cfg.BaseInterval), + models.WithUniqueUID(uniqueUids), + models.WithNotificationSettingsGen(models.NotificationSettingsGen(models.NSMuts.WithMuteTimeIntervals(receiverName))))) // simulate collision of names of receiver and mute timing + deref := make([]models.AlertRule, 0, len(rules)+len(receiveRules)+len(noise)) + for _, rule := range append(append(rules, 
receiveRules...), noise...) { + r := *rule + r.ID = 0 + deref = append(deref, r) + } + + _, err := store.InsertAlertRules(context.Background(), deref) + require.NoError(t, err) + + t.Run("should find rules by receiver name", func(t *testing.T) { + expectedUIDs := map[string]struct{}{} + for _, rule := range receiveRules { + expectedUIDs[rule.UID] = struct{}{} + } + actual, err := store.ListAlertRules(context.Background(), &models.ListAlertRulesQuery{ + OrgID: 1, + ReceiverName: receiverName, + }) + require.NoError(t, err) + assert.Len(t, actual, len(expectedUIDs)) + for _, rule := range actual { + assert.Contains(t, expectedUIDs, rule.UID) + } + }) + + t.Run("RenameReceiverInNotificationSettings should update all rules that refer to the old receiver", func(t *testing.T) { + newName := "new-receiver" + affected, err := store.RenameReceiverInNotificationSettings(context.Background(), 1, receiverName, newName) + require.NoError(t, err) + require.Equal(t, len(receiveRules), affected) + + expectedUIDs := map[string]struct{}{} + for _, rule := range receiveRules { + expectedUIDs[rule.UID] = struct{}{} + } + actual, err := store.ListAlertRules(context.Background(), &models.ListAlertRulesQuery{ + OrgID: 1, + ReceiverName: newName, + }) + require.NoError(t, err) + assert.Len(t, actual, len(expectedUIDs)) + for _, rule := range actual { + assert.Contains(t, expectedUIDs, rule.UID) + } + + actual, err = store.ListAlertRules(context.Background(), &models.ListAlertRulesQuery{ + OrgID: 1, + ReceiverName: receiverName, + }) + require.NoError(t, err) + require.Empty(t, actual) + }) +} + +func TestIntegrationListNotificationSettings(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + sqlStore := db.InitTestDB(t) + cfg := setting.NewCfg() + cfg.UnifiedAlerting.BaseInterval = 1 * time.Second + store := &DBstore{ + SQLStore: sqlStore, + FolderService: setupFolderService(t, sqlStore, cfg, featuremgmt.WithFeatures()), + Logger: log.New("test-dbstore"), + 
Cfg: cfg.UnifiedAlerting, + } + + uids := &sync.Map{} + titles := &sync.Map{} + receiverName := `receiver%"-👍'test` + rulesWithNotifications := models.GenerateAlertRules(5, models.AlertRuleGen( + models.WithOrgID(1), + models.WithUniqueUID(uids), + models.WithUniqueTitle(titles), + withIntervalMatching(store.Cfg.BaseInterval), + models.WithNotificationSettingsGen(models.NotificationSettingsGen(models.NSMuts.WithReceiver(receiverName))), + )) + rulesInOtherOrg := models.GenerateAlertRules(5, models.AlertRuleGen( + models.WithOrgID(2), + models.WithUniqueUID(uids), + models.WithUniqueTitle(titles), + withIntervalMatching(store.Cfg.BaseInterval), + models.WithNotificationSettingsGen(models.NotificationSettingsGen()), + )) + rulesWithNoNotifications := models.GenerateAlertRules(5, models.AlertRuleGen( + models.WithOrgID(1), + models.WithUniqueUID(uids), + models.WithUniqueTitle(titles), + withIntervalMatching(store.Cfg.BaseInterval), + models.WithNoNotificationSettings(), + )) + deref := make([]models.AlertRule, 0, len(rulesWithNotifications)+len(rulesWithNoNotifications)+len(rulesInOtherOrg)) + for _, rule := range append(append(rulesWithNotifications, rulesWithNoNotifications...), rulesInOtherOrg...) 
{ + r := *rule + r.ID = 0 + deref = append(deref, r) + } + + _, err := store.InsertAlertRules(context.Background(), deref) + require.NoError(t, err) + + result, err := store.ListNotificationSettings(context.Background(), models.ListNotificationSettingsQuery{OrgID: 1}) + require.NoError(t, err) + require.Len(t, result, len(rulesWithNotifications)) + for _, rule := range rulesWithNotifications { + if !assert.Contains(t, result, rule.GetKey()) { + continue + } + assert.EqualValues(t, rule.NotificationSettings, result[rule.GetKey()]) + } + + t.Run("should list notification settings by receiver name", func(t *testing.T) { + expectedUIDs := map[models.AlertRuleKey]struct{}{} + for _, rule := range rulesWithNotifications { + expectedUIDs[rule.GetKey()] = struct{}{} + } + + actual, err := store.ListNotificationSettings(context.Background(), models.ListNotificationSettingsQuery{ + OrgID: 1, + ReceiverName: receiverName, + }) + require.NoError(t, err) + assert.Len(t, actual, len(expectedUIDs)) + for ruleKey := range actual { + assert.Contains(t, expectedUIDs, ruleKey) + } + }) +} + // createAlertRule creates an alert rule in the database and returns it. // If a generator is not specified, uniqueness of primary key is not guaranteed. func createRule(t *testing.T, store *DBstore, generate func() *models.AlertRule) *models.AlertRule { diff --git a/pkg/services/ngalert/store/deltas.go b/pkg/services/ngalert/store/deltas.go index 15184da809f..27fe5d86289 100644 --- a/pkg/services/ngalert/store/deltas.go +++ b/pkg/services/ngalert/store/deltas.go @@ -31,6 +31,27 @@ func (c *GroupDelta) IsEmpty() bool { return len(c.Update)+len(c.New)+len(c.Delete) == 0 } +// NewOrUpdatedNotificationSettings returns a list of notification settings that are either new or updated in the group. 
+func (c *GroupDelta) NewOrUpdatedNotificationSettings() []models.NotificationSettings { + var settings []models.NotificationSettings + for _, rule := range c.New { + if len(rule.NotificationSettings) > 0 { + settings = append(settings, rule.NotificationSettings...) + } + } + for _, delta := range c.Update { + if len(delta.New.NotificationSettings) == 0 { + continue + } + d := delta.Diff.GetDiffsForField("NotificationSettings") + if len(d) == 0 { + continue + } + settings = append(settings, delta.New.NotificationSettings...) + } + return settings +} + type RuleReader interface { ListAlertRules(ctx context.Context, query *models.ListAlertRulesQuery) (models.RulesGroup, error) GetAlertRulesGroupByRuleUID(ctx context.Context, query *models.GetAlertRulesGroupByRuleUIDQuery) ([]*models.AlertRule, error) diff --git a/pkg/services/provisioning/alerting/provisioner.go b/pkg/services/provisioning/alerting/provisioner.go index bff80ce8706..1555de96f05 100644 --- a/pkg/services/provisioning/alerting/provisioner.go +++ b/pkg/services/provisioning/alerting/provisioner.go @@ -29,15 +29,6 @@ func Provision(ctx context.Context, cfg ProvisionerConfig) error { } logger.Info("starting to provision alerting") logger.Debug("read all alerting files", "file_count", len(files)) - ruleProvisioner := NewAlertRuleProvisioner( - logger, - cfg.DashboardService, - cfg.DashboardProvService, - cfg.RuleService) - err = ruleProvisioner.Provision(ctx, files) - if err != nil { - return fmt.Errorf("alert rules: %w", err) - } cpProvisioner := NewContactPointProvisoner(logger, cfg.ContactPointService) err = cpProvisioner.Provision(ctx, files) if err != nil { @@ -62,10 +53,6 @@ func Provision(ctx context.Context, cfg ProvisionerConfig) error { if err != nil { return fmt.Errorf("notification policies: %w", err) } - err = cpProvisioner.Unprovision(ctx, files) - if err != nil { - return fmt.Errorf("contact points: %w", err) - } err = mtProvisioner.Unprovision(ctx, files) if err != nil { return 
fmt.Errorf("mute times: %w", err) @@ -74,6 +61,19 @@ func Provision(ctx context.Context, cfg ProvisionerConfig) error { if err != nil { return fmt.Errorf("text templates: %w", err) } + ruleProvisioner := NewAlertRuleProvisioner( + logger, + cfg.DashboardService, + cfg.DashboardProvService, + cfg.RuleService) + err = ruleProvisioner.Provision(ctx, files) + if err != nil { + return fmt.Errorf("alert rules: %w", err) + } + err = cpProvisioner.Unprovision(ctx, files) // Unprovision contact points after rules to make sure all references in rules are updated + if err != nil { + return fmt.Errorf("contact points: %w", err) + } logger.Info("finished to provision alerting") return nil } diff --git a/pkg/services/provisioning/alerting/rules_types.go b/pkg/services/provisioning/alerting/rules_types.go index 8c245d0ee9a..5e72156b226 100644 --- a/pkg/services/provisioning/alerting/rules_types.go +++ b/pkg/services/provisioning/alerting/rules_types.go @@ -11,6 +11,7 @@ import ( "github.com/grafana/grafana/pkg/services/ngalert/models" "github.com/grafana/grafana/pkg/services/provisioning/values" + "github.com/grafana/grafana/pkg/util" ) type RuleDelete struct { @@ -61,18 +62,19 @@ func (ruleGroupV1 *AlertRuleGroupV1) MapToModel() (models.AlertRuleGroupWithFold } type AlertRuleV1 struct { - UID values.StringValue `json:"uid" yaml:"uid"` - Title values.StringValue `json:"title" yaml:"title"` - Condition values.StringValue `json:"condition" yaml:"condition"` - Data []QueryV1 `json:"data" yaml:"data"` - DashboardUID values.StringValue `json:"dasboardUid" yaml:"dashboardUid"` - PanelID values.Int64Value `json:"panelId" yaml:"panelId"` - NoDataState values.StringValue `json:"noDataState" yaml:"noDataState"` - ExecErrState values.StringValue `json:"execErrState" yaml:"execErrState"` - For values.StringValue `json:"for" yaml:"for"` - Annotations values.StringMapValue `json:"annotations" yaml:"annotations"` - Labels values.StringMapValue `json:"labels" yaml:"labels"` - IsPaused 
values.BoolValue `json:"isPaused" yaml:"isPaused"` + UID values.StringValue `json:"uid" yaml:"uid"` + Title values.StringValue `json:"title" yaml:"title"` + Condition values.StringValue `json:"condition" yaml:"condition"` + Data []QueryV1 `json:"data" yaml:"data"` + DashboardUID values.StringValue `json:"dasboardUid" yaml:"dashboardUid"` + PanelID values.Int64Value `json:"panelId" yaml:"panelId"` + NoDataState values.StringValue `json:"noDataState" yaml:"noDataState"` + ExecErrState values.StringValue `json:"execErrState" yaml:"execErrState"` + For values.StringValue `json:"for" yaml:"for"` + Annotations values.StringMapValue `json:"annotations" yaml:"annotations"` + Labels values.StringMapValue `json:"labels" yaml:"labels"` + IsPaused values.BoolValue `json:"isPaused" yaml:"isPaused"` + NotificationSettings *NotificationSettingsV1 `json:"notification_settings" yaml:"notification_settings"` } func (rule *AlertRuleV1) mapToModel(orgID int64) (models.AlertRule, error) { @@ -130,6 +132,13 @@ func (rule *AlertRuleV1) mapToModel(orgID int64) (models.AlertRule, error) { return models.AlertRule{}, fmt.Errorf("rule '%s' failed to parse: no data set", alertRule.Title) } alertRule.IsPaused = rule.IsPaused.Value() + if rule.NotificationSettings != nil { + ns, err := rule.NotificationSettings.mapToModel() + if err != nil { + return models.AlertRule{}, fmt.Errorf("rule '%s' failed to parse: %w", alertRule.Title, err) + } + alertRule.NotificationSettings = append(alertRule.NotificationSettings, ns) + } return alertRule, nil } @@ -169,3 +178,71 @@ func (queryV1 *QueryV1) mapToModel() (models.AlertQuery, error) { Model: rawMessage, }, nil } + +type NotificationSettingsV1 struct { + Receiver values.StringValue `json:"receiver" yaml:"receiver"` + GroupBy []values.StringValue `json:"group_by,omitempty" yaml:"group_by"` + GroupWait values.StringValue `json:"group_wait,omitempty" yaml:"group_wait"` + GroupInterval values.StringValue `json:"group_interval,omitempty" 
yaml:"group_interval"` + RepeatInterval values.StringValue `json:"repeat_interval,omitempty" yaml:"repeat_interval"` + MuteTimeIntervals []values.StringValue `json:"mute_time_intervals,omitempty" yaml:"mute_time_intervals"` +} + +func (nsV1 *NotificationSettingsV1) mapToModel() (models.NotificationSettings, error) { + if nsV1.Receiver.Value() == "" { + return models.NotificationSettings{}, fmt.Errorf("receiver must not be empty") + } + var gw, gi, ri *model.Duration + if nsV1.GroupWait.Value() != "" { + dur, err := model.ParseDuration(nsV1.GroupWait.Value()) + if err != nil { + return models.NotificationSettings{}, fmt.Errorf("failed to parse group wait: %w", err) + } + gw = util.Pointer(dur) + } + if nsV1.GroupInterval.Value() != "" { + dur, err := model.ParseDuration(nsV1.GroupInterval.Value()) + if err != nil { + return models.NotificationSettings{}, fmt.Errorf("failed to parse group interval: %w", err) + } + gi = util.Pointer(dur) + } + if nsV1.RepeatInterval.Value() != "" { + dur, err := model.ParseDuration(nsV1.RepeatInterval.Value()) + if err != nil { + return models.NotificationSettings{}, fmt.Errorf("failed to parse repeat interval: %w", err) + } + ri = util.Pointer(dur) + } + + var groupBy []string + if len(nsV1.GroupBy) > 0 { + groupBy = make([]string, 0, len(nsV1.GroupBy)) + for _, value := range nsV1.GroupBy { + if value.Value() == "" { + continue + } + groupBy = append(groupBy, value.Value()) + } + } + + var mute []string + if len(nsV1.MuteTimeIntervals) > 0 { + mute = make([]string, 0, len(nsV1.MuteTimeIntervals)) + for _, value := range nsV1.MuteTimeIntervals { + if value.Value() == "" { + continue + } + mute = append(mute, value.Value()) + } + } + + return models.NotificationSettings{ + Receiver: nsV1.Receiver.Value(), + GroupBy: groupBy, + GroupWait: gw, + GroupInterval: gi, + RepeatInterval: ri, + MuteTimeIntervals: mute, + }, nil +} diff --git a/pkg/services/provisioning/alerting/rules_types_test.go 
b/pkg/services/provisioning/alerting/rules_types_test.go index dcca42e0574..fda8947cf8b 100644 --- a/pkg/services/provisioning/alerting/rules_types_test.go +++ b/pkg/services/provisioning/alerting/rules_types_test.go @@ -4,11 +4,13 @@ import ( "testing" "time" + "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "gopkg.in/yaml.v3" "github.com/grafana/grafana/pkg/services/ngalert/models" "github.com/grafana/grafana/pkg/services/provisioning/values" + "github.com/grafana/grafana/pkg/util" ) func TestRuleGroup(t *testing.T) { @@ -187,6 +189,109 @@ func TestRules(t *testing.T) { require.NoError(t, err) require.Equal(t, ruleMapped.NoDataState, models.NoData) }) + t.Run("a rule with notification settings should map it correctly", func(t *testing.T) { + rule := validRuleV1(t) + rule.NotificationSettings = &NotificationSettingsV1{ + Receiver: stringToStringValue("test-receiver"), + } + ruleMapped, err := rule.mapToModel(1) + require.NoError(t, err) + require.Len(t, ruleMapped.NotificationSettings, 1) + require.Equal(t, models.NotificationSettings{Receiver: "test-receiver"}, ruleMapped.NotificationSettings[0]) + }) +} + +func TestNotificationsSettingsV1MapToModel(t *testing.T) { + tests := []struct { + name string + input NotificationSettingsV1 + expected models.NotificationSettings + wantErr bool + }{ + { + name: "Valid Input", + input: NotificationSettingsV1{ + Receiver: stringToStringValue("test-receiver"), + GroupBy: []values.StringValue{stringToStringValue("test-group_by")}, + GroupWait: stringToStringValue("1s"), + GroupInterval: stringToStringValue("2s"), + RepeatInterval: stringToStringValue("3s"), + MuteTimeIntervals: []values.StringValue{stringToStringValue("test-mute")}, + }, + expected: models.NotificationSettings{ + Receiver: "test-receiver", + GroupBy: []string{"test-group_by"}, + GroupWait: util.Pointer(model.Duration(1 * time.Second)), + GroupInterval: util.Pointer(model.Duration(2 * time.Second)), + RepeatInterval: 
util.Pointer(model.Duration(3 * time.Second)), + MuteTimeIntervals: []string{"test-mute"}, + }, + }, + { + name: "Skips empty elements in group_by", + input: NotificationSettingsV1{ + Receiver: stringToStringValue("test-receiver"), + GroupBy: []values.StringValue{stringToStringValue("test-group_by1"), stringToStringValue(""), stringToStringValue("test-group_by2")}, + }, + expected: models.NotificationSettings{ + Receiver: "test-receiver", + GroupBy: []string{"test-group_by1", "test-group_by2"}, + }, + }, + { + name: "Skips empty elements in mute timings", + input: NotificationSettingsV1{ + Receiver: stringToStringValue("test-receiver"), + MuteTimeIntervals: []values.StringValue{stringToStringValue("test-mute1"), stringToStringValue(""), stringToStringValue("test-mute2")}, + }, + expected: models.NotificationSettings{ + Receiver: "test-receiver", + MuteTimeIntervals: []string{"test-mute1", "test-mute2"}, + }, + }, + { + name: "Empty Receiver", + input: NotificationSettingsV1{ + Receiver: stringToStringValue(""), + }, + wantErr: true, + }, + { + name: "Invalid GroupWait Duration", + input: NotificationSettingsV1{ + Receiver: stringToStringValue("test-receiver"), + GroupWait: stringToStringValue("invalidDuration"), + }, + wantErr: true, + }, + { + name: "Invalid GroupInterval Duration", + input: NotificationSettingsV1{ + Receiver: stringToStringValue("test-receiver"), + GroupInterval: stringToStringValue("invalidDuration"), + }, + wantErr: true, + }, + { + name: "Invalid RepeatInterval Duration", + input: NotificationSettingsV1{ + Receiver: stringToStringValue("test-receiver"), + GroupInterval: stringToStringValue("invalidDuration"), + }, + wantErr: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got, err := tc.input.mapToModel() + if tc.wantErr { + require.Error(t, err) + return + } + require.Equal(t, tc.expected, got) + }) + } } func validRuleGroupV1(t *testing.T) AlertRuleGroupV1 { @@ -238,3 +343,12 @@ func validRuleV1(t 
*testing.T) AlertRuleV1 { Data: []QueryV1{{}}, } } + +func stringToStringValue(s string) values.StringValue { + result := values.StringValue{} + err := yaml.Unmarshal([]byte(s), &result) + if err != nil { + panic(err) + } + return result +} diff --git a/pkg/services/provisioning/provisioning.go b/pkg/services/provisioning/provisioning.go index 91e5669208b..cfb02bbf9f5 100644 --- a/pkg/services/provisioning/provisioning.go +++ b/pkg/services/provisioning/provisioning.go @@ -16,7 +16,7 @@ import ( datasourceservice "github.com/grafana/grafana/pkg/services/datasources" "github.com/grafana/grafana/pkg/services/encryption" "github.com/grafana/grafana/pkg/services/folder" - alertingNotifier "github.com/grafana/grafana/pkg/services/ngalert/notifier" + "github.com/grafana/grafana/pkg/services/ngalert/notifier" "github.com/grafana/grafana/pkg/services/ngalert/provisioning" "github.com/grafana/grafana/pkg/services/ngalert/store" "github.com/grafana/grafana/pkg/services/notifications" @@ -281,10 +281,10 @@ func (ps *ProvisioningServiceImpl) ProvisionAlerting(ctx context.Context) error int64(ps.Cfg.UnifiedAlerting.DefaultRuleEvaluationInterval.Seconds()), int64(ps.Cfg.UnifiedAlerting.BaseInterval.Seconds()), ps.Cfg.UnifiedAlerting.RulesPerRuleGroupLimit, - ps.log) - receiverSvc := alertingNotifier.NewReceiverService(ps.ac, &st, st, ps.secretService, ps.SQLStore, ps.log) + ps.log, notifier.NewCachedNotificationSettingsValidationService(&st)) + receiverSvc := notifier.NewReceiverService(ps.ac, &st, st, ps.secretService, ps.SQLStore, ps.log) contactPointService := provisioning.NewContactPointService(&st, ps.secretService, - st, ps.SQLStore, receiverSvc, ps.log) + st, ps.SQLStore, receiverSvc, ps.log, &st) notificationPolicyService := provisioning.NewNotificationPolicyService(&st, st, ps.SQLStore, ps.Cfg.UnifiedAlerting, ps.log) mutetimingsService := provisioning.NewMuteTimingService(&st, st, &st, ps.log) diff --git a/pkg/services/sqlstore/migrations/migrations.go 
b/pkg/services/sqlstore/migrations/migrations.go index e4602dada79..fd2ecacfc06 100644 --- a/pkg/services/sqlstore/migrations/migrations.go +++ b/pkg/services/sqlstore/migrations/migrations.go @@ -117,6 +117,8 @@ func (oss *OSSMigrations) AddMigration(mg *Migrator) { } addKVStoreMySQLValueTypeLongTextMigration(mg) + + ualert.AddRuleNotificationSettingsColumns(mg) } func addStarMigrations(mg *Migrator) { diff --git a/pkg/services/sqlstore/migrations/ualert/rule_notification_settings_mig.go b/pkg/services/sqlstore/migrations/ualert/rule_notification_settings_mig.go new file mode 100644 index 00000000000..7c5eb2a6ebb --- /dev/null +++ b/pkg/services/sqlstore/migrations/ualert/rule_notification_settings_mig.go @@ -0,0 +1,20 @@ +package ualert + +import ( + "github.com/grafana/grafana/pkg/services/sqlstore/migrator" +) + +// AddRuleNotificationSettingsColumns creates a column for notification settings in the alert_rule and alert_rule_version tables. +func AddRuleNotificationSettingsColumns(mg *migrator.Migrator) { + mg.AddMigration("add notification_settings column to alert_rule table", migrator.NewAddColumnMigration(migrator.Table{Name: "alert_rule"}, &migrator.Column{ + Name: "notification_settings", + Type: migrator.DB_Text, + Nullable: true, + })) + + mg.AddMigration("add notification_settings column to alert_rule_version table", migrator.NewAddColumnMigration(migrator.Table{Name: "alert_rule_version"}, &migrator.Column{ + Name: "notification_settings", + Type: migrator.DB_Text, + Nullable: true, + })) +} diff --git a/pkg/tests/api/alerting/api_ruler_test.go b/pkg/tests/api/alerting/api_ruler_test.go index e634492f224..cf9186d8f87 100644 --- a/pkg/tests/api/alerting/api_ruler_test.go +++ b/pkg/tests/api/alerting/api_ruler_test.go @@ -9,6 +9,7 @@ import ( "math/rand" "net/http" "path" + "slices" "strings" "testing" "time" @@ -16,6 +17,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/uuid" "github.com/grafana/grafana-plugin-sdk-go/data" + 
"github.com/prometheus/alertmanager/pkg/labels" "github.com/prometheus/common/model" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -1816,3 +1818,247 @@ func TestIntegrationHysteresisRule(t *testing.T) { require.NoErrorf(t, json.Unmarshal([]byte(f.At(normalIdx).(string)), &d), body) assert.EqualValuesf(t, 1, d.Values["B"], body) } + +func TestIntegrationRuleNotificationSettings(t *testing.T) { + testinfra.SQLiteIntegrationTest(t) + + // Setup Grafana and its Database. Scheduler is set to evaluate every 1 second + dir, p := testinfra.CreateGrafDir(t, testinfra.GrafanaOpts{ + DisableLegacyAlerting: true, + EnableUnifiedAlerting: true, + DisableAnonymous: true, + AppModeProduction: true, + NGAlertSchedulerBaseInterval: 1 * time.Second, + EnableFeatureToggles: []string{featuremgmt.FlagConfigurableSchedulerTick, featuremgmt.FlagAlertingSimplifiedRouting}, + }) + + grafanaListedAddr, store := testinfra.StartGrafana(t, dir, p) + + // Create a user to make authenticated requests + createUser(t, store, user.CreateUserCommand{ + DefaultOrgRole: string(org.RoleAdmin), + Password: "password", + Login: "grafana", + }) + + apiClient := newAlertingApiClient(grafanaListedAddr, "grafana", "password") + + folder := "Test-Alerting" + apiClient.CreateFolder(t, folder, folder) + + testDataRaw, err := testData.ReadFile(path.Join("test-data", "rule-notification-settings-1-post.json")) + require.NoError(t, err) + + type testData struct { + RuleGroup apimodels.PostableRuleGroupConfig + Receiver apimodels.EmbeddedContactPoint + TimeInterval apimodels.MuteTimeInterval + } + var d testData + err = json.Unmarshal(testDataRaw, &d) + require.NoError(t, err) + + apiClient.EnsureReceiver(t, d.Receiver) + apiClient.EnsureMuteTiming(t, d.TimeInterval) + + t.Run("create should fail if receiver does not exist", func(t *testing.T) { + var copyD testData + err = json.Unmarshal(testDataRaw, &copyD) + group := copyD.RuleGroup + ns := 
group.Rules[0].GrafanaManagedAlert.NotificationSettings + ns.Receiver = "random-receiver" + + _, status, body := apiClient.PostRulesGroupWithStatus(t, folder, &group) + require.Equalf(t, http.StatusBadRequest, status, body) + t.Log(body) + }) + + t.Run("create should fail if mute timing does not exist", func(t *testing.T) { + var copyD testData + err = json.Unmarshal(testDataRaw, &copyD) + group := copyD.RuleGroup + ns := group.Rules[0].GrafanaManagedAlert.NotificationSettings + ns.MuteTimeIntervals = []string{"random-time-interval"} + + _, status, body := apiClient.PostRulesGroupWithStatus(t, folder, &group) + require.Equalf(t, http.StatusBadRequest, status, body) + t.Log(body) + }) + + t.Run("create should fail if group_by does not contain special labels", func(t *testing.T) { + var copyD testData + err = json.Unmarshal(testDataRaw, &copyD) + group := copyD.RuleGroup + ns := group.Rules[0].GrafanaManagedAlert.NotificationSettings + ns.GroupBy = []string{"label1"} + + _, status, body := apiClient.PostRulesGroupWithStatus(t, folder, &group) + require.Equalf(t, http.StatusBadRequest, status, body) + t.Log(body) + }) + + t.Run("should create rule and generate route", func(t *testing.T) { + _, status, body := apiClient.PostRulesGroupWithStatus(t, folder, &d.RuleGroup) + require.Equalf(t, http.StatusAccepted, status, body) + notificationSettings := d.RuleGroup.Rules[0].GrafanaManagedAlert.NotificationSettings + + var routeBody string + if !assert.EventuallyWithT(t, func(c *assert.CollectT) { + amConfig, status, body := apiClient.GetAlertmanagerConfigWithStatus(t) + routeBody = body + if !assert.Equalf(t, http.StatusOK, status, body) { + return + } + route := amConfig.AlertmanagerConfig.Route + + if !assert.Len(c, route.Routes, 1) { + return + } + + // Check that we are in the auto-generated root + autogenRoute := route.Routes[0] + if !assert.Len(c, autogenRoute.ObjectMatchers, 1) { + return + } + canContinue := assert.Equal(c, ngmodels.AutogeneratedRouteLabel, 
autogenRoute.ObjectMatchers[0].Name) + assert.Equal(c, labels.MatchEqual, autogenRoute.ObjectMatchers[0].Type) + assert.Equal(c, "true", autogenRoute.ObjectMatchers[0].Value) + + assert.Equalf(c, route.Receiver, autogenRoute.Receiver, "Autogenerated root receiver must be the default one") + assert.Nil(c, autogenRoute.GroupWait) + assert.Nil(c, autogenRoute.GroupInterval) + assert.Nil(c, autogenRoute.RepeatInterval) + assert.Empty(c, autogenRoute.MuteTimeIntervals) + assert.Empty(c, autogenRoute.GroupBy) + if !canContinue { + return + } + // Now check that the second level is route for receivers + if !assert.NotEmpty(c, autogenRoute.Routes) { + return + } + // There can be many routes, for all receivers + idx := slices.IndexFunc(autogenRoute.Routes, func(route *apimodels.Route) bool { + return route.Receiver == notificationSettings.Receiver + }) + if !assert.GreaterOrEqual(t, idx, 0) { + return + } + receiverRoute := autogenRoute.Routes[idx] + if !assert.Len(c, receiverRoute.ObjectMatchers, 1) { + return + } + canContinue = assert.Equal(c, ngmodels.AutogeneratedRouteReceiverNameLabel, receiverRoute.ObjectMatchers[0].Name) + assert.Equal(c, labels.MatchEqual, receiverRoute.ObjectMatchers[0].Type) + assert.Equal(c, notificationSettings.Receiver, receiverRoute.ObjectMatchers[0].Value) + + assert.Equal(c, notificationSettings.Receiver, receiverRoute.Receiver) + assert.Nil(c, receiverRoute.GroupWait) + assert.Nil(c, receiverRoute.GroupInterval) + assert.Nil(c, receiverRoute.RepeatInterval) + assert.Empty(c, receiverRoute.MuteTimeIntervals) + var groupBy []string + for _, name := range receiverRoute.GroupBy { + groupBy = append(groupBy, string(name)) + } + slices.Sort(groupBy) + assert.EqualValues(c, []string{"alertname", "grafana_folder"}, groupBy) + if !canContinue { + return + } + // Now check that we created the 3rd level for specific combination of settings + if !assert.Lenf(c, receiverRoute.Routes, 1, "Receiver route should contain one options route") { + return + } 
+ optionsRoute := receiverRoute.Routes[0] + if !assert.Len(c, optionsRoute.ObjectMatchers, 1) { + return + } + assert.Equal(c, ngmodels.AutogeneratedRouteSettingsHashLabel, optionsRoute.ObjectMatchers[0].Name) + assert.Equal(c, labels.MatchEqual, optionsRoute.ObjectMatchers[0].Type) + assert.EqualValues(c, notificationSettings.GroupWait, optionsRoute.GroupWait) + assert.EqualValues(c, notificationSettings.GroupInterval, optionsRoute.GroupInterval) + assert.EqualValues(c, notificationSettings.RepeatInterval, optionsRoute.RepeatInterval) + assert.EqualValues(c, notificationSettings.MuteTimeIntervals, optionsRoute.MuteTimeIntervals) + groupBy = nil + for _, name := range optionsRoute.GroupBy { + groupBy = append(groupBy, string(name)) + } + assert.EqualValues(c, notificationSettings.GroupBy, groupBy) + }, 10*time.Second, 1*time.Second) { + t.Logf("config: %s", routeBody) + } + }) + + t.Run("should correctly create alerts", func(t *testing.T) { + var response string + if !assert.EventuallyWithT(t, func(c *assert.CollectT) { + groups, status, body := apiClient.GetActiveAlertsWithStatus(t) + require.Equalf(t, http.StatusOK, status, body) + response = body + if len(groups) == 0 { + return + } + g := groups[0] + alert := g.Alerts[0] + assert.Contains(c, alert.Labels, ngmodels.AutogeneratedRouteLabel) + assert.Equal(c, "true", alert.Labels[ngmodels.AutogeneratedRouteLabel]) + assert.Contains(c, alert.Labels, ngmodels.AutogeneratedRouteReceiverNameLabel) + assert.Equal(c, d.Receiver.Name, alert.Labels[ngmodels.AutogeneratedRouteReceiverNameLabel]) + assert.Contains(c, alert.Labels, ngmodels.AutogeneratedRouteSettingsHashLabel) + assert.NotEmpty(c, alert.Labels[ngmodels.AutogeneratedRouteSettingsHashLabel]) + }, 10*time.Second, 1*time.Second) { + t.Logf("response: %s", response) + } + }) + + t.Run("should update rule with empty settings and delete route", func(t *testing.T) { + var copyD testData + err = json.Unmarshal(testDataRaw, &copyD) + group := copyD.RuleGroup + 
notificationSettings := group.Rules[0].GrafanaManagedAlert.NotificationSettings + group.Rules[0].GrafanaManagedAlert.NotificationSettings = nil + + _, status, body := apiClient.PostRulesGroupWithStatus(t, folder, &group) + require.Equalf(t, http.StatusAccepted, status, body) + + var routeBody string + if !assert.EventuallyWithT(t, func(c *assert.CollectT) { + amConfig, status, body := apiClient.GetAlertmanagerConfigWithStatus(t) + routeBody = body + if !assert.Equalf(c, http.StatusOK, status, body) { + return + } + route := amConfig.AlertmanagerConfig.Route + + if !assert.Len(c, route.Routes, 1) { + return + } + // Check that we are in the auto-generated root + autogenRoute := route.Routes[0] + if !assert.Len(c, autogenRoute.ObjectMatchers, 1) { + return + } + if !assert.Equal(c, ngmodels.AutogeneratedRouteLabel, autogenRoute.ObjectMatchers[0].Name) { + return + } + // Now check that the second level is route for receivers + if !assert.NotEmpty(c, autogenRoute.Routes) { + return + } + // There can be many routes, for all receivers + idx := slices.IndexFunc(autogenRoute.Routes, func(route *apimodels.Route) bool { + return route.Receiver == notificationSettings.Receiver + }) + if !assert.GreaterOrEqual(c, idx, 0) { + return + } + receiverRoute := autogenRoute.Routes[idx] + if !assert.Empty(c, receiverRoute.Routes) { + return + } + }, 10*time.Second, 1*time.Second) { + t.Logf("config: %s", routeBody) + } + }) +} diff --git a/pkg/tests/api/alerting/test-data/rule-notification-settings-1-post.json b/pkg/tests/api/alerting/test-data/rule-notification-settings-1-post.json new file mode 100644 index 00000000000..0331fd94e27 --- /dev/null +++ b/pkg/tests/api/alerting/test-data/rule-notification-settings-1-post.json @@ -0,0 +1,58 @@ +{ + "ruleGroup" : { + "name": "Group1", + "interval": "1m", + "rules": [ + { + "for": "0", + "labels": { + "label1": "test-label" + }, + "annotations": { + "annotation": "test-annotation" + }, + "grafana_alert": { + "title": "Rule1", +
"condition": "A", + "data": [ + { + "refId": "A", + "datasourceUid": "__expr__", + "model": { + "expression": "0 > 0", + "type": "math" + } + } + ], + "no_data_state": "NoData", + "exec_err_state": "Alerting", + "notification_settings": { + "receiver": "rule-receiver", + "group_by": [ + "alertname", + "grafana_folder", + "label1" + ], + "group_wait": "100ms", + "group_interval": "5s", + "repeat_interval": "1d", + "mute_time_intervals": [ + "rule-time-interval" + ] + } + } + } + ] + }, + "receiver": { + "name": "rule-receiver", + "type": "webhook", + "settings": { + "url": "http://localhost:3000/_callback" + } + }, + "timeInterval": { + "name": "rule-time-interval", + "time_intervals":[{"times":[{"start_time":"10:00","end_time":"12:00"}]}] + } +} \ No newline at end of file diff --git a/pkg/tests/api/alerting/testing.go b/pkg/tests/api/alerting/testing.go index 7b1cb6318de..208e7856a38 100644 --- a/pkg/tests/api/alerting/testing.go +++ b/pkg/tests/api/alerting/testing.go @@ -226,13 +226,14 @@ func convertGettableGrafanaRuleToPostable(gettable *apimodels.GettableGrafanaRul return nil } return &apimodels.PostableGrafanaRule{ - Title: gettable.Title, - Condition: gettable.Condition, - Data: gettable.Data, - UID: gettable.UID, - NoDataState: gettable.NoDataState, - ExecErrState: gettable.ExecErrState, - IsPaused: &gettable.IsPaused, + Title: gettable.Title, + Condition: gettable.Condition, + Data: gettable.Data, + UID: gettable.UID, + NoDataState: gettable.NoDataState, + ExecErrState: gettable.ExecErrState, + IsPaused: &gettable.IsPaused, + NotificationSettings: gettable.NotificationSettings, } } @@ -711,6 +712,13 @@ func (a apiClient) CreateMuteTimingWithStatus(t *testing.T, interval apimodels.M return sendRequest[apimodels.MuteTimeInterval](t, req, http.StatusCreated) } +func (a apiClient) EnsureMuteTiming(t *testing.T, interval apimodels.MuteTimeInterval) { + t.Helper() + + _, status, body := a.CreateMuteTimingWithStatus(t, interval) + require.Equalf(t, 
http.StatusCreated, status, body) +} + func (a apiClient) UpdateMuteTimingWithStatus(t *testing.T, interval apimodels.MuteTimeInterval) (apimodels.MuteTimeInterval, int, string) { t.Helper() @@ -810,6 +818,47 @@ func (a apiClient) GetTimeIntervalByNameWithStatus(t *testing.T, name string) (a return sendRequest[apimodels.GettableTimeIntervals](t, req, http.StatusOK) } +// CreateReceiverWithStatus JSON-encodes receiver and POSTs it to /api/v1/provisioning/contact-points; sendRequest is given http.StatusAccepted as the success status. +func (a apiClient) CreateReceiverWithStatus(t *testing.T, receiver apimodels.EmbeddedContactPoint) (apimodels.EmbeddedContactPoint, int, string) { + t.Helper() + + buf := bytes.Buffer{} + enc := json.NewEncoder(&buf) + err := enc.Encode(receiver) + require.NoError(t, err) + + req, err := http.NewRequest(http.MethodPost, fmt.Sprintf("%s/api/v1/provisioning/contact-points", a.url), &buf) + require.NoError(t, err) + req.Header.Add("Content-Type", "application/json") + + return sendRequest[apimodels.EmbeddedContactPoint](t, req, http.StatusAccepted) +} + +// EnsureReceiver creates receiver via CreateReceiverWithStatus and fails the test unless the returned status is http.StatusAccepted. +func (a apiClient) EnsureReceiver(t *testing.T, receiver apimodels.EmbeddedContactPoint) { + t.Helper() + + _, status, body := a.CreateReceiverWithStatus(t, receiver) + require.Equalf(t, http.StatusAccepted, status, body) +} + +// GetAlertmanagerConfigWithStatus GETs /api/alertmanager/grafana/config/api/v1/alerts; sendRequest is given http.StatusOK as the success status. +func (a apiClient) GetAlertmanagerConfigWithStatus(t *testing.T) (apimodels.GettableUserConfig, int, string) { + t.Helper() + req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("%s/api/alertmanager/grafana/config/api/v1/alerts", a.url), nil) + require.NoError(t, err) + + return sendRequest[apimodels.GettableUserConfig](t, req, http.StatusOK) +} + +// GetActiveAlertsWithStatus GETs /api/alertmanager/grafana/api/v2/alerts/groups; sendRequest is given http.StatusOK as the success status. +func (a apiClient) GetActiveAlertsWithStatus(t *testing.T) (apimodels.AlertGroups, int, string) { + t.Helper() + req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("%s/api/alertmanager/grafana/api/v2/alerts/groups", a.url), nil) + require.NoError(t, err) + return sendRequest[apimodels.AlertGroups](t, req, http.StatusOK) +} + func sendRequest[T any](t *testing.T, req *http.Request, successStatusCode int) (T, int, string) { client := &http.Client{} resp, err := client.Do(req)