diff --git a/pkg/services/ngalert/api/api_provisioning.go b/pkg/services/ngalert/api/api_provisioning.go index 935cf1bc8d3..06805da5b19 100644 --- a/pkg/services/ngalert/api/api_provisioning.go +++ b/pkg/services/ngalert/api/api_provisioning.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "net/http" + "regexp" "strings" "github.com/grafana/grafana/pkg/api/response" @@ -19,6 +20,7 @@ import ( "github.com/grafana/grafana/pkg/services/ngalert/provisioning" "github.com/grafana/grafana/pkg/services/ngalert/store" "github.com/grafana/grafana/pkg/util" + alertmanager_config "github.com/prometheus/alertmanager/config" ) const disableProvenanceHeaderName = "X-Disable-Provenance" @@ -600,10 +602,67 @@ func escapeAlertingFileExport(body definitions.AlertingFileExport) definitions.A for i, group := range body.Groups { body.Groups[i] = escapeRuleGroup(group) } - // TODO: implement escaping for the other export fields + for i, cp := range body.ContactPoints { + body.ContactPoints[i] = escapeContactPoint(cp) + } + for i, np := range body.Policies { + body.Policies[i] = escapeNotificationPolicy(np) + } return body } +func escapeRouteExport(r *definitions.RouteExport) { + r.Receiver = addEscapeCharactersToString(r.Receiver) + if r.GroupByStr != nil { + groupByStr := make([]string, len(*r.GroupByStr)) + for i, groupBy := range *r.GroupByStr { + groupByStr[i] = addEscapeCharactersToString(groupBy) + } + r.GroupByStr = &groupByStr + } + for k, v := range r.Match { + r.Match[k] = addEscapeCharactersToString(v) + } + for k, v := range r.MatchRE { + // convert regex to string, escape then covert back to regex + stringRepr := addEscapeCharactersToString(v.String()) + mutated := regexp.MustCompile(stringRepr) + r.MatchRE[k] = alertmanager_config.Regexp{Regexp: mutated} + } + if r.MuteTimeIntervals != nil { + muteTimeIntervals := make([]string, len(*r.MuteTimeIntervals)) + for i, muteTimeInterval := range *r.MuteTimeIntervals { + muteTimeIntervals[i] = addEscapeCharactersToString(muteTimeInterval) + } + r.MuteTimeIntervals = &muteTimeIntervals + } + for i := range r.Routes { + escapeRouteExport(r.Routes[i]) + } +} + +func escapeNotificationPolicy(np definitions.NotificationPolicyExport) definitions.NotificationPolicyExport { + escapeRouteExport(np.RouteExport) + return np +} + +func escapeContactPoint(cp definitions.ContactPointExport) definitions.ContactPointExport { + cp.Name = addEscapeCharactersToString(cp.Name) + for i, receiver := range cp.Receivers { + settingsJson, err := receiver.Settings.MarshalJSON() + if err != nil { + // This should never happen, as the settings are already marshaled to JSON in the API + panic(fmt.Errorf("failed to marshal settings to JSON: %w", err)) + } + settingsEscaped := []byte(addEscapeCharactersToString(string(settingsJson))) + if err := cp.Receivers[i].Settings.UnmarshalJSON(settingsEscaped); err != nil { + // This should never happen, as the settings are already marshaled to JSON in the API + panic(fmt.Errorf("failed to unmarshal settings from JSON: %w", err)) + } + } + return cp +} + // escape all strings except: // Alert rule annotations: groups[].rules[].annotations // Alert rule time range: groups[].rules[].relativeTimeRange diff --git a/pkg/tests/api/alerting/api_provisioning_test.go b/pkg/tests/api/alerting/api_provisioning_test.go index a2c151a53eb..ec9cda827b8 100644 --- a/pkg/tests/api/alerting/api_provisioning_test.go +++ b/pkg/tests/api/alerting/api_provisioning_test.go @@ -877,4 +877,136 @@ func TestIntegrationExportFileProvision(t *testing.T) { require.YAMLEq(t, string(expectedYaml), exportRaw) }) }) + t.Run("when provisioning mute times from files", func(t *testing.T) { + // add file provisioned mute times + fileProvisionedMuteTimings, err := testData.ReadFile(path.Join("test-data", "provisioning-mute-times.yaml")) + require.NoError(t, err) + + var expected definitions.AlertingFileExport + require.NoError(t, yaml.Unmarshal(fileProvisionedMuteTimings, &expected)) + expected.MuteTimings[0].OrgID = 1 // HACK to deal with weird goyaml behavior + expectedYamlRaw, err := yaml.Marshal(expected) + require.NoError(t, err) + + err = os.WriteFile(filepath.Join(alertingDir, "provisioning-mute-times.yaml"), fileProvisionedMuteTimings, 0750) + require.NoError(t, err) + + apiClient.ReloadAlertingFileProvisioning(t) + + t.Run("exported mute times shouldn't escape $ characters", func(t *testing.T) { + // call export endpoint + exportRaw := apiClient.ExportMuteTiming(t, "$mute_time_a", "yaml") + var export definitions.AlertingFileExport + require.NoError(t, yaml.Unmarshal([]byte(exportRaw), &export)) + expectedYaml := string(expectedYamlRaw) + // verify the file exported matches the file provisioned thing + require.Len(t, export.MuteTimings, 1) + require.YAMLEq(t, expectedYaml, exportRaw) + }) + }) +} + +func TestIntegrationExportFileProvisionMixed(t *testing.T) { + dir, p := testinfra.CreateGrafDir(t, testinfra.GrafanaOpts{ + DisableLegacyAlerting: true, + EnableUnifiedAlerting: true, + DisableAnonymous: true, + AppModeProduction: true, + }) + + provisioningDir := filepath.Join(dir, "conf", "provisioning") + alertingDir := filepath.Join(provisioningDir, "alerting") + err := os.MkdirAll(alertingDir, 0750) + require.NoError(t, err) + + grafanaListedAddr, env := testinfra.StartGrafanaEnv(t, dir, p) + + apiClient := newAlertingApiClient(grafanaListedAddr, "admin", "admin") + createUser(t, env.SQLStore, env.Cfg, user.CreateUserCommand{ + DefaultOrgRole: string(org.RoleAdmin), + Password: "admin", + Login: "admin", + IsAdmin: true, + }) + + apiClient.ReloadCachedPermissions(t) + t.Run("when provisioning mixed set of alerting configurations from files", func(t *testing.T) { + // add file provisioned mixed set of alerting configurations + fileProvisionedResources, err := testData.ReadFile(path.Join("test-data", "provisioning-mixed-set.yaml")) + require.NoError(t, err) + + var expected definitions.AlertingFileExport + require.NoError(t, yaml.Unmarshal(fileProvisionedResources, &expected)) + expected.MuteTimings[0].OrgID = 1 // HACK to deal with weird goyaml behavior + + err = os.WriteFile(filepath.Join(alertingDir, "provisioning-mixed-set.yaml"), fileProvisionedResources, 0750) + require.NoError(t, err) + + apiClient.ReloadAlertingFileProvisioning(t) + + t.Run("exported notification policy matches imported", func(t *testing.T) { + notificationPolicyExpected := expected + notificationPolicyExpected.MuteTimings = nil + notificationPolicyExpected.ContactPoints = nil + notificationPolicyExpected.Groups = nil + serializedExpected, err := yaml.Marshal(notificationPolicyExpected) + require.NoError(t, err) + + actual := apiClient.ExportNotificationPolicy(t, "yaml") + + require.YAMLEq(t, string(serializedExpected), actual) + }) + }) +} + +func TestIntegrationExportFileProvisionContactPoints(t *testing.T) { + dir, p := testinfra.CreateGrafDir(t, testinfra.GrafanaOpts{ + DisableLegacyAlerting: true, + EnableUnifiedAlerting: true, + DisableAnonymous: true, + AppModeProduction: true, + }) + + provisioningDir := filepath.Join(dir, "conf", "provisioning") + alertingDir := filepath.Join(provisioningDir, "alerting") + err := os.MkdirAll(alertingDir, 0750) + require.NoError(t, err) + + grafanaListedAddr, env := testinfra.StartGrafanaEnv(t, dir, p) + + apiClient := newAlertingApiClient(grafanaListedAddr, "admin", "admin") + createUser(t, env.SQLStore, env.Cfg, user.CreateUserCommand{ + DefaultOrgRole: string(org.RoleAdmin), + Password: "admin", + Login: "admin", + IsAdmin: true, + }) + + apiClient.ReloadCachedPermissions(t) + t.Run("when provisioning contact points from files", func(t *testing.T) { + // add file provisioned contact points + fileProvisionedContactPoints, err := testData.ReadFile(path.Join("test-data", "provisioning-contact-points.yaml")) + require.NoError(t, err) + + var expected definitions.AlertingFileExport + require.NoError(t, yaml.Unmarshal(fileProvisionedContactPoints, &expected)) + expectedYaml, err := yaml.Marshal(expected) + require.NoError(t, err) + + err = os.WriteFile(filepath.Join(alertingDir, "provisioning-contact-points.yaml"), fileProvisionedContactPoints, 0750) + require.NoError(t, err) + + apiClient.ReloadAlertingFileProvisioning(t) + + t.Run("exported contact points should escape $ characters", func(t *testing.T) { + // call export endpoint + exportRaw := apiClient.ExportReceiver(t, "cp_1_$escaped", "yaml", true) + var export definitions.AlertingFileExport + require.NoError(t, yaml.Unmarshal([]byte(exportRaw), &export)) + + // verify the file exported matches the file provisioned thing + require.Len(t, export.ContactPoints, 1) + require.YAMLEq(t, string(expectedYaml), exportRaw) + }) + }) } diff --git a/pkg/tests/api/alerting/test-data/provisioning-contact-points.yaml b/pkg/tests/api/alerting/test-data/provisioning-contact-points.yaml new file mode 100644 index 00000000000..0a759626c4c --- /dev/null +++ b/pkg/tests/api/alerting/test-data/provisioning-contact-points.yaml @@ -0,0 +1,21 @@ +# config file version +apiVersion: 1 + +# List of contact points to import or update +contactPoints: + # organization ID, default = 1 + - orgId: 1 + # name of the contact point + name: cp_1_$$escaped + + receivers: + # unique identifier for the receiver. Should not exceed 40 symbols. Only letters, numbers, - (hyphen), and _ (underscore) allowed. + - uid: first_uid + # type of the receiver + type: prometheus-alertmanager + # Disable the additional [Incident Resolved] follow-up alert, default = false + disableResolveMessage: false + # settings for the specific receiver type + settings: + url: http://test:9000 + something: $$escaped diff --git a/pkg/tests/api/alerting/test-data/provisioning-mixed-set.yaml b/pkg/tests/api/alerting/test-data/provisioning-mixed-set.yaml new file mode 100644 index 00000000000..b8845d29548 --- /dev/null +++ b/pkg/tests/api/alerting/test-data/provisioning-mixed-set.yaml @@ -0,0 +1,159 @@ +# config file version +apiVersion: 1 + +contactPoints: + # organization ID, default = 1 + - orgId: 1 + # name of the contact point + name: $$xyz + + receivers: + # unique identifier for the receiver. Should not exceed 40 symbols. Only letters, numbers, - (hyphen), and _ (underscore) allowed. + - uid: first_uid + # type of the receiver + type: prometheus-alertmanager + # Disable the additional [Incident Resolved] follow-up alert, default = false + disableResolveMessage: false + # settings for the specific receiver type + settings: + url: http://test:9000 + something: $$escaped + +muteTimes: + # organization ID, default = 1 + - orgId: 1 + # name of the mute time interval, must be unique + name: $mute_time_1 + # time intervals that should trigger the muting + # refer to https://prometheus.io/docs/alerting/latest/configuration/#time_interval-0 + time_intervals: + - times: + - start_time: "06:00" + end_time: "23:59" + location: "UTC" + weekdays: ["monday:wednesday", "saturday", "sunday"] + months: ["1:3", "may:august", "december"] + years: ["2020:2022", "2030"] + days_of_month: ["1:5", "-3:-1"] + - orgId: 1 + # name of the mute time interval, must be unique + name: $mute_time_2 + # time intervals that should trigger the muting + # refer to https://prometheus.io/docs/alerting/latest/configuration/#time_interval-0 + time_intervals: + - times: + - start_time: "09:00" + end_time: "10:00" + location: "UTC" + weekdays: ["monday:wednesday", "saturday", "sunday"] + months: ["1:3", "may:august", "december"] + years: ["2020:2022", "2030"] + days_of_month: ["1:5", "-3:-1"] + +# ONLY THESE PATHS ARE NOT TEMPLATED and therefore don't need escaping: +# Alert rule annotations: groups[].rules[].annotations +# Alert rule time range: groups[].rules[].relativeTimeRange +# Alert rule query model: groups[].rules[].data.model +groups: + # organization ID, default = 1 + - orgId: 1 + # name of the rule group + name: my_rule_group + # name of the folder the rule group will be stored in + folder: my_first_folder_with_$$escaped_symbols + # interval that the rule group should evaluated at + interval: 60s + # list of rules that are part of the rule group + rules: + # unique identifier for the rule. Should not exceed 40 symbols. Only letters, numbers, - (hyphen), and _ (underscore) allowed. + - uid: my_id_1 + # title of the rule that will be displayed in the UI + title: my_first_rule_with_$$escaped_symbols + # which query should be used for the condition + condition: A + # list of query objects that should be executed on each + # evaluation - should be obtained through the API + data: + - refId: A + datasourceUid: "__expr__" + model: + conditions: + - evaluator: + params: + - 3 + type: gt + operator: + type: and + query: + params: + - A + reducer: + type: last + type: query + datasource: + type: __expr__ + uid: "__expr__" + expression: 1==0 + intervalMs: 1000 + maxDataPoints: 43200 + refId: A + type: math + # UID of a dashboard that the alert rule should be linked to + dashboardUid: my_dashboard + # ID of the panel that the alert rule should be linked to + panelId: 123 + # the state the alert rule will have when no data is returned + # possible values: "NoData", "Alerting", "OK", default = NoData + noDataState: Alerting + # the state the alert rule will have when the query execution + # failed - possible values: "Error", "Alerting", "OK" + # default = Alerting + execErrState: Alerting + # for how long should the alert fire before alerting + for: 60s + # > a map of strings to pass around any data + annotations: + some_key: some_value + $no_escaping_needed: $no_escaping_needed + # a map of strings that can be used to filter and + # route alerts + labels: + team: sre_team_1 + label_keys_not_$escaped: $$escaped_value + something: "escaped in the middle of things $$value" + templated: "{{ $$labels.team }}" + middle: "u$$ing_escaped_symbols" + notification_settings: + receiver: $$xyz + group_by: + - label_keys_not_$$escaped + - something + group_wait: 5m + group_interval: 10m + repeat_interval: 10m + mute_time_intervals: + - $mute_time_1 + - $mute_time_2 + +policies: + # organization ID, default = 1 + - orgId: 1 + # name of the contact point that should be used for this route + receiver: $$xyz + group_by: + - label_keys_not_$$escaped + # a list of prometheus-like matchers that an alert rule has to fulfill to match the node (allowed chars + # [a-zA-Z_:]) + matchers: + - alertname = Watchdog + - service_id_X = serviceX + - severity =~ "warning|critical" + # a list of grafana-like matchers that an alert rule has to fulfill to match the node + object_matchers: + - ["alertname", "=", "CPUUsage"] + - ["service_id-X", "=", "serviceX"] + - ["severity", "=~", "warning|critical"] + group_wait: 30s + group_interval: 5m + repeat_interval: 4h + routes: [] diff --git a/pkg/tests/api/alerting/test-data/provisioning-mute-times.yaml b/pkg/tests/api/alerting/test-data/provisioning-mute-times.yaml new file mode 100644 index 00000000000..3dc4d14872a --- /dev/null +++ b/pkg/tests/api/alerting/test-data/provisioning-mute-times.yaml @@ -0,0 +1,20 @@ +# config file version +apiVersion: 1 + +# ONLY THESE PATHS ARE NOT TEMPLATED and therefore don't need escaping: +# Mute timings name: muteTimes[].name +# Mute timings time intervals: muteTimes[].time_intervals[] +muteTimes: + - orgId: 1 + name: $mute_time_a + # time intervals that should trigger the muting + # refer to https://prometheus.io/docs/alerting/latest/configuration/#time_interval-0 + time_intervals: + - times: + - start_time: "06:00" + end_time: "23:59" + location: "UTC" + weekdays: ["monday:wednesday", "saturday", "sunday"] + months: ["1:3", "may:august", "december"] + years: ["2020:2022", "2030"] + days_of_month: ["1:5", "-3:-1"] diff --git a/pkg/tests/api/alerting/testing.go b/pkg/tests/api/alerting/testing.go index f599ec060a1..c66281d7af7 100644 --- a/pkg/tests/api/alerting/testing.go +++ b/pkg/tests/api/alerting/testing.go @@ -839,6 +839,32 @@ func (a apiClient) DeleteMuteTimingWithStatus(t *testing.T, name string) (int, s return resp.StatusCode, string(body) } +func (a apiClient) ExportMuteTiming(t *testing.T, name string, format string) string { + t.Helper() + + u, err := url.Parse(fmt.Sprintf("%s/api/v1/provisioning/mute-timings/%s/export", a.url, name)) + require.NoError(t, err) + q := url.Values{} + q.Set("format", format) + u.RawQuery = q.Encode() + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + require.NoError(t, err) + + client := &http.Client{} + resp, err := client.Do(req) + require.NoError(t, err) + + defer func() { + _ = resp.Body.Close() + }() + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + + requireStatusCode(t, http.StatusOK, resp.StatusCode, string(body)) + return string(body) +} + func (a apiClient) GetRouteWithStatus(t *testing.T) (apimodels.Route, int, string) { t.Helper() @@ -883,6 +909,32 @@ func (a apiClient) UpdateRouteWithStatus(t *testing.T, route apimodels.Route, no return resp.StatusCode, string(body) } +func (a apiClient) ExportNotificationPolicy(t *testing.T, format string) string { + t.Helper() + + u, err := url.Parse(fmt.Sprintf("%s/api/v1/provisioning/policies/export", a.url)) + require.NoError(t, err) + q := url.Values{} + q.Set("format", format) + u.RawQuery = q.Encode() + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + require.NoError(t, err) + + client := &http.Client{} + resp, err := client.Do(req) + require.NoError(t, err) + + defer func() { + _ = resp.Body.Close() + }() + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + + requireStatusCode(t, http.StatusOK, resp.StatusCode, string(body)) + return string(body) +} + func (a apiClient) UpdateRoute(t *testing.T, route apimodels.Route, noProvenance bool) { t.Helper() status, data := a.UpdateRouteWithStatus(t, route, noProvenance)