Alerting: Correctly escape provisioning API exports (#99039)

When exporting contact-points, mute-timings, and notification policies in the provisioning API, we need to escape the `$` character which is used in interpolation by file provisioning.

Follow up to #97985
This commit is contained in:
Moustafa Baiou 2025-01-27 14:59:50 -05:00 committed by GitHub
parent d71904cb27
commit 82f457495a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 444 additions and 1 deletions

View File

@ -5,6 +5,7 @@ import (
"errors" "errors"
"fmt" "fmt"
"net/http" "net/http"
"regexp"
"strings" "strings"
"github.com/grafana/grafana/pkg/api/response" "github.com/grafana/grafana/pkg/api/response"
@ -19,6 +20,7 @@ import (
"github.com/grafana/grafana/pkg/services/ngalert/provisioning" "github.com/grafana/grafana/pkg/services/ngalert/provisioning"
"github.com/grafana/grafana/pkg/services/ngalert/store" "github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/util" "github.com/grafana/grafana/pkg/util"
alertmanager_config "github.com/prometheus/alertmanager/config"
) )
const disableProvenanceHeaderName = "X-Disable-Provenance" const disableProvenanceHeaderName = "X-Disable-Provenance"
@ -600,10 +602,67 @@ func escapeAlertingFileExport(body definitions.AlertingFileExport) definitions.A
for i, group := range body.Groups { for i, group := range body.Groups {
body.Groups[i] = escapeRuleGroup(group) body.Groups[i] = escapeRuleGroup(group)
} }
// TODO: implement escaping for the other export fields for i, cp := range body.ContactPoints {
body.ContactPoints[i] = escapeContactPoint(cp)
}
for i, np := range body.Policies {
body.Policies[i] = escapeNotificationPolicy(np)
}
return body return body
} }
// escapeRouteExport escapes, in place, the string fields of r and of every
// route nested under r.Routes by passing them through
// addEscapeCharactersToString: the receiver name, the group-by labels, the
// values of Match, the patterns of MatchRE and the mute time interval names.
// Map keys are left untouched. A nil route is ignored.
func escapeRouteExport(r *definitions.RouteExport) {
	if r == nil {
		// Nothing to escape. This also protects the recursion below against
		// nil entries in a parent's Routes slice.
		return
	}

	r.Receiver = addEscapeCharactersToString(r.Receiver)

	if r.GroupByStr != nil {
		// Build a fresh slice instead of writing through the existing one.
		groupByStr := make([]string, len(*r.GroupByStr))
		for i, groupBy := range *r.GroupByStr {
			groupByStr[i] = addEscapeCharactersToString(groupBy)
		}
		r.GroupByStr = &groupByStr
	}

	for k, v := range r.Match {
		r.Match[k] = addEscapeCharactersToString(v)
	}

	for k, v := range r.MatchRE {
		// Convert the regex to a string, escape it, then convert it back to a
		// regex. MustCompile panics if the escaped pattern does not compile.
		stringRepr := addEscapeCharactersToString(v.String())
		mutated := regexp.MustCompile(stringRepr)
		r.MatchRE[k] = alertmanager_config.Regexp{Regexp: mutated}
	}

	if r.MuteTimeIntervals != nil {
		// Build a fresh slice instead of writing through the existing one.
		muteTimeIntervals := make([]string, len(*r.MuteTimeIntervals))
		for i, muteTimeInterval := range *r.MuteTimeIntervals {
			muteTimeIntervals[i] = addEscapeCharactersToString(muteTimeInterval)
		}
		r.MuteTimeIntervals = &muteTimeIntervals
	}

	// Child routes are escaped recursively.
	for i := range r.Routes {
		escapeRouteExport(r.Routes[i])
	}
}
// escapeNotificationPolicy hands the policy's route to escapeRouteExport and
// returns the policy. The route is reached through a pointer, so the escaping
// is applied to the route object that np refers to.
func escapeNotificationPolicy(np definitions.NotificationPolicyExport) definitions.NotificationPolicyExport {
	route := np.RouteExport
	escapeRouteExport(route)
	return np
}
// escapeContactPoint returns cp with its name escaped and with the settings of
// every receiver escaped. Settings are handled as text: they are serialized to
// JSON, passed through addEscapeCharactersToString, and parsed back into the
// receiver. A serialization or parsing failure results in a panic.
func escapeContactPoint(cp definitions.ContactPointExport) definitions.ContactPointExport {
	cp.Name = addEscapeCharactersToString(cp.Name)

	for idx := range cp.Receivers {
		raw, marshalErr := cp.Receivers[idx].Settings.MarshalJSON()
		if marshalErr != nil {
			// Not expected: the settings were already held as JSON by the API.
			panic(fmt.Errorf("failed to marshal settings to JSON: %w", marshalErr))
		}

		escaped := addEscapeCharactersToString(string(raw))

		unmarshalErr := cp.Receivers[idx].Settings.UnmarshalJSON([]byte(escaped))
		if unmarshalErr != nil {
			// Not expected: the input is the escaped form of JSON produced above.
			panic(fmt.Errorf("failed to unmarshal settings from JSON: %w", unmarshalErr))
		}
	}

	return cp
}
// escape all strings except: // escape all strings except:
// Alert rule annotations: groups[].rules[].annotations // Alert rule annotations: groups[].rules[].annotations
// Alert rule time range: groups[].rules[].relativeTimeRange // Alert rule time range: groups[].rules[].relativeTimeRange

View File

@ -877,4 +877,136 @@ func TestIntegrationExportFileProvision(t *testing.T) {
require.YAMLEq(t, string(expectedYaml), exportRaw) require.YAMLEq(t, string(expectedYaml), exportRaw)
}) })
}) })
t.Run("when provisioning mute times from files", func(t *testing.T) {
// add file provisioned mute times
fileProvisionedMuteTimings, err := testData.ReadFile(path.Join("test-data", "provisioning-mute-times.yaml"))
require.NoError(t, err)
var expected definitions.AlertingFileExport
require.NoError(t, yaml.Unmarshal(fileProvisionedMuteTimings, &expected))
expected.MuteTimings[0].OrgID = 1 // HACK to deal with weird goyaml behavior
expectedYamlRaw, err := yaml.Marshal(expected)
require.NoError(t, err)
err = os.WriteFile(filepath.Join(alertingDir, "provisioning-mute-times.yaml"), fileProvisionedMuteTimings, 0750)
require.NoError(t, err)
apiClient.ReloadAlertingFileProvisioning(t)
t.Run("exported mute times shouldn't escape $ characters", func(t *testing.T) {
// call export endpoint
exportRaw := apiClient.ExportMuteTiming(t, "$mute_time_a", "yaml")
var export definitions.AlertingFileExport
require.NoError(t, yaml.Unmarshal([]byte(exportRaw), &export))
expectedYaml := string(expectedYamlRaw)
// verify the file exported matches the file provisioned thing
require.Len(t, export.MuteTimings, 1)
require.YAMLEq(t, expectedYaml, exportRaw)
})
})
}
// TestIntegrationExportFileProvisionMixed starts a Grafana test environment,
// writes the "provisioning-mixed-set.yaml" fixture into the alerting
// provisioning directory, reloads file provisioning, and verifies that the
// notification policy export equals the fixture once its mute timings,
// contact points and rule groups have been cleared.
func TestIntegrationExportFileProvisionMixed(t *testing.T) {
	grafDir, cfgPath := testinfra.CreateGrafDir(t, testinfra.GrafanaOpts{
		DisableLegacyAlerting: true,
		EnableUnifiedAlerting: true,
		DisableAnonymous:      true,
		AppModeProduction:     true,
	})

	alertingDir := filepath.Join(grafDir, "conf", "provisioning", "alerting")
	require.NoError(t, os.MkdirAll(alertingDir, 0750))

	addr, env := testinfra.StartGrafanaEnv(t, grafDir, cfgPath)
	client := newAlertingApiClient(addr, "admin", "admin")

	createUser(t, env.SQLStore, env.Cfg, user.CreateUserCommand{
		DefaultOrgRole: string(org.RoleAdmin),
		Password:       "admin",
		Login:          "admin",
		IsAdmin:        true,
	})
	client.ReloadCachedPermissions(t)

	t.Run("when provisioning mixed set of alerting configurations from files", func(t *testing.T) {
		const fixture = "provisioning-mixed-set.yaml"

		// Load the fixture and decode it as the expected export.
		raw, err := testData.ReadFile(path.Join("test-data", fixture))
		require.NoError(t, err)

		var want definitions.AlertingFileExport
		require.NoError(t, yaml.Unmarshal(raw, &want))
		want.MuteTimings[0].OrgID = 1 // HACK to deal with weird goyaml behavior

		// Drop the fixture where file provisioning picks it up, then reload.
		require.NoError(t, os.WriteFile(filepath.Join(alertingDir, fixture), raw, 0750))
		client.ReloadAlertingFileProvisioning(t)

		t.Run("exported notification policy matches imported", func(t *testing.T) {
			// Only the policies section is compared against the export.
			policiesOnly := want
			policiesOnly.Groups = nil
			policiesOnly.ContactPoints = nil
			policiesOnly.MuteTimings = nil

			wantYAML, err := yaml.Marshal(policiesOnly)
			require.NoError(t, err)

			got := client.ExportNotificationPolicy(t, "yaml")
			require.YAMLEq(t, string(wantYAML), got)
		})
	})
}
func TestIntegrationExportFileProvisionContactPoints(t *testing.T) {
dir, p := testinfra.CreateGrafDir(t, testinfra.GrafanaOpts{
DisableLegacyAlerting: true,
EnableUnifiedAlerting: true,
DisableAnonymous: true,
AppModeProduction: true,
})
provisioningDir := filepath.Join(dir, "conf", "provisioning")
alertingDir := filepath.Join(provisioningDir, "alerting")
err := os.MkdirAll(alertingDir, 0750)
require.NoError(t, err)
grafanaListedAddr, env := testinfra.StartGrafanaEnv(t, dir, p)
apiClient := newAlertingApiClient(grafanaListedAddr, "admin", "admin")
createUser(t, env.SQLStore, env.Cfg, user.CreateUserCommand{
DefaultOrgRole: string(org.RoleAdmin),
Password: "admin",
Login: "admin",
IsAdmin: true,
})
apiClient.ReloadCachedPermissions(t)
t.Run("when provisioning contact points from files", func(t *testing.T) {
// add file provisioned contact points
fileProvisionedContactPoints, err := testData.ReadFile(path.Join("test-data", "provisioning-contact-points.yaml"))
require.NoError(t, err)
var expected definitions.AlertingFileExport
require.NoError(t, yaml.Unmarshal(fileProvisionedContactPoints, &expected))
expectedYaml, err := yaml.Marshal(expected)
require.NoError(t, err)
err = os.WriteFile(filepath.Join(alertingDir, "provisioning-contact-points.yaml"), fileProvisionedContactPoints, 0750)
require.NoError(t, err)
apiClient.ReloadAlertingFileProvisioning(t)
t.Run("exported contact points should escape $ characters", func(t *testing.T) {
// call export endpoint
exportRaw := apiClient.ExportReceiver(t, "cp_1_$escaped", "yaml", true)
var export definitions.AlertingFileExport
require.NoError(t, yaml.Unmarshal([]byte(exportRaw), &export))
// verify the file exported matches the file provisioned thing
require.Len(t, export.ContactPoints, 1)
require.YAMLEq(t, string(expectedYaml), exportRaw)
})
})
} }

View File

@ -0,0 +1,21 @@
# config file version
apiVersion: 1
# List of contact points to import or update
contactPoints:
# <int> organization ID, default = 1
- orgId: 1
# <string, required> name of the contact point
name: cp_1_$$escaped
receivers:
# <string, required> unique identifier for the receiver. Should not exceed 40 symbols. Only letters, numbers, - (hyphen), and _ (underscore) allowed.
- uid: first_uid
# <string, required> type of the receiver
type: prometheus-alertmanager
# <bool, optional> Disable the additional [Incident Resolved] follow-up alert, default = false
disableResolveMessage: false
# <object, required> settings for the specific receiver type
settings:
url: http://test:9000
something: $$escaped

View File

@ -0,0 +1,159 @@
# config file version
apiVersion: 1
contactPoints:
# <int> organization ID, default = 1
- orgId: 1
# <string, required> name of the contact point
name: $$xyz
receivers:
# <string, required> unique identifier for the receiver. Should not exceed 40 symbols. Only letters, numbers, - (hyphen), and _ (underscore) allowed.
- uid: first_uid
# <string, required> type of the receiver
type: prometheus-alertmanager
# <bool, optional> Disable the additional [Incident Resolved] follow-up alert, default = false
disableResolveMessage: false
# <object, required> settings for the specific receiver type
settings:
url: http://test:9000
something: $$escaped
muteTimes:
# <int> organization ID, default = 1
- orgId: 1
# <string, required> name of the mute time interval, must be unique
name: $mute_time_1
# <list> time intervals that should trigger the muting
# refer to https://prometheus.io/docs/alerting/latest/configuration/#time_interval-0
time_intervals:
- times:
- start_time: "06:00"
end_time: "23:59"
location: "UTC"
weekdays: ["monday:wednesday", "saturday", "sunday"]
months: ["1:3", "may:august", "december"]
years: ["2020:2022", "2030"]
days_of_month: ["1:5", "-3:-1"]
- orgId: 1
# <string, required> name of the mute time interval, must be unique
name: $mute_time_2
# <list> time intervals that should trigger the muting
# refer to https://prometheus.io/docs/alerting/latest/configuration/#time_interval-0
time_intervals:
- times:
- start_time: "09:00"
end_time: "10:00"
location: "UTC"
weekdays: ["monday:wednesday", "saturday", "sunday"]
months: ["1:3", "may:august", "december"]
years: ["2020:2022", "2030"]
days_of_month: ["1:5", "-3:-1"]
# ONLY THESE PATHS ARE NOT TEMPLATED and therefore don't need escaping:
# Alert rule annotations: groups[].rules[].annotations
# Alert rule time range: groups[].rules[].relativeTimeRange
# Alert rule query model: groups[].rules[].data.model
groups:
# <int> organization ID, default = 1
- orgId: 1
# <string, required> name of the rule group
name: my_rule_group
# <string, required> name of the folder the rule group will be stored in
folder: my_first_folder_with_$$escaped_symbols
# <duration, required> interval that the rule group should be evaluated at
interval: 60s
# <list, required> list of rules that are part of the rule group
rules:
# <string, required> unique identifier for the rule. Should not exceed 40 symbols. Only letters, numbers, - (hyphen), and _ (underscore) allowed.
- uid: my_id_1
# <string, required> title of the rule that will be displayed in the UI
title: my_first_rule_with_$$escaped_symbols
# <string, required> which query should be used for the condition
condition: A
# <list, required> list of query objects that should be executed on each
# evaluation - should be obtained through the API
data:
- refId: A
datasourceUid: "__expr__"
model:
conditions:
- evaluator:
params:
- 3
type: gt
operator:
type: and
query:
params:
- A
reducer:
type: last
type: query
datasource:
type: __expr__
uid: "__expr__"
expression: 1==0
intervalMs: 1000
maxDataPoints: 43200
refId: A
type: math
# <string> UID of a dashboard that the alert rule should be linked to
dashboardUid: my_dashboard
# <int> ID of the panel that the alert rule should be linked to
panelId: 123
# <string> the state the alert rule will have when no data is returned
# possible values: "NoData", "Alerting", "OK", default = NoData
noDataState: Alerting
# <string> the state the alert rule will have when the query execution
# failed - possible values: "Error", "Alerting", "OK"
# default = Alerting
execErrState: Alerting
# <duration, required> for how long should the alert fire before alerting
for: 60s
# <map<string, string>> a map of strings to pass around any data
annotations:
some_key: some_value
$no_escaping_needed: $no_escaping_needed
# <map<string, string>> a map of strings that can be used to filter and
# route alerts
labels:
team: sre_team_1
label_keys_not_$escaped: $$escaped_value
something: "escaped in the middle of things $$value"
templated: "{{ $$labels.team }}"
middle: "u$$ing_escaped_symbols"
notification_settings:
receiver: $$xyz
group_by:
- label_keys_not_$$escaped
- something
group_wait: 5m
group_interval: 10m
repeat_interval: 10m
mute_time_intervals:
- $mute_time_1
- $mute_time_2
policies:
# <int> organization ID, default = 1
- orgId: 1
# <string> name of the contact point that should be used for this route
receiver: $$xyz
group_by:
- label_keys_not_$$escaped
# <list> a list of prometheus-like matchers that an alert rule has to fulfill to match the node (allowed chars
# [a-zA-Z_:])
matchers:
- alertname = Watchdog
- service_id_X = serviceX
- severity =~ "warning|critical"
# <list> a list of grafana-like matchers that an alert rule has to fulfill to match the node
object_matchers:
- ["alertname", "=", "CPUUsage"]
- ["service_id-X", "=", "serviceX"]
- ["severity", "=~", "warning|critical"]
group_wait: 30s
group_interval: 5m
repeat_interval: 4h
routes: []

View File

@ -0,0 +1,20 @@
# config file version
apiVersion: 1
# ONLY THESE PATHS ARE NOT TEMPLATED and therefore don't need escaping:
# Mute timings name: muteTimes[].name
# Mute timings time intervals: muteTimes[].time_intervals[]
muteTimes:
- orgId: 1
name: $mute_time_a
# <list> time intervals that should trigger the muting
# refer to https://prometheus.io/docs/alerting/latest/configuration/#time_interval-0
time_intervals:
- times:
- start_time: "06:00"
end_time: "23:59"
location: "UTC"
weekdays: ["monday:wednesday", "saturday", "sunday"]
months: ["1:3", "may:august", "december"]
years: ["2020:2022", "2030"]
days_of_month: ["1:5", "-3:-1"]

View File

@ -839,6 +839,32 @@ func (a apiClient) DeleteMuteTimingWithStatus(t *testing.T, name string) (int, s
return resp.StatusCode, string(body) return resp.StatusCode, string(body)
} }
// ExportMuteTiming requests the provisioning export of the mute timing called
// name in the given format and returns the raw response body. The test fails
// if the request cannot be built or sent, if the body cannot be read, or if
// the response status is not 200 OK.
func (a apiClient) ExportMuteTiming(t *testing.T, name string, format string) string {
	t.Helper()

	// Escape the name so that characters such as '/', '?', '#' or '%' stay
	// inside a single path segment instead of changing how the URL is parsed.
	u, err := url.Parse(fmt.Sprintf("%s/api/v1/provisioning/mute-timings/%s/export", a.url, url.PathEscape(name)))
	require.NoError(t, err)

	q := url.Values{}
	q.Set("format", format)
	u.RawQuery = q.Encode()

	req, err := http.NewRequest(http.MethodGet, u.String(), nil)
	require.NoError(t, err)

	client := &http.Client{}
	resp, err := client.Do(req)
	require.NoError(t, err)
	defer func() {
		// The close error is irrelevant once the body has been read.
		_ = resp.Body.Close()
	}()

	body, err := io.ReadAll(resp.Body)
	require.NoError(t, err)
	// The body is passed along with the status check.
	requireStatusCode(t, http.StatusOK, resp.StatusCode, string(body))
	return string(body)
}
func (a apiClient) GetRouteWithStatus(t *testing.T) (apimodels.Route, int, string) { func (a apiClient) GetRouteWithStatus(t *testing.T) (apimodels.Route, int, string) {
t.Helper() t.Helper()
@ -883,6 +909,32 @@ func (a apiClient) UpdateRouteWithStatus(t *testing.T, route apimodels.Route, no
return resp.StatusCode, string(body) return resp.StatusCode, string(body)
} }
// ExportNotificationPolicy requests the provisioning export of the
// notification policies in the given format and returns the raw response
// body. The test fails if the request cannot be built or sent, if the body
// cannot be read, or if the response status is not 200 OK.
func (a apiClient) ExportNotificationPolicy(t *testing.T, format string) string {
	t.Helper()

	endpoint, err := url.Parse(fmt.Sprintf("%s/api/v1/provisioning/policies/export", a.url))
	require.NoError(t, err)
	endpoint.RawQuery = url.Values{"format": []string{format}}.Encode()

	request, err := http.NewRequest(http.MethodGet, endpoint.String(), nil)
	require.NoError(t, err)

	httpClient := &http.Client{}
	response, err := httpClient.Do(request)
	require.NoError(t, err)
	defer func() {
		// The close error is irrelevant once the body has been read.
		_ = response.Body.Close()
	}()

	payload, err := io.ReadAll(response.Body)
	require.NoError(t, err)

	text := string(payload)
	requireStatusCode(t, http.StatusOK, response.StatusCode, text)
	return text
}
func (a apiClient) UpdateRoute(t *testing.T, route apimodels.Route, noProvenance bool) { func (a apiClient) UpdateRoute(t *testing.T, route apimodels.Route, noProvenance bool) {
t.Helper() t.Helper()
status, data := a.UpdateRouteWithStatus(t, route, noProvenance) status, data := a.UpdateRouteWithStatus(t, route, noProvenance)