Alerting: Convert 'Both' type Prometheus queries to 'Range' in migration (#70781)

* Alerting: Convert 'Both' type Prometheus queries to 'Range' in migration
This commit is contained in:
Matthew Jacobson 2023-06-28 14:02:57 -04:00 committed by GitHub
parent 0cd56627db
commit 00d5f7fed7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 108 additions and 3 deletions

View File

@ -149,6 +149,8 @@ longer supported.
1. The JSON format for webhook notifications has changed in Grafana Alerting and uses the format from [Prometheus Alertmanager](https://prometheus.io/docs/alerting/latest/configuration/#webhook_config). 1. The JSON format for webhook notifications has changed in Grafana Alerting and uses the format from [Prometheus Alertmanager](https://prometheus.io/docs/alerting/latest/configuration/#webhook_config).
1. Alerting on Prometheus `Both` type queries is not supported in Grafana Alerting. Existing legacy alerts with `Both` type queries are migrated to Grafana Alerting as alerts with `Range` type queries.
**Limitations** **Limitations**
1. Since `Hipchat` and `Sensu` notification channels are no longer supported, legacy alerts associated with these channels are not automatically migrated to Grafana Alerting. Assign the legacy alerts to a supported notification channel so that you continue to receive notifications for those alerts. 1. Since `Hipchat` and `Sensu` notification channels are no longer supported, legacy alerts associated with these channels are not automatically migrated to Grafana Alerting. Assign the legacy alerts to a supported notification channel so that you continue to receive notifications for those alerts.

View File

@ -111,7 +111,7 @@ func (m *migration) makeAlertRule(l log.Logger, cond condition, da dashAlert, fo
annotations["message"] = da.Message annotations["message"] = da.Message
var err error var err error
data, err := migrateAlertRuleQueries(cond.Data) data, err := migrateAlertRuleQueries(l, cond.Data)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to migrate alert rule queries: %w", err) return nil, fmt.Errorf("failed to migrate alert rule queries: %w", err)
} }
@ -168,7 +168,7 @@ func (m *migration) makeAlertRule(l log.Logger, cond condition, da dashAlert, fo
} }
// migrateAlertRuleQueries attempts to fix alert rule queries so they can work in unified alerting. Queries of some data sources are not compatible with unified alerting. // migrateAlertRuleQueries attempts to fix alert rule queries so they can work in unified alerting. Queries of some data sources are not compatible with unified alerting.
func migrateAlertRuleQueries(data []alertQuery) ([]alertQuery, error) { func migrateAlertRuleQueries(l log.Logger, data []alertQuery) ([]alertQuery, error) {
result := make([]alertQuery, 0, len(data)) result := make([]alertQuery, 0, len(data))
for _, d := range data { for _, d := range data {
// queries that are expression are not relevant, skip them. // queries that are expression are not relevant, skip them.
@ -184,6 +184,7 @@ func migrateAlertRuleQueries(data []alertQuery) ([]alertQuery, error) {
// remove hidden tag from the query (if exists) // remove hidden tag from the query (if exists)
delete(fixedData, "hide") delete(fixedData, "hide")
fixedData = fixGraphiteReferencedSubQueries(fixedData) fixedData = fixGraphiteReferencedSubQueries(fixedData)
fixedData = fixPrometheusBothTypeQuery(l, fixedData)
updatedModel, err := json.Marshal(fixedData) updatedModel, err := json.Marshal(fixedData)
if err != nil { if err != nil {
return nil, err return nil, err
@ -206,6 +207,76 @@ func fixGraphiteReferencedSubQueries(queryData map[string]json.RawMessage) map[s
return queryData return queryData
} }
// fixPrometheusBothTypeQuery converts Prometheus 'Both' type queries to range queries.
func fixPrometheusBothTypeQuery(l log.Logger, queryData map[string]json.RawMessage) map[string]json.RawMessage {
// There is the possibility to support this functionality by:
// - Splitting the query into two: one for instant and one for range.
// - Splitting the condition into two: one for each query, separated by OR.
// However, relying on a 'Both' query instead of multiple conditions to do this in legacy is likely
// to be unintentional. In addition, this would require more robust operator precedence in classic conditions.
// Given these reasons, we opt to convert them to range queries and log a warning.
var instant bool
if instantRaw, ok := queryData["instant"]; ok {
if err := json.Unmarshal(instantRaw, &instant); err != nil {
// Nothing to do here, we can't parse the instant field.
if isPrometheus, _ := isPrometheusQuery(queryData); isPrometheus {
l.Info("Failed to parse instant field on Prometheus query", "instant", string(instantRaw), "err", err)
}
return queryData
}
}
var rng bool
if rangeRaw, ok := queryData["range"]; ok {
if err := json.Unmarshal(rangeRaw, &rng); err != nil {
// Nothing to do here, we can't parse the range field.
if isPrometheus, _ := isPrometheusQuery(queryData); isPrometheus {
l.Info("Failed to parse range field on Prometheus query", "range", string(rangeRaw), "err", err)
}
return queryData
}
}
if !instant || !rng {
// Only apply this fix to 'Both' type queries.
return queryData
}
isPrometheus, err := isPrometheusQuery(queryData)
if err != nil {
l.Info("Unable to convert alert rule that resembles a Prometheus 'Both' type query to 'Range'", "err", err)
return queryData
}
if !isPrometheus {
// Only apply this fix to Prometheus.
return queryData
}
// Convert 'Both' type queries to `Range` queries by disabling the `Instant` portion.
l.Warn("Prometheus 'Both' type queries are not supported in unified alerting. Converting to range query.")
queryData["instant"] = []byte("false")
return queryData
}
// isPrometheusQuery checks if the query is for Prometheus.
func isPrometheusQuery(queryData map[string]json.RawMessage) (bool, error) {
ds, ok := queryData["datasource"]
if !ok {
return false, fmt.Errorf("missing datasource field")
}
var datasource struct {
Type string `json:"type"`
}
if err := json.Unmarshal(ds, &datasource); err != nil {
return false, fmt.Errorf("failed to parse datasource '%s': %w", string(ds), err)
}
if datasource.Type == "" {
return false, fmt.Errorf("missing type field '%s'", string(ds))
}
return datasource.Type == "prometheus", nil
}
type alertQuery struct { type alertQuery struct {
// RefID is the unique identifier of the query, set by the frontend call. // RefID is the unique identifier of the query, set by the frontend call.
RefID string `json:"refId"` RefID string `json:"refId"`

View File

@ -35,13 +35,45 @@ func TestMigrateAlertRuleQueries(t *testing.T) {
input: simplejson.NewFromAny(map[string]interface{}{"hide": true}), input: simplejson.NewFromAny(map[string]interface{}{"hide": true}),
expected: `{}`, expected: `{}`,
}, },
{
name: "when prometheus both type query, convert to range",
input: simplejson.NewFromAny(map[string]interface{}{
"datasource": map[string]string{
"type": "prometheus",
},
"instant": true,
"range": true,
}),
expected: `{"datasource":{"type":"prometheus"},"instant":false,"range":true}`,
},
{
name: "when prometheus instant type query, do nothing",
input: simplejson.NewFromAny(map[string]interface{}{
"datasource": map[string]string{
"type": "prometheus",
},
"instant": true,
}),
expected: `{"datasource":{"type":"prometheus"},"instant":true}`,
},
{
name: "when non-prometheus with instant and range, do nothing",
input: simplejson.NewFromAny(map[string]interface{}{
"datasource": map[string]string{
"type": "something",
},
"instant": true,
"range": true,
}),
expected: `{"datasource":{"type":"something"},"instant":true,"range":true}`,
},
} }
for _, tt := range tc { for _, tt := range tc {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
model, err := tt.input.Encode() model, err := tt.input.Encode()
require.NoError(t, err) require.NoError(t, err)
queries, err := migrateAlertRuleQueries([]alertQuery{{Model: model}}) queries, err := migrateAlertRuleQueries(&logtest.Fake{}, []alertQuery{{Model: model}})
if tt.err != nil { if tt.err != nil {
require.Error(t, err) require.Error(t, err)
require.EqualError(t, err, tt.err.Error()) require.EqualError(t, err, tt.err.Error())