diff --git a/docs/sources/alerting/unified-alerting/alerting-rules/create-grafana-managed-rule.md b/docs/sources/alerting/unified-alerting/alerting-rules/create-grafana-managed-rule.md index 2b4d50e0659..10c8e04a3ce 100644 --- a/docs/sources/alerting/unified-alerting/alerting-rules/create-grafana-managed-rule.md +++ b/docs/sources/alerting/unified-alerting/alerting-rules/create-grafana-managed-rule.md @@ -69,7 +69,8 @@ Configure alerting behavior in the absence of data using information in the foll | Alerting | Set alert rule state to `Alerting`. | | Ok | Set alert rule state to `Normal`. | -| Error or timeout option | Description | -| ----------------------- | ---------------------------------- | -| Alerting | Set alert rule state to `Alerting` | -| OK | Set alert rule state to `Normal` | +| Error or timeout option | Description | +| ----------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | +| Alerting | Set alert rule state to `Alerting` | +| OK | Set alert rule state to `Normal` | +| Error | Create a new alert `DatasourceError` with the name and UID of the alert rule, and UID of the datasource that returned the error as labels. 
| diff --git a/pkg/services/ngalert/models/alert_rule.go b/pkg/services/ngalert/models/alert_rule.go index e8bfc6a8442..07c1e13dea1 100644 --- a/pkg/services/ngalert/models/alert_rule.go +++ b/pkg/services/ngalert/models/alert_rule.go @@ -41,6 +41,7 @@ func (executionErrorState ExecutionErrorState) String() string { const ( AlertingErrState ExecutionErrorState = "Alerting" + ErrorErrState ExecutionErrorState = "Error" ) const ( diff --git a/pkg/services/ngalert/schedule/compat.go b/pkg/services/ngalert/schedule/compat.go index 5892840a3e7..a382570795f 100644 --- a/pkg/services/ngalert/schedule/compat.go +++ b/pkg/services/ngalert/schedule/compat.go @@ -19,6 +19,7 @@ import ( const ( NoDataAlertName = "DatasourceNoData" + ErrorAlertName = "DatasourceError" Rulename = "rulename" ) @@ -52,6 +53,10 @@ func stateToPostableAlert(alertState *state.State, appURL *url.URL) *models.Post return noDataAlert(nL, nA, alertState, urlStr) } + if alertState.State == eval.Error { + return errorAlert(nL, nA, alertState, urlStr) + } + return &models.PostableAlert{ Annotations: models.LabelSet(nA), StartsAt: strfmt.DateTime(alertState.StartsAt), @@ -84,6 +89,25 @@ func noDataAlert(labels data.Labels, annotations data.Labels, alertState *state. } } +// errorAlert is a special alert sent when evaluation of an alert rule failed due to an error. Like noDataAlert, it +// replaces the old behaviour of "Keep Last State" creating a separate alert called DatasourceError. 
+func errorAlert(labels, annotations data.Labels, alertState *state.State, urlStr string) *models.PostableAlert { + if name, ok := labels[model.AlertNameLabel]; ok { + labels[Rulename] = name + } + labels[model.AlertNameLabel] = ErrorAlertName + + return &models.PostableAlert{ + Annotations: models.LabelSet(annotations), + StartsAt: strfmt.DateTime(alertState.StartsAt), + EndsAt: strfmt.DateTime(alertState.EndsAt), + Alert: models.Alert{ + Labels: models.LabelSet(labels), + GeneratorURL: strfmt.URI(urlStr), + }, + } +} + func FromAlertStateToPostableAlerts(firingStates []*state.State, stateManager *state.Manager, appURL *url.URL) apimodels.PostableAlerts { alerts := apimodels.PostableAlerts{PostableAlerts: make([]models.PostableAlert, 0, len(firingStates))} var sentAlerts []*state.State diff --git a/pkg/services/ngalert/schedule/compat_test.go b/pkg/services/ngalert/schedule/compat_test.go index 05af8c10031..e8eafc98ba7 100644 --- a/pkg/services/ngalert/schedule/compat_test.go +++ b/pkg/services/ngalert/schedule/compat_test.go @@ -152,6 +152,35 @@ func Test_stateToPostableAlert(t *testing.T) { require.NotContains(t, result.Labels[model.AlertNameLabel], Rulename) }) }) + case eval.Error: + t.Run("should keep existing labels and change name", func(t *testing.T) { + alertState := randomState(tc.state) + alertState.Labels = randomMapOfStrings() + alertName := util.GenerateShortUID() + alertState.Labels[model.AlertNameLabel] = alertName + + result := stateToPostableAlert(alertState, appURL) + + expected := make(models.LabelSet, len(alertState.Labels)+1) + for k, v := range alertState.Labels { + expected[k] = v + } + expected[model.AlertNameLabel] = ErrorAlertName + expected[Rulename] = alertName + + require.Equal(t, expected, result.Labels) + + t.Run("should not backup original alert name if it does not exist", func(t *testing.T) { + alertState := randomState(tc.state) + alertState.Labels = randomMapOfStrings() + delete(alertState.Labels, model.AlertNameLabel) + + 
result := stateToPostableAlert(alertState, appURL) + + require.Equal(t, ErrorAlertName, result.Labels[model.AlertNameLabel]) + require.NotContains(t, result.Labels[model.AlertNameLabel], Rulename) + }) + }) default: t.Run("should copy labels as is", func(t *testing.T) { alertState := randomState(tc.state) diff --git a/pkg/services/ngalert/state/manager_test.go b/pkg/services/ngalert/state/manager_test.go index 8d87c9b1cc1..2ef40512a3c 100644 --- a/pkg/services/ngalert/state/manager_test.go +++ b/pkg/services/ngalert/state/manager_test.go @@ -2,11 +2,13 @@ package state_test import ( "context" + "errors" "testing" "time" "github.com/grafana/grafana-plugin-sdk-go/data" + "github.com/grafana/grafana/pkg/expr" "github.com/grafana/grafana/pkg/infra/log" "github.com/grafana/grafana/pkg/services/ngalert/eval" "github.com/grafana/grafana/pkg/services/ngalert/metrics" @@ -1175,6 +1177,83 @@ func TestProcessEvalResults(t *testing.T) { }, }, }, + { + desc: "normal -> error when result is Error and ExecErrState is Error", + alertRule: &models.AlertRule{ + OrgID: 1, + Title: "test_title", + UID: "test_alert_rule_uid_2", + NamespaceUID: "test_namespace_uid", + Data: []models.AlertQuery{{ + RefID: "A", + DatasourceUID: "datasource_uid_1", + }}, + Annotations: map[string]string{"annotation": "test"}, + Labels: map[string]string{"label": "test"}, + IntervalSeconds: 10, + For: 1 * time.Minute, + ExecErrState: models.ErrorErrState, + }, + evalResults: []eval.Results{ + { + eval.Result{ + Instance: data.Labels{"instance_label": "test"}, + State: eval.Normal, + EvaluatedAt: evaluationTime, + EvaluationDuration: evaluationDuration, + }, + }, + { + eval.Result{ + Instance: data.Labels{"instance_label": "test"}, + Error: expr.QueryError{ + RefID: "A", + Err: errors.New("this is an error"), + }, + State: eval.Error, + EvaluatedAt: evaluationTime.Add(10 * time.Second), + EvaluationDuration: evaluationDuration, + }, + }, + }, + expectedStates: map[string]*state.State{ + 
`[["__alert_rule_namespace_uid__","test_namespace_uid"],["__alert_rule_uid__","test_alert_rule_uid_2"],["alertname","test_title"],["instance_label","test"],["label","test"]]`: { + AlertRuleUID: "test_alert_rule_uid_2", + OrgID: 1, + CacheId: `[["__alert_rule_namespace_uid__","test_namespace_uid"],["__alert_rule_uid__","test_alert_rule_uid_2"],["alertname","test_title"],["instance_label","test"],["label","test"]]`, + Labels: data.Labels{ + "__alert_rule_namespace_uid__": "test_namespace_uid", + "__alert_rule_uid__": "test_alert_rule_uid_2", + "alertname": "test_title", + "label": "test", + "instance_label": "test", + "datasource_uid": "datasource_uid_1", + }, + State: eval.Error, + Error: expr.QueryError{ + RefID: "A", + Err: errors.New("this is an error"), + }, + Results: []state.Evaluation{ + { + EvaluationTime: evaluationTime, + EvaluationState: eval.Normal, + Values: make(map[string]state.EvaluationValue), + }, + { + EvaluationTime: evaluationTime.Add(10 * time.Second), + EvaluationState: eval.Error, + Values: make(map[string]state.EvaluationValue), + }, + }, + StartsAt: evaluationTime.Add(10 * time.Second), + EndsAt: evaluationTime.Add(10 * time.Second).Add(state.ResendDelay * 3), + LastEvaluationTime: evaluationTime.Add(10 * time.Second), + EvaluationDuration: evaluationDuration, + Annotations: map[string]string{"annotation": "test", "Error": "failed to execute query A: this is an error"}, + }, + }, + }, { desc: "template is correctly expanded", alertRule: &models.AlertRule{ diff --git a/pkg/services/ngalert/state/state.go b/pkg/services/ngalert/state/state.go index 6fa9f93d780..7c5f6b43b02 100644 --- a/pkg/services/ngalert/state/state.go +++ b/pkg/services/ngalert/state/state.go @@ -1,10 +1,12 @@ package state import ( + "errors" "time" "github.com/grafana/grafana-plugin-sdk-go/data" + "github.com/grafana/grafana/pkg/expr" "github.com/grafana/grafana/pkg/services/ngalert/eval" ngModels "github.com/grafana/grafana/pkg/services/ngalert/models" ) @@ -87,6 +89,7 
@@ func (a *State) resultAlerting(alertRule *ngModels.AlertRule, result eval.Result func (a *State) resultError(alertRule *ngModels.AlertRule, result eval.Result) { a.Error = result.Error + if a.StartsAt.IsZero() { a.StartsAt = result.EvaluatedAt } @@ -94,6 +97,23 @@ func (a *State) resultError(alertRule *ngModels.AlertRule, result eval.Result) { if alertRule.ExecErrState == ngModels.AlertingErrState { a.State = eval.Alerting + } else if alertRule.ExecErrState == ngModels.ErrorErrState { + a.State = eval.Error + + // If the evaluation failed because a query returned an error then + // update the state with the Datasource UID as a label and the error + // message as an annotation so other code can use this metadata to + // add context to alerts + var queryError expr.QueryError + if errors.As(a.Error, &queryError) { + for _, next := range alertRule.Data { + if next.RefID == queryError.RefID { + a.Labels["datasource_uid"] = next.DatasourceUID + break + } + } + a.Annotations["Error"] = queryError.Error() + } } } @@ -114,7 +134,7 @@ func (a *State) resultNoData(alertRule *ngModels.AlertRule, result eval.Result) } func (a *State) NeedsSending(resendDelay time.Duration) bool { - if a.State == eval.Pending || a.State == eval.Error || a.State == eval.Normal && !a.Resolved { + if a.State == eval.Pending || a.State == eval.Normal && !a.Resolved { return false } // if LastSentAt is before or equal to LastEvaluationTime + resendDelay, send again diff --git a/pkg/services/ngalert/state/state_test.go b/pkg/services/ngalert/state/state_test.go index a7d27ca7d5b..b5a3ee1e54d 100644 --- a/pkg/services/ngalert/state/state_test.go +++ b/pkg/services/ngalert/state/state_test.go @@ -122,7 +122,7 @@ func TestNeedsSending(t *testing.T) { }, { name: "state: error, needs to be re-sent", - expected: false, + expected: true, resendDelay: 1 * time.Minute, testState: &State{ State: eval.Error, diff --git a/pkg/services/sqlstore/migrations/ualert/alert_rule.go 
b/pkg/services/sqlstore/migrations/ualert/alert_rule.go index c33b3a2be2a..2b962611760 100644 --- a/pkg/services/sqlstore/migrations/ualert/alert_rule.go +++ b/pkg/services/sqlstore/migrations/ualert/alert_rule.go @@ -131,6 +131,10 @@ func (m *migration) makeAlertRule(cond condition, da dashAlert, folderUID string m.mg.Logger.Error("alert migration error: failed to create silence", "rule_name", ar.Title, "err", err) } + if err := m.addErrorSilence(da, ar); err != nil { + m.mg.Logger.Error("alert migration error: failed to create silence for Error", "rule_name", ar.Title, "err", err) + } + if err := m.addNoDataSilence(da, ar); err != nil { m.mg.Logger.Error("alert migration error: failed to create silence for NoData", "rule_name", ar.Title, "err", err) } @@ -215,7 +219,9 @@ func transExecErr(s string) (string, error) { case "", "alerting": return "Alerting", nil case "keep_state": - return "Alerting", nil + // Keep last state is translated to error as we now emit a + // DatasourceError alert when the state is error + return "Error", nil } return "", fmt.Errorf("unrecognized Execution Error setting %v", s) } diff --git a/pkg/services/sqlstore/migrations/ualert/silences.go b/pkg/services/sqlstore/migrations/ualert/silences.go index 648183d1c24..02bd997ae65 100644 --- a/pkg/services/sqlstore/migrations/ualert/silences.go +++ b/pkg/services/sqlstore/migrations/ualert/silences.go @@ -22,6 +22,8 @@ import ( const ( // Should be the same as 'NoDataAlertName' in pkg/services/schedule/compat.go. 
NoDataAlertName = "DatasourceNoData" + + ErrorAlertName = "DatasourceError" ) func (m *migration) addSilence(da dashAlert, rule *alertRule) error { @@ -61,6 +63,45 @@ func (m *migration) addSilence(da dashAlert, rule *alertRule) error { return nil } +func (m *migration) addErrorSilence(da dashAlert, rule *alertRule) error { + if da.ParsedSettings.ExecutionErrorState != "keep_state" { + return nil + } + + uid, err := uuid.NewV4() + if err != nil { + return errors.New("failed to create uuid for silence") + } + + s := &pb.MeshSilence{ + Silence: &pb.Silence{ + Id: uid.String(), + Matchers: []*pb.Matcher{ + { + Type: pb.Matcher_EQUAL, + Name: model.AlertNameLabel, + Pattern: ErrorAlertName, + }, + { + Type: pb.Matcher_EQUAL, + Name: "rule_uid", + Pattern: rule.UID, + }, + }, + StartsAt: time.Now(), + EndsAt: time.Now().AddDate(1, 0, 0), // 1 year + CreatedBy: "Grafana Migration", + Comment: fmt.Sprintf("Created during migration to unified alerting to silence Error state for alert rule ID '%s' and Title '%s' because the option 'Keep Last State' was selected for Error state", rule.UID, rule.Title), + }, + ExpiresAt: time.Now().AddDate(1, 0, 0), // 1 year + } + if _, ok := m.silences[da.OrgId]; !ok { + m.silences[da.OrgId] = make([]*pb.MeshSilence, 0) + } + m.silences[da.OrgId] = append(m.silences[da.OrgId], s) + return nil +} + func (m *migration) addNoDataSilence(da dashAlert, rule *alertRule) error { if da.ParsedSettings.NoDataState != "keep_state" { return nil diff --git a/public/app/features/alerting/unified/components/rule-editor/GrafanaAlertStatePicker.tsx b/public/app/features/alerting/unified/components/rule-editor/GrafanaAlertStatePicker.tsx index 9666ea8588a..42eab78203a 100644 --- a/public/app/features/alerting/unified/components/rule-editor/GrafanaAlertStatePicker.tsx +++ b/public/app/features/alerting/unified/components/rule-editor/GrafanaAlertStatePicker.tsx @@ -6,20 +6,25 @@ import React, { FC, useMemo } from 'react'; type Props = Omit, 'options'> & { 
includeNoData: boolean; + includeError: boolean; }; const options: SelectableValue[] = [ { value: GrafanaAlertStateDecision.Alerting, label: 'Alerting' }, { value: GrafanaAlertStateDecision.NoData, label: 'No Data' }, { value: GrafanaAlertStateDecision.OK, label: 'OK' }, + { value: GrafanaAlertStateDecision.Error, label: 'Error' }, ]; -export const GrafanaAlertStatePicker: FC = ({ includeNoData, ...props }) => { +export const GrafanaAlertStatePicker: FC = ({ includeNoData, includeError, ...props }) => { const opts = useMemo(() => { - if (includeNoData) { - return options; + if (!includeNoData) { + return options.filter((opt) => opt.value !== GrafanaAlertStateDecision.NoData); } - return options.filter((opt) => opt.value !== GrafanaAlertStateDecision.NoData); - }, [includeNoData]); + if (!includeError) { + return options.filter((opt) => opt.value !== GrafanaAlertStateDecision.Error); + } + return options; + }, [includeNoData, includeError]); return