Alerting: Alert rule should wait For duration when execution error state is Alerting (#47052)

Alerting: Alert rule should wait For duration when execution error state is Alerting
2025-01-10 08:03:58 -06:00 · 2022-03-31 09:57:58 +01:00 · 2022-03-31 09:57:58 +01:00 · 79769132c0
commit 79769132c0
parent 554492ec4e
3 changed files with 125 additions and 15 deletions
--- a/docs/sources/alerting/unified-alerting/alerting-rules/create-grafana-managed-rule.md
+++ b/docs/sources/alerting/unified-alerting/alerting-rules/create-grafana-managed-rule.md
@ -69,8 +69,8 @@ Configure alerting behavior in the absence of data using information in the foll
 | Alerting       | Set alert rule state to `Alerting`.                                                                                                       |
 | Ok             | Set alert rule state to `Normal`.                                                                                                         |

-| Error or timeout option | Description                                                                                                                              |
-| ----------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- |
-| Alerting                | Set alert rule state to `Alerting`                                                                                                       |
-| OK                      | Set alert rule state to `Normal`                                                                                                         |
-| Error                   | Create a new alert `DatasourceError` with the name and UID of the alert rule, and UID of the datasource that returned no data as labels. |
+| Error or timeout option | Description                                                                                                                                       |
+| ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- |
+| Alerting                | Set alert rule state to `Alerting`. From Grafana 8.5, the alert rule waits for the entire `For` duration before firing.                           |
+| OK                      | Set alert rule state to `Normal`                                                                                                                  |
+| Error                   | Create a new alert `DatasourceError` with the name and UID of the alert rule, and UID of the datasource that returned no data as labels.          |
--- a/pkg/services/ngalert/state/manager_test.go
+++ b/pkg/services/ngalert/state/manager_test.go
@ -1160,7 +1160,7 @@ func TestProcessEvalResults(t *testing.T) {
 			},
 		},
 		{
-			desc: "normal -> alerting when result is Error and ExecErrState is Alerting",
+			desc: "normal -> pending when For is set but not exceeded, result is Error and ExecErrState is Alerting",
 			alertRule: &models.AlertRule{
 				OrgID:           1,
 				Title:           "test_title",
@ -1203,7 +1203,7 @@ func TestProcessEvalResults(t *testing.T) {
 						"label":                        "test",
 						"instance_label":               "test",
 					},
-					State: eval.Alerting,
+					State: eval.Pending,
 					Results: []state.Evaluation{
 						{
 							EvaluationTime:  evaluationTime,
@ -1224,6 +1224,100 @@ func TestProcessEvalResults(t *testing.T) {
 				},
 			},
 		},
+		{
+			desc: "normal -> alerting when For is exceeded, result is Error and ExecErrState is Alerting",
+			alertRule: &models.AlertRule{
+				OrgID:           1,
+				Title:           "test_title",
+				UID:             "test_alert_rule_uid_2",
+				NamespaceUID:    "test_namespace_uid",
+				Annotations:     map[string]string{"annotation": "test"},
+				Labels:          map[string]string{"label": "test"},
+				IntervalSeconds: 10,
+				For:             30 * time.Second,
+				ExecErrState:    models.AlertingErrState,
+			},
+			evalResults: []eval.Results{
+				{
+					eval.Result{
+						Instance:           data.Labels{"instance_label": "test"},
+						State:              eval.Normal,
+						EvaluatedAt:        evaluationTime,
+						EvaluationDuration: evaluationDuration,
+					},
+				},
+				{
+					eval.Result{
+						Instance:           data.Labels{"instance_label": "test"},
+						State:              eval.Error,
+						EvaluatedAt:        evaluationTime.Add(10 * time.Second),
+						EvaluationDuration: evaluationDuration,
+					},
+				},
+				{
+					eval.Result{
+						Instance:           data.Labels{"instance_label": "test"},
+						State:              eval.Error,
+						EvaluatedAt:        evaluationTime.Add(20 * time.Second),
+						EvaluationDuration: evaluationDuration,
+					},
+				},
+				{
+					eval.Result{
+						Instance:           data.Labels{"instance_label": "test"},
+						State:              eval.Error,
+						EvaluatedAt:        evaluationTime.Add(30 * time.Second),
+						EvaluationDuration: evaluationDuration,
+					},
+				},
+				{
+					eval.Result{
+						Instance:           data.Labels{"instance_label": "test"},
+						State:              eval.Error,
+						EvaluatedAt:        evaluationTime.Add(40 * time.Second),
+						EvaluationDuration: evaluationDuration,
+					},
+				},
+			},
+			expectedAnnotations: 2,
+			expectedStates: map[string]*state.State{
+				`[["__alert_rule_namespace_uid__","test_namespace_uid"],["__alert_rule_uid__","test_alert_rule_uid_2"],["alertname","test_title"],["instance_label","test"],["label","test"]]`: {
+					AlertRuleUID: "test_alert_rule_uid_2",
+					OrgID:        1,
+					CacheId:      `[["__alert_rule_namespace_uid__","test_namespace_uid"],["__alert_rule_uid__","test_alert_rule_uid_2"],["alertname","test_title"],["instance_label","test"],["label","test"]]`,
+					Labels: data.Labels{
+						"__alert_rule_namespace_uid__": "test_namespace_uid",
+						"__alert_rule_uid__":           "test_alert_rule_uid_2",
+						"alertname":                    "test_title",
+						"label":                        "test",
+						"instance_label":               "test",
+					},
+					State: eval.Alerting,
+					Results: []state.Evaluation{
+						{
+							EvaluationTime:  evaluationTime.Add(20 * time.Second),
+							EvaluationState: eval.Error,
+							Values:          make(map[string]*float64),
+						},
+						{
+							EvaluationTime:  evaluationTime.Add(30 * time.Second),
+							EvaluationState: eval.Error,
+							Values:          make(map[string]*float64),
+						},
+						{
+							EvaluationTime:  evaluationTime.Add(40 * time.Second),
+							EvaluationState: eval.Error,
+							Values:          make(map[string]*float64),
+						},
+					},
+					StartsAt:           evaluationTime.Add(40 * time.Second),
+					EndsAt:             evaluationTime.Add(40 * time.Second).Add(state.ResendDelay * 3),
+					LastEvaluationTime: evaluationTime.Add(40 * time.Second),
+					EvaluationDuration: evaluationDuration,
+					Annotations:        map[string]string{"annotation": "test"},
+				},
+			},
+		},
 		{
 			desc: "normal -> error when result is Error and ExecErrState is Error",
 			alertRule: &models.AlertRule{
--- a/pkg/services/ngalert/state/state.go
+++ b/pkg/services/ngalert/state/state.go
@ -84,16 +84,10 @@ func (a *State) resultAlerting(alertRule *ngModels.AlertRule, result eval.Result
 func (a *State) resultError(alertRule *ngModels.AlertRule, result eval.Result) {
 	a.Error = result.Error

-	if a.StartsAt.IsZero() {
-		a.StartsAt = result.EvaluatedAt
-	}
-	a.setEndsAt(alertRule, result)
-
+	execErrState := eval.Error
 	if alertRule.ExecErrState == ngModels.AlertingErrState {
-		a.State = eval.Alerting
+		execErrState = eval.Alerting
 	} else if alertRule.ExecErrState == ngModels.ErrorErrState {
-		a.State = eval.Error
-
 		// If the evaluation failed because a query returned an error then
 		// update the state with the Datasource UID as a label and the error
 		// message as an annotation so other code can use this metadata to
@ -109,6 +103,28 @@ func (a *State) resultError(alertRule *ngModels.AlertRule, result eval.Result) {
 			}
 			a.Annotations["Error"] = queryError.Error()
 		}
+		execErrState = eval.Error
+	}
+
+	switch a.State {
+	case eval.Alerting, eval.Error:
+		a.setEndsAt(alertRule, result)
+	case eval.Pending:
+		if result.EvaluatedAt.Sub(a.StartsAt) >= alertRule.For {
+			a.State = execErrState
+			a.StartsAt = result.EvaluatedAt
+			a.setEndsAt(alertRule, result)
+		}
+	default:
+		// The rule's For duration is observed when Alerting is the state
+		// configured for execution errors or timeouts.
+		if execErrState == eval.Alerting && alertRule.For > 0 {
+			a.State = eval.Pending
+		} else {
+			a.State = execErrState
+		}
+		a.StartsAt = result.EvaluatedAt
+		a.setEndsAt(alertRule, result)
 	}
 }