mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting: Clear alerting rule evaluation errors after intermittent failures (#42386)
* Alerting: Clear alerting rule evaluation errors after intermittent failures. When an alert transitioned through `alerting -> error -> (alerting|nodata)`, the error set by the `error` state was never cleared, so the API and UI kept showing the health as an error.
This commit is contained in:
parent
725dbf8d95
commit
dd5a2e5128
@ -3,6 +3,7 @@ package state_test
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@ -1254,6 +1255,187 @@ func TestProcessEvalResults(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "normal -> alerting -> error -> alerting - it should clear the error",
|
||||
alertRule: &models.AlertRule{
|
||||
OrgID: 1,
|
||||
Title: "test_title",
|
||||
UID: "test_alert_rule_uid_2",
|
||||
NamespaceUID: "test_namespace_uid",
|
||||
Annotations: map[string]string{"annotation": "test"},
|
||||
Labels: map[string]string{"label": "test"},
|
||||
IntervalSeconds: 10,
|
||||
For: 30 * time.Second,
|
||||
},
|
||||
evalResults: []eval.Results{
|
||||
{
|
||||
eval.Result{
|
||||
Instance: data.Labels{"instance_label": "test"},
|
||||
State: eval.Normal,
|
||||
EvaluatedAt: evaluationTime,
|
||||
EvaluationDuration: evaluationDuration,
|
||||
},
|
||||
},
|
||||
{
|
||||
eval.Result{
|
||||
Instance: data.Labels{"instance_label": "test"},
|
||||
State: eval.Alerting,
|
||||
EvaluatedAt: evaluationTime.Add(30 * time.Second),
|
||||
EvaluationDuration: evaluationDuration,
|
||||
},
|
||||
},
|
||||
{
|
||||
eval.Result{
|
||||
Instance: data.Labels{"instance_label": "test"},
|
||||
State: eval.Error,
|
||||
Error: fmt.Errorf("Failed to query data"),
|
||||
EvaluatedAt: evaluationTime.Add(40 * time.Second),
|
||||
EvaluationDuration: evaluationDuration,
|
||||
},
|
||||
},
|
||||
{
|
||||
eval.Result{
|
||||
Instance: data.Labels{"instance_label": "test"},
|
||||
State: eval.Alerting,
|
||||
EvaluatedAt: evaluationTime.Add(70 * time.Second),
|
||||
EvaluationDuration: evaluationDuration,
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedStates: map[string]*state.State{
|
||||
`[["__alert_rule_namespace_uid__","test_namespace_uid"],["__alert_rule_uid__","test_alert_rule_uid_2"],["alertname","test_title"],["instance_label","test"],["label","test"]]`: {
|
||||
AlertRuleUID: "test_alert_rule_uid_2",
|
||||
OrgID: 1,
|
||||
CacheId: `[["__alert_rule_namespace_uid__","test_namespace_uid"],["__alert_rule_uid__","test_alert_rule_uid_2"],["alertname","test_title"],["instance_label","test"],["label","test"]]`,
|
||||
Labels: data.Labels{
|
||||
"__alert_rule_namespace_uid__": "test_namespace_uid",
|
||||
"__alert_rule_uid__": "test_alert_rule_uid_2",
|
||||
"alertname": "test_title",
|
||||
"label": "test",
|
||||
"instance_label": "test",
|
||||
},
|
||||
State: eval.Alerting,
|
||||
Results: []state.Evaluation{
|
||||
{
|
||||
EvaluationTime: evaluationTime,
|
||||
EvaluationState: eval.Normal,
|
||||
Values: make(map[string]state.EvaluationValue),
|
||||
},
|
||||
{
|
||||
EvaluationTime: evaluationTime.Add(30 * time.Second),
|
||||
EvaluationState: eval.Alerting,
|
||||
Values: make(map[string]state.EvaluationValue),
|
||||
},
|
||||
{
|
||||
EvaluationTime: evaluationTime.Add(40 * time.Second),
|
||||
EvaluationState: eval.Error,
|
||||
Values: make(map[string]state.EvaluationValue),
|
||||
},
|
||||
{
|
||||
EvaluationTime: evaluationTime.Add(70 * time.Second),
|
||||
EvaluationState: eval.Alerting,
|
||||
Values: make(map[string]state.EvaluationValue),
|
||||
},
|
||||
},
|
||||
StartsAt: evaluationTime.Add(70 * time.Second),
|
||||
EndsAt: evaluationTime.Add(70 * time.Second).Add(state.ResendDelay * 3),
|
||||
LastEvaluationTime: evaluationTime.Add(70 * time.Second),
|
||||
EvaluationDuration: evaluationDuration,
|
||||
Annotations: map[string]string{"annotation": "test"},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "normal -> alerting -> error -> no data - it should clear the error",
|
||||
alertRule: &models.AlertRule{
|
||||
OrgID: 1,
|
||||
Title: "test_title",
|
||||
UID: "test_alert_rule_uid_2",
|
||||
NamespaceUID: "test_namespace_uid",
|
||||
Annotations: map[string]string{"annotation": "test"},
|
||||
Labels: map[string]string{"label": "test"},
|
||||
IntervalSeconds: 10,
|
||||
For: 30 * time.Second,
|
||||
NoDataState: models.NoData,
|
||||
},
|
||||
evalResults: []eval.Results{
|
||||
{
|
||||
eval.Result{
|
||||
Instance: data.Labels{"instance_label": "test"},
|
||||
State: eval.Normal,
|
||||
EvaluatedAt: evaluationTime,
|
||||
EvaluationDuration: evaluationDuration,
|
||||
},
|
||||
},
|
||||
{
|
||||
eval.Result{
|
||||
Instance: data.Labels{"instance_label": "test"},
|
||||
State: eval.Alerting,
|
||||
EvaluatedAt: evaluationTime.Add(30 * time.Second),
|
||||
EvaluationDuration: evaluationDuration,
|
||||
},
|
||||
},
|
||||
{
|
||||
eval.Result{
|
||||
Instance: data.Labels{"instance_label": "test"},
|
||||
State: eval.Error,
|
||||
Error: fmt.Errorf("Failed to query data"),
|
||||
EvaluatedAt: evaluationTime.Add(40 * time.Second),
|
||||
EvaluationDuration: evaluationDuration,
|
||||
},
|
||||
},
|
||||
{
|
||||
eval.Result{
|
||||
Instance: data.Labels{"instance_label": "test"},
|
||||
State: eval.NoData,
|
||||
EvaluatedAt: evaluationTime.Add(50 * time.Second),
|
||||
EvaluationDuration: evaluationDuration,
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedStates: map[string]*state.State{
|
||||
`[["__alert_rule_namespace_uid__","test_namespace_uid"],["__alert_rule_uid__","test_alert_rule_uid_2"],["alertname","test_title"],["instance_label","test"],["label","test"]]`: {
|
||||
AlertRuleUID: "test_alert_rule_uid_2",
|
||||
OrgID: 1,
|
||||
CacheId: `[["__alert_rule_namespace_uid__","test_namespace_uid"],["__alert_rule_uid__","test_alert_rule_uid_2"],["alertname","test_title"],["instance_label","test"],["label","test"]]`,
|
||||
Labels: data.Labels{
|
||||
"__alert_rule_namespace_uid__": "test_namespace_uid",
|
||||
"__alert_rule_uid__": "test_alert_rule_uid_2",
|
||||
"alertname": "test_title",
|
||||
"label": "test",
|
||||
"instance_label": "test",
|
||||
},
|
||||
State: eval.NoData,
|
||||
Results: []state.Evaluation{
|
||||
{
|
||||
EvaluationTime: evaluationTime,
|
||||
EvaluationState: eval.Normal,
|
||||
Values: make(map[string]state.EvaluationValue),
|
||||
},
|
||||
{
|
||||
EvaluationTime: evaluationTime.Add(30 * time.Second),
|
||||
EvaluationState: eval.Alerting,
|
||||
Values: make(map[string]state.EvaluationValue),
|
||||
},
|
||||
{
|
||||
EvaluationTime: evaluationTime.Add(40 * time.Second),
|
||||
EvaluationState: eval.Error,
|
||||
Values: make(map[string]state.EvaluationValue),
|
||||
},
|
||||
{
|
||||
EvaluationTime: evaluationTime.Add(50 * time.Second),
|
||||
EvaluationState: eval.NoData,
|
||||
Values: make(map[string]state.EvaluationValue),
|
||||
},
|
||||
},
|
||||
StartsAt: evaluationTime.Add(30 * time.Second),
|
||||
EndsAt: evaluationTime.Add(50 * time.Second).Add(state.ResendDelay * 3),
|
||||
LastEvaluationTime: evaluationTime.Add(50 * time.Second),
|
||||
EvaluationDuration: evaluationDuration,
|
||||
Annotations: map[string]string{"annotation": "test"},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "template is correctly expanded",
|
||||
alertRule: &models.AlertRule{
|
||||
|
@ -57,15 +57,18 @@ func NewEvaluationValues(m map[string]eval.NumberValueCapture) map[string]Evalua
|
||||
}
|
||||
|
||||
// resultNormal applies a Normal evaluation result to the state: it copies
// result.Error onto the state, sets both StartsAt and EndsAt to the
// evaluation time when the previous state was not Normal, and then marks the
// state as Normal. The alertRule argument is not read in this function.
func (a *State) resultNormal(alertRule *ngModels.AlertRule, result eval.Result) {
|
||||
a.Error = result.Error // should be nil since state is not error
|
||||
|
||||
if a.State != eval.Normal {
|
||||
// Entering Normal from another state: both bounds take this evaluation's timestamp.
a.EndsAt = result.EvaluatedAt
|
||||
a.StartsAt = result.EvaluatedAt
|
||||
}
|
||||
// NOTE(review): same assignment as at the top of the function; presumably this
// diff view shows the added line and the removed line together — confirm.
a.Error = result.Error // should be nil since state is not error
|
||||
a.State = eval.Normal
|
||||
}
|
||||
|
||||
func (a *State) resultAlerting(alertRule *ngModels.AlertRule, result eval.Result) {
|
||||
a.Error = result.Error // should be nil since the state is not an error
|
||||
|
||||
switch a.State {
|
||||
case eval.Alerting:
|
||||
a.setEndsAt(alertRule, result)
|
||||
@ -118,6 +121,8 @@ func (a *State) resultError(alertRule *ngModels.AlertRule, result eval.Result) {
|
||||
}
|
||||
|
||||
func (a *State) resultNoData(alertRule *ngModels.AlertRule, result eval.Result) {
|
||||
a.Error = result.Error
|
||||
|
||||
if a.StartsAt.IsZero() {
|
||||
a.StartsAt = result.EvaluatedAt
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user