mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting: Clear alerting rule evaluation errors after intermittent failures (#42386)
* Alerting: Clear alerting rule evaluation errors after intermittent failures. When an alert transitioned through `alerting -> error -> (alerting|nodata)`, the error set by the `error` state was never cleared, so the API and UI kept showing the rule's health as an error.
This commit is contained in:
parent
725dbf8d95
commit
dd5a2e5128
@ -3,6 +3,7 @@ package state_test
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@ -1254,6 +1255,187 @@ func TestProcessEvalResults(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
desc: "normal -> alerting -> error -> alerting - it should clear the error",
|
||||||
|
alertRule: &models.AlertRule{
|
||||||
|
OrgID: 1,
|
||||||
|
Title: "test_title",
|
||||||
|
UID: "test_alert_rule_uid_2",
|
||||||
|
NamespaceUID: "test_namespace_uid",
|
||||||
|
Annotations: map[string]string{"annotation": "test"},
|
||||||
|
Labels: map[string]string{"label": "test"},
|
||||||
|
IntervalSeconds: 10,
|
||||||
|
For: 30 * time.Second,
|
||||||
|
},
|
||||||
|
evalResults: []eval.Results{
|
||||||
|
{
|
||||||
|
eval.Result{
|
||||||
|
Instance: data.Labels{"instance_label": "test"},
|
||||||
|
State: eval.Normal,
|
||||||
|
EvaluatedAt: evaluationTime,
|
||||||
|
EvaluationDuration: evaluationDuration,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
eval.Result{
|
||||||
|
Instance: data.Labels{"instance_label": "test"},
|
||||||
|
State: eval.Alerting,
|
||||||
|
EvaluatedAt: evaluationTime.Add(30 * time.Second),
|
||||||
|
EvaluationDuration: evaluationDuration,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
eval.Result{
|
||||||
|
Instance: data.Labels{"instance_label": "test"},
|
||||||
|
State: eval.Error,
|
||||||
|
Error: fmt.Errorf("Failed to query data"),
|
||||||
|
EvaluatedAt: evaluationTime.Add(40 * time.Second),
|
||||||
|
EvaluationDuration: evaluationDuration,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
eval.Result{
|
||||||
|
Instance: data.Labels{"instance_label": "test"},
|
||||||
|
State: eval.Alerting,
|
||||||
|
EvaluatedAt: evaluationTime.Add(70 * time.Second),
|
||||||
|
EvaluationDuration: evaluationDuration,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedStates: map[string]*state.State{
|
||||||
|
`[["__alert_rule_namespace_uid__","test_namespace_uid"],["__alert_rule_uid__","test_alert_rule_uid_2"],["alertname","test_title"],["instance_label","test"],["label","test"]]`: {
|
||||||
|
AlertRuleUID: "test_alert_rule_uid_2",
|
||||||
|
OrgID: 1,
|
||||||
|
CacheId: `[["__alert_rule_namespace_uid__","test_namespace_uid"],["__alert_rule_uid__","test_alert_rule_uid_2"],["alertname","test_title"],["instance_label","test"],["label","test"]]`,
|
||||||
|
Labels: data.Labels{
|
||||||
|
"__alert_rule_namespace_uid__": "test_namespace_uid",
|
||||||
|
"__alert_rule_uid__": "test_alert_rule_uid_2",
|
||||||
|
"alertname": "test_title",
|
||||||
|
"label": "test",
|
||||||
|
"instance_label": "test",
|
||||||
|
},
|
||||||
|
State: eval.Alerting,
|
||||||
|
Results: []state.Evaluation{
|
||||||
|
{
|
||||||
|
EvaluationTime: evaluationTime,
|
||||||
|
EvaluationState: eval.Normal,
|
||||||
|
Values: make(map[string]state.EvaluationValue),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
EvaluationTime: evaluationTime.Add(30 * time.Second),
|
||||||
|
EvaluationState: eval.Alerting,
|
||||||
|
Values: make(map[string]state.EvaluationValue),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
EvaluationTime: evaluationTime.Add(40 * time.Second),
|
||||||
|
EvaluationState: eval.Error,
|
||||||
|
Values: make(map[string]state.EvaluationValue),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
EvaluationTime: evaluationTime.Add(70 * time.Second),
|
||||||
|
EvaluationState: eval.Alerting,
|
||||||
|
Values: make(map[string]state.EvaluationValue),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
StartsAt: evaluationTime.Add(70 * time.Second),
|
||||||
|
EndsAt: evaluationTime.Add(70 * time.Second).Add(state.ResendDelay * 3),
|
||||||
|
LastEvaluationTime: evaluationTime.Add(70 * time.Second),
|
||||||
|
EvaluationDuration: evaluationDuration,
|
||||||
|
Annotations: map[string]string{"annotation": "test"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "normal -> alerting -> error -> no data - it should clear the error",
|
||||||
|
alertRule: &models.AlertRule{
|
||||||
|
OrgID: 1,
|
||||||
|
Title: "test_title",
|
||||||
|
UID: "test_alert_rule_uid_2",
|
||||||
|
NamespaceUID: "test_namespace_uid",
|
||||||
|
Annotations: map[string]string{"annotation": "test"},
|
||||||
|
Labels: map[string]string{"label": "test"},
|
||||||
|
IntervalSeconds: 10,
|
||||||
|
For: 30 * time.Second,
|
||||||
|
NoDataState: models.NoData,
|
||||||
|
},
|
||||||
|
evalResults: []eval.Results{
|
||||||
|
{
|
||||||
|
eval.Result{
|
||||||
|
Instance: data.Labels{"instance_label": "test"},
|
||||||
|
State: eval.Normal,
|
||||||
|
EvaluatedAt: evaluationTime,
|
||||||
|
EvaluationDuration: evaluationDuration,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
eval.Result{
|
||||||
|
Instance: data.Labels{"instance_label": "test"},
|
||||||
|
State: eval.Alerting,
|
||||||
|
EvaluatedAt: evaluationTime.Add(30 * time.Second),
|
||||||
|
EvaluationDuration: evaluationDuration,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
eval.Result{
|
||||||
|
Instance: data.Labels{"instance_label": "test"},
|
||||||
|
State: eval.Error,
|
||||||
|
Error: fmt.Errorf("Failed to query data"),
|
||||||
|
EvaluatedAt: evaluationTime.Add(40 * time.Second),
|
||||||
|
EvaluationDuration: evaluationDuration,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
eval.Result{
|
||||||
|
Instance: data.Labels{"instance_label": "test"},
|
||||||
|
State: eval.NoData,
|
||||||
|
EvaluatedAt: evaluationTime.Add(50 * time.Second),
|
||||||
|
EvaluationDuration: evaluationDuration,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedStates: map[string]*state.State{
|
||||||
|
`[["__alert_rule_namespace_uid__","test_namespace_uid"],["__alert_rule_uid__","test_alert_rule_uid_2"],["alertname","test_title"],["instance_label","test"],["label","test"]]`: {
|
||||||
|
AlertRuleUID: "test_alert_rule_uid_2",
|
||||||
|
OrgID: 1,
|
||||||
|
CacheId: `[["__alert_rule_namespace_uid__","test_namespace_uid"],["__alert_rule_uid__","test_alert_rule_uid_2"],["alertname","test_title"],["instance_label","test"],["label","test"]]`,
|
||||||
|
Labels: data.Labels{
|
||||||
|
"__alert_rule_namespace_uid__": "test_namespace_uid",
|
||||||
|
"__alert_rule_uid__": "test_alert_rule_uid_2",
|
||||||
|
"alertname": "test_title",
|
||||||
|
"label": "test",
|
||||||
|
"instance_label": "test",
|
||||||
|
},
|
||||||
|
State: eval.NoData,
|
||||||
|
Results: []state.Evaluation{
|
||||||
|
{
|
||||||
|
EvaluationTime: evaluationTime,
|
||||||
|
EvaluationState: eval.Normal,
|
||||||
|
Values: make(map[string]state.EvaluationValue),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
EvaluationTime: evaluationTime.Add(30 * time.Second),
|
||||||
|
EvaluationState: eval.Alerting,
|
||||||
|
Values: make(map[string]state.EvaluationValue),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
EvaluationTime: evaluationTime.Add(40 * time.Second),
|
||||||
|
EvaluationState: eval.Error,
|
||||||
|
Values: make(map[string]state.EvaluationValue),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
EvaluationTime: evaluationTime.Add(50 * time.Second),
|
||||||
|
EvaluationState: eval.NoData,
|
||||||
|
Values: make(map[string]state.EvaluationValue),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
StartsAt: evaluationTime.Add(30 * time.Second),
|
||||||
|
EndsAt: evaluationTime.Add(50 * time.Second).Add(state.ResendDelay * 3),
|
||||||
|
LastEvaluationTime: evaluationTime.Add(50 * time.Second),
|
||||||
|
EvaluationDuration: evaluationDuration,
|
||||||
|
Annotations: map[string]string{"annotation": "test"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
desc: "template is correctly expanded",
|
desc: "template is correctly expanded",
|
||||||
alertRule: &models.AlertRule{
|
alertRule: &models.AlertRule{
|
||||||
|
@ -57,15 +57,18 @@ func NewEvaluationValues(m map[string]eval.NumberValueCapture) map[string]Evalua
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (a *State) resultNormal(alertRule *ngModels.AlertRule, result eval.Result) {
|
func (a *State) resultNormal(alertRule *ngModels.AlertRule, result eval.Result) {
|
||||||
|
a.Error = result.Error // should be nil since state is not error
|
||||||
|
|
||||||
if a.State != eval.Normal {
|
if a.State != eval.Normal {
|
||||||
a.EndsAt = result.EvaluatedAt
|
a.EndsAt = result.EvaluatedAt
|
||||||
a.StartsAt = result.EvaluatedAt
|
a.StartsAt = result.EvaluatedAt
|
||||||
}
|
}
|
||||||
a.Error = result.Error // should be nil since state is not error
|
|
||||||
a.State = eval.Normal
|
a.State = eval.Normal
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *State) resultAlerting(alertRule *ngModels.AlertRule, result eval.Result) {
|
func (a *State) resultAlerting(alertRule *ngModels.AlertRule, result eval.Result) {
|
||||||
|
a.Error = result.Error // should be nil since the state is not an error
|
||||||
|
|
||||||
switch a.State {
|
switch a.State {
|
||||||
case eval.Alerting:
|
case eval.Alerting:
|
||||||
a.setEndsAt(alertRule, result)
|
a.setEndsAt(alertRule, result)
|
||||||
@ -118,6 +121,8 @@ func (a *State) resultError(alertRule *ngModels.AlertRule, result eval.Result) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (a *State) resultNoData(alertRule *ngModels.AlertRule, result eval.Result) {
|
func (a *State) resultNoData(alertRule *ngModels.AlertRule, result eval.Result) {
|
||||||
|
a.Error = result.Error
|
||||||
|
|
||||||
if a.StartsAt.IsZero() {
|
if a.StartsAt.IsZero() {
|
||||||
a.StartsAt = result.EvaluatedAt
|
a.StartsAt = result.EvaluatedAt
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user