Alerting: Fix bug where state did not change between Alerting and Error (#52204)

This commit fixes a bug where the state did not change from Alerting to Error if the evaluation result returned an error, or from Error to Alerting if evaluations stopped returning errors.
This commit is contained in:
George Robinson 2022-07-14 10:53:39 +01:00 committed by GitHub
parent cabdfb4811
commit 34d45977ca
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 103 additions and 0 deletions

View File

@ -1655,6 +1655,103 @@ func TestProcessEvalResults(t *testing.T) {
},
},
},
{
desc: "normal -> alerting -> error when result is Error and ExecErrorState is Error",
alertRule: &models.AlertRule{
OrgID: 1,
Title: "test_title",
UID: "test_alert_rule_uid_2",
NamespaceUID: "test_namespace_uid",
Annotations: map[string]string{"annotation": "test"},
Labels: map[string]string{"label": "test"},
IntervalSeconds: 10,
For: 20 * time.Second,
ExecErrState: models.ErrorErrState,
},
evalResults: []eval.Results{
{
eval.Result{
Instance: data.Labels{"instance_label": "test"},
State: eval.Alerting,
EvaluatedAt: evaluationTime,
EvaluationDuration: evaluationDuration,
},
},
{
eval.Result{
Instance: data.Labels{"instance_label": "test"},
State: eval.Alerting,
EvaluatedAt: evaluationTime.Add(10 * time.Second),
EvaluationDuration: evaluationDuration,
},
},
{
eval.Result{
Instance: data.Labels{"instance_label": "test"},
State: eval.Alerting,
EvaluatedAt: evaluationTime.Add(20 * time.Second),
EvaluationDuration: evaluationDuration,
},
},
{
eval.Result{
Instance: data.Labels{"instance_label": "test"},
State: eval.Error,
EvaluatedAt: evaluationTime.Add(30 * time.Second),
EvaluationDuration: evaluationDuration,
},
},
{
eval.Result{
Instance: data.Labels{"instance_label": "test"},
State: eval.Error,
EvaluatedAt: evaluationTime.Add(40 * time.Second),
EvaluationDuration: evaluationDuration,
},
},
{
eval.Result{
Instance: data.Labels{"instance_label": "test"},
State: eval.Error,
EvaluatedAt: evaluationTime.Add(50 * time.Second),
EvaluationDuration: evaluationDuration,
},
},
},
expectedAnnotations: 3,
expectedStates: map[string]*state.State{
`[["__alert_rule_namespace_uid__","test_namespace_uid"],["__alert_rule_uid__","test_alert_rule_uid_2"],["alertname","test_title"],["instance_label","test"],["label","test"]]`: {
AlertRuleUID: "test_alert_rule_uid_2",
OrgID: 1,
CacheId: `[["__alert_rule_namespace_uid__","test_namespace_uid"],["__alert_rule_uid__","test_alert_rule_uid_2"],["alertname","test_title"],["instance_label","test"],["label","test"]]`,
Labels: data.Labels{
"__alert_rule_namespace_uid__": "test_namespace_uid",
"__alert_rule_uid__": "test_alert_rule_uid_2",
"alertname": "test_title",
"label": "test",
"instance_label": "test",
},
State: eval.Error,
Results: []state.Evaluation{
{
EvaluationTime: evaluationTime.Add(40 * time.Second),
EvaluationState: eval.Error,
Values: make(map[string]*float64),
},
{
EvaluationTime: evaluationTime.Add(50 * time.Second),
EvaluationState: eval.Error,
Values: make(map[string]*float64),
},
},
StartsAt: evaluationTime.Add(20 * time.Second),
EndsAt: evaluationTime.Add(50 * time.Second).Add(state.ResendDelay * 3),
LastEvaluationTime: evaluationTime.Add(50 * time.Second),
EvaluationDuration: evaluationDuration,
Annotations: map[string]string{"annotation": "test"},
},
},
},
{
desc: "normal -> alerting -> error -> alerting - it should clear the error",
alertRule: &models.AlertRule{

View File

@ -122,6 +122,12 @@ func (a *State) resultError(alertRule *models.AlertRule, result eval.Result) {
switch a.State {
case eval.Alerting, eval.Error:
// We must set the state here as the state can change both from Alerting
// to Error and from Error to Alerting. This can happen when the datasource
// is unavailable or queries against the datasource return errors, and is
// then resolved as soon as the datasource is available and queries return
// without error.
a.State = execErrState
a.setEndsAt(alertRule, result)
case eval.Pending:
if result.EvaluatedAt.Sub(a.StartsAt) >= alertRule.For {