Alerting: Alert rule should wait For duration when execution error state is Alerting (#47052)

Alerting: Alert rule should wait For duration when execution error state is Alerting
This commit is contained in:
George Robinson 2022-03-31 09:57:58 +01:00 committed by GitHub
parent 554492ec4e
commit 79769132c0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 125 additions and 15 deletions

View File

@ -69,8 +69,8 @@ Configure alerting behavior in the absence of data using information in the foll
| Alerting | Set alert rule state to `Alerting`. |
| Ok | Set alert rule state to `Normal`. |
| Error or timeout option | Description |
| ----------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- |
| Alerting | Set alert rule state to `Alerting` |
| OK | Set alert rule state to `Normal` |
| Error | Create a new alert `DatasourceError` with the name and UID of the alert rule, and UID of the datasource that returned no data as labels. |
| Error or timeout option | Description |
| ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- |
| Alerting | Set alert rule state to `Alerting`. Starting with Grafana 8.5, the alert rule stays in the `Pending` state until the error has persisted for the rule's entire `For` duration, and only then fires. |
| OK | Set alert rule state to `Normal`. |
| Error | Create a new alert `DatasourceError` with the name and UID of the alert rule, and the UID of the datasource that returned the error, as labels. |

View File

@ -1160,7 +1160,7 @@ func TestProcessEvalResults(t *testing.T) {
},
},
{
desc: "normal -> alerting when result is Error and ExecErrState is Alerting",
desc: "normal -> pending when For is set but not exceeded, result is Error and ExecErrState is Alerting",
alertRule: &models.AlertRule{
OrgID: 1,
Title: "test_title",
@ -1203,7 +1203,7 @@ func TestProcessEvalResults(t *testing.T) {
"label": "test",
"instance_label": "test",
},
State: eval.Alerting,
State: eval.Pending,
Results: []state.Evaluation{
{
EvaluationTime: evaluationTime,
@ -1224,6 +1224,100 @@ func TestProcessEvalResults(t *testing.T) {
},
},
},
{
desc: "normal -> alerting when For is exceeded, result is Error and ExecErrState is Alerting",
alertRule: &models.AlertRule{
OrgID: 1,
Title: "test_title",
UID: "test_alert_rule_uid_2",
NamespaceUID: "test_namespace_uid",
Annotations: map[string]string{"annotation": "test"},
Labels: map[string]string{"label": "test"},
IntervalSeconds: 10,
For: 30 * time.Second,
ExecErrState: models.AlertingErrState,
},
evalResults: []eval.Results{
{
eval.Result{
Instance: data.Labels{"instance_label": "test"},
State: eval.Normal,
EvaluatedAt: evaluationTime,
EvaluationDuration: evaluationDuration,
},
},
{
eval.Result{
Instance: data.Labels{"instance_label": "test"},
State: eval.Error,
EvaluatedAt: evaluationTime.Add(10 * time.Second),
EvaluationDuration: evaluationDuration,
},
},
{
eval.Result{
Instance: data.Labels{"instance_label": "test"},
State: eval.Error,
EvaluatedAt: evaluationTime.Add(20 * time.Second),
EvaluationDuration: evaluationDuration,
},
},
{
eval.Result{
Instance: data.Labels{"instance_label": "test"},
State: eval.Error,
EvaluatedAt: evaluationTime.Add(30 * time.Second),
EvaluationDuration: evaluationDuration,
},
},
{
eval.Result{
Instance: data.Labels{"instance_label": "test"},
State: eval.Error,
EvaluatedAt: evaluationTime.Add(40 * time.Second),
EvaluationDuration: evaluationDuration,
},
},
},
expectedAnnotations: 2,
expectedStates: map[string]*state.State{
`[["__alert_rule_namespace_uid__","test_namespace_uid"],["__alert_rule_uid__","test_alert_rule_uid_2"],["alertname","test_title"],["instance_label","test"],["label","test"]]`: {
AlertRuleUID: "test_alert_rule_uid_2",
OrgID: 1,
CacheId: `[["__alert_rule_namespace_uid__","test_namespace_uid"],["__alert_rule_uid__","test_alert_rule_uid_2"],["alertname","test_title"],["instance_label","test"],["label","test"]]`,
Labels: data.Labels{
"__alert_rule_namespace_uid__": "test_namespace_uid",
"__alert_rule_uid__": "test_alert_rule_uid_2",
"alertname": "test_title",
"label": "test",
"instance_label": "test",
},
State: eval.Alerting,
Results: []state.Evaluation{
{
EvaluationTime: evaluationTime.Add(20 * time.Second),
EvaluationState: eval.Error,
Values: make(map[string]*float64),
},
{
EvaluationTime: evaluationTime.Add(30 * time.Second),
EvaluationState: eval.Error,
Values: make(map[string]*float64),
},
{
EvaluationTime: evaluationTime.Add(40 * time.Second),
EvaluationState: eval.Error,
Values: make(map[string]*float64),
},
},
StartsAt: evaluationTime.Add(40 * time.Second),
EndsAt: evaluationTime.Add(40 * time.Second).Add(state.ResendDelay * 3),
LastEvaluationTime: evaluationTime.Add(40 * time.Second),
EvaluationDuration: evaluationDuration,
Annotations: map[string]string{"annotation": "test"},
},
},
},
{
desc: "normal -> error when result is Error and ExecErrState is Error",
alertRule: &models.AlertRule{

View File

@ -84,16 +84,10 @@ func (a *State) resultAlerting(alertRule *ngModels.AlertRule, result eval.Result
func (a *State) resultError(alertRule *ngModels.AlertRule, result eval.Result) {
a.Error = result.Error
if a.StartsAt.IsZero() {
a.StartsAt = result.EvaluatedAt
}
a.setEndsAt(alertRule, result)
execErrState := eval.Error
if alertRule.ExecErrState == ngModels.AlertingErrState {
a.State = eval.Alerting
execErrState = eval.Alerting
} else if alertRule.ExecErrState == ngModels.ErrorErrState {
a.State = eval.Error
// If the evaluation failed because a query returned an error then
// update the state with the Datasource UID as a label and the error
// message as an annotation so other code can use this metadata to
@ -109,6 +103,28 @@ func (a *State) resultError(alertRule *ngModels.AlertRule, result eval.Result) {
}
a.Annotations["Error"] = queryError.Error()
}
execErrState = eval.Error
}
switch a.State {
case eval.Alerting, eval.Error:
a.setEndsAt(alertRule, result)
case eval.Pending:
if result.EvaluatedAt.Sub(a.StartsAt) >= alertRule.For {
a.State = execErrState
a.StartsAt = result.EvaluatedAt
a.setEndsAt(alertRule, result)
}
default:
// For is observed when Alerting is chosen for the alert state
// if execution error or timeout.
if execErrState == eval.Alerting && alertRule.For > 0 {
a.State = eval.Pending
} else {
a.State = execErrState
}
a.StartsAt = result.EvaluatedAt
a.setEndsAt(alertRule, result)
}
}