grafana/pkg/services/ngalert/state/state_test.go
gotjosh dd502f22eb
Alerting: Fix alert flapping in the internal alertmanager (#38648)
* Alerting: Fix alert flapping in the alertmanager

fixes a bug that caused Alerts that are evaluated at low intervals (sub 1 minute), to flap in the Alertmanager.
Mostly due to a combination of `EndsAt` and resend delay.

The Alertmanager uses `EndsAt` as a heuristic to know whenever it should resolve a firing alert, in the case that it hasn't heard
back from the alert generation system.

Because grafana sent the alert with an `EndsAt` which is equal to the `For` of the alert itself,
and we had a hard-coded 1 minute re-send delay (only applicable to firing alerts) this meant that a firing alert would resolve in the Alertmanager before we re-notify that it still firing.

This commit, increases the `EndsAt` by 3x the the resend delay or alert interval (depending on which one is higher). The resendDelay has been decreased to 30 seconds.
2021-09-02 16:22:59 +01:00

192 lines
5.2 KiB
Go

package state
import (
"testing"
"time"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/stretchr/testify/assert"
)
func TestNeedsSending(t *testing.T) {
evaluationTime, _ := time.Parse("2006-01-02", "2021-03-25")
testCases := []struct {
name string
resendDelay time.Duration
expected bool
testState *State
}{
{
name: "state: alerting and LastSentAt before LastEvaluationTime + ResendDelay",
resendDelay: 1 * time.Minute,
expected: true,
testState: &State{
State: eval.Alerting,
LastEvaluationTime: evaluationTime,
LastSentAt: evaluationTime.Add(-2 * time.Minute),
},
},
{
name: "state: alerting and LastSentAt after LastEvaluationTime + ResendDelay",
resendDelay: 1 * time.Minute,
expected: false,
testState: &State{
State: eval.Alerting,
LastEvaluationTime: evaluationTime,
LastSentAt: evaluationTime,
},
},
{
name: "state: alerting and LastSentAt equals LastEvaluationTime + ResendDelay",
resendDelay: 1 * time.Minute,
expected: true,
testState: &State{
State: eval.Alerting,
LastEvaluationTime: evaluationTime,
LastSentAt: evaluationTime.Add(-1 * time.Minute),
},
},
{
name: "state: pending",
resendDelay: 1 * time.Minute,
expected: false,
testState: &State{
State: eval.Pending,
},
},
{
name: "state: alerting and ResendDelay is zero",
resendDelay: 0 * time.Minute,
expected: true,
testState: &State{
State: eval.Alerting,
LastEvaluationTime: evaluationTime,
LastSentAt: evaluationTime,
},
},
{
name: "state: normal + resolved sends after a minute",
resendDelay: 1 * time.Minute,
expected: true,
testState: &State{
State: eval.Normal,
Resolved: true,
LastEvaluationTime: evaluationTime,
LastSentAt: evaluationTime.Add(-1 * time.Minute),
},
},
{
name: "state: normal + resolved does _not_ send after 30 seconds (before one minute)",
resendDelay: 1 * time.Minute,
expected: false,
testState: &State{
State: eval.Normal,
Resolved: true,
LastEvaluationTime: evaluationTime,
LastSentAt: evaluationTime.Add(-30 * time.Second),
},
},
{
name: "state: normal but not resolved does not send after a minute",
resendDelay: 1 * time.Minute,
expected: false,
testState: &State{
State: eval.Normal,
Resolved: false,
LastEvaluationTime: evaluationTime,
LastSentAt: evaluationTime.Add(-1 * time.Minute),
},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
assert.Equal(t, tc.expected, tc.testState.NeedsSending(tc.resendDelay))
})
}
}
func TestSetEndsAt(t *testing.T) {
evaluationTime, _ := time.Parse("2006-01-02", "2021-03-25")
testCases := []struct {
name string
expected time.Time
testRule *ngmodels.AlertRule
testResult eval.Result
}{
{
name: "less than resend delay: for=unset,interval=10s - endsAt = resendDelay * 3",
expected: evaluationTime.Add(ResendDelay * 3),
testRule: &ngmodels.AlertRule{
IntervalSeconds: 10,
},
},
{
name: "less than resend delay: for=0s,interval=10s - endsAt = resendDelay * 3",
expected: evaluationTime.Add(ResendDelay * 3),
testRule: &ngmodels.AlertRule{
For: 0 * time.Second,
IntervalSeconds: 10,
},
},
{
name: "less than resend delay: for=10s,interval=10s - endsAt = resendDelay * 3",
expected: evaluationTime.Add(ResendDelay * 3),
testRule: &ngmodels.AlertRule{
For: 10 * time.Second,
IntervalSeconds: 10,
},
},
{
name: "less than resend delay: for=10s,interval=20s - endsAt = resendDelay * 3",
expected: evaluationTime.Add(ResendDelay * 3),
testRule: &ngmodels.AlertRule{
For: 10 * time.Second,
IntervalSeconds: 20,
},
},
{
name: "more than resend delay: for=unset,interval=1m - endsAt = interval * 3",
expected: evaluationTime.Add(60 * 3),
testRule: &ngmodels.AlertRule{
IntervalSeconds: 60,
},
},
{
name: "more than resend delay: for=0s,interval=1m - endsAt = resendDelay * 3",
expected: evaluationTime.Add(60 * 3),
testRule: &ngmodels.AlertRule{
For: 0 * time.Second,
IntervalSeconds: 60,
},
},
{
name: "more than resend delay: for=1m,interval=5m - endsAt = interval * 3",
expected: evaluationTime.Add(300 * 3),
testRule: &ngmodels.AlertRule{
For: 60 * time.Second,
IntervalSeconds: 300,
},
},
{
name: "more than resend delay: for=5m,interval=1m - endsAt = interval * 3",
expected: evaluationTime.Add(60 * 3),
testRule: &ngmodels.AlertRule{
For: 300 * time.Second,
IntervalSeconds: 60,
},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
s := &State{}
r := eval.Result{EvaluatedAt: evaluationTime}
s.setEndsAt(tc.testRule, r)
assert.Equal(t, tc.expected, s.EndsAt)
})
}
}