Alerting: Better define how we set states (#59977)

This commit better defines how we set states in resultNormal,
resultAlerting, resultError and resultNoData. It changes the existing
code to call methods such as SetAlerting, SetPending, SetNormal,
SetError and SetNoData instead of assigning values to each individual field
whenever the state is changed. This should make it easier to understand
what fields should be set for which states and avoid cases where states are
missing, or have additional unexpected fields.
This commit is contained in:
George Robinson 2022-12-08 20:12:13 +00:00 committed by GitHub
parent b2c4af16d7
commit 76601f3ae7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 426 additions and 140 deletions

View File

@ -116,8 +116,9 @@ const (
ValueStringAnnotation = "__value_string__"
)
var (
const (
StateReasonMissingSeries = "MissingSeries"
StateReasonError = "Error"
)
var (

View File

@ -82,11 +82,62 @@ func (a *State) GetAlertInstanceKey() (models.AlertInstanceKey, error) {
return models.AlertInstanceKey{RuleOrgID: a.OrgID, RuleUID: a.AlertRuleUID, LabelsHash: labelsHash}, nil
}
// SetAlerting transitions the state to Alerting with the given reason.
// Both the start and end times are overwritten, and any error carried over
// from the previous state is cleared.
func (a *State) SetAlerting(reason string, startsAt, endsAt time.Time) {
	a.StartsAt, a.EndsAt = startsAt, endsAt
	a.State, a.StateReason = eval.Alerting, reason
	a.Error = nil
}
// SetPending transitions the state to Pending with the given reason.
// Both the start and end times are overwritten, and any error carried over
// from the previous state is cleared.
func (a *State) SetPending(reason string, startsAt, endsAt time.Time) {
	a.StartsAt, a.EndsAt = startsAt, endsAt
	a.State, a.StateReason = eval.Pending, reason
	a.Error = nil
}
// SetNoData transitions the state to NoData with the given reason.
// Both the start and end times are overwritten, and any error carried over
// from the previous state is cleared.
func (a *State) SetNoData(reason string, startsAt, endsAt time.Time) {
	a.StartsAt, a.EndsAt = startsAt, endsAt
	a.State, a.StateReason = eval.NoData, reason
	a.Error = nil
}
// SetError transitions the state to Error and records err on the state.
// The reason is always models.StateReasonError; both the start and end times
// are overwritten.
func (a *State) SetError(err error, startsAt, endsAt time.Time) {
	a.StartsAt, a.EndsAt = startsAt, endsAt
	a.State, a.StateReason = eval.Error, models.StateReasonError
	a.Error = err
}
// SetNormal transitions the state to Normal with the given reason.
// Both the start and end times are overwritten, and any error carried over
// from the previous state is cleared.
func (a *State) SetNormal(reason string, startsAt, endsAt time.Time) {
	a.StartsAt, a.EndsAt = startsAt, endsAt
	a.State, a.StateReason = eval.Normal, reason
	a.Error = nil
}
// Resolve sets the State to Normal with the given reason, updates the end
// time and marks the state as resolved. The start time and Error are left
// untouched.
func (a *State) Resolve(reason string, endsAt time.Time) {
	a.State = eval.Normal
	a.StateReason = reason
	a.EndsAt = endsAt
	a.Resolved = true
}
// Maintain keeps the current state and only pushes the end time forward,
// recomputing it from the rule interval (in seconds) and the time of the
// most recent evaluation.
func (a *State) Maintain(interval int64, evaluatedAt time.Time) {
	next := nextEndsTime(interval, evaluatedAt)
	a.EndsAt = next
}
// StateTransition describes the transition from one state to another.
@ -129,83 +180,77 @@ func NewEvaluationValues(m map[string]eval.NumberValueCapture) map[string]*float
}
func resultNormal(state *State, _ *models.AlertRule, result eval.Result, logger log.Logger) {
state.Error = nil // should be nil since state is not error
if state.State != eval.Normal {
if state.State == eval.Normal {
logger.Debug("Keeping state", "state", state.State)
} else {
logger.Debug("Changing state", "previous_state", state.State, "next_state", eval.Normal)
state.State = eval.Normal
state.StartsAt = result.EvaluatedAt
state.EndsAt = result.EvaluatedAt
// Normal states have the same start and end timestamps
state.SetNormal("", result.EvaluatedAt, result.EvaluatedAt)
}
}
func resultAlerting(state *State, rule *models.AlertRule, result eval.Result, logger log.Logger) {
state.Error = result.Error
switch state.State {
case eval.Alerting:
// If the previous state is Alerting then update the expiration time
state.setEndsAt(rule, result)
logger.Debug("Keeping state", "state", state.State)
state.Maintain(rule.IntervalSeconds, result.EvaluatedAt)
case eval.Pending:
// If the previous state is Pending then check if the For duration has been observed
if result.EvaluatedAt.Sub(state.StartsAt) >= rule.For {
logger.Debug("Changing state", "previous_state", state.State, "next_state", eval.Alerting)
state.State = eval.Alerting
state.StartsAt = result.EvaluatedAt
state.setEndsAt(rule, result)
state.SetAlerting("", result.EvaluatedAt, nextEndsTime(rule.IntervalSeconds, result.EvaluatedAt))
}
default:
if rule.For > 0 {
// If the alert rule has a For duration that should be observed then the state should be set to Pending
logger.Debug("Changing state", "previous_state", state.State, "next_state", eval.Pending)
state.State = eval.Pending
state.SetPending("", result.EvaluatedAt, nextEndsTime(rule.IntervalSeconds, result.EvaluatedAt))
} else {
logger.Debug("Changing state", "previous_state", state.State, "next_state", eval.Alerting)
state.State = eval.Alerting
state.SetAlerting("", result.EvaluatedAt, nextEndsTime(rule.IntervalSeconds, result.EvaluatedAt))
}
state.StartsAt = result.EvaluatedAt
state.setEndsAt(rule, result)
}
}
func resultError(state *State, rule *models.AlertRule, result eval.Result, logger log.Logger) {
switch rule.ExecErrState {
case models.AlertingErrState:
logger.Debug("Execution error state is Alerting", "handler", "resultAlerting", "previous_handler", "resultError")
resultAlerting(state, rule, result, logger)
case models.ErrorErrState:
// This is a special case where Alerting and Pending should also have an error and reason
state.Error = result.Error
if result.Error != nil {
// If the evaluation failed because a query returned an error then add the Ref ID and
// Datasource UID as labels
var queryError expr.QueryError
if errors.As(state.Error, &queryError) {
for _, next := range rule.Data {
if next.RefID == queryError.RefID {
state.Labels["ref_id"] = next.RefID
state.Labels["datasource_uid"] = next.DatasourceUID
break
}
}
state.Annotations["Error"] = queryError.Error()
}
}
state.StateReason = "error"
case models.ErrorErrState:
if state.State == eval.Error {
// If the previous state is Error then update the expiration time
state.setEndsAt(rule, result)
logger.Debug("Keeping state", "state", state.State)
state.Maintain(rule.IntervalSeconds, result.EvaluatedAt)
} else {
// This is the first occurrence of an error
logger.Debug("Changing state", "previous_state", state.State, "next_state", eval.Error)
state.State = eval.Error
state.StartsAt = result.EvaluatedAt
state.setEndsAt(rule, result)
state.SetError(result.Error, result.EvaluatedAt, nextEndsTime(rule.IntervalSeconds, result.EvaluatedAt))
if result.Error != nil {
// If the evaluation failed because a query returned an error then add the Ref ID and
// Datasource UID as labels
var queryError expr.QueryError
if errors.As(state.Error, &queryError) {
for _, next := range rule.Data {
if next.RefID == queryError.RefID {
state.Labels["ref_id"] = next.RefID
state.Labels["datasource_uid"] = next.DatasourceUID
break
}
}
state.Annotations["Error"] = queryError.Error()
}
}
}
case models.OkErrState:
logger.Debug("Execution error state is Normal", "handler", "resultNormal", "previous_handler", "resultError")
resultNormal(state, rule, result, logger)
default:
state.State = eval.Error
state.Error = fmt.Errorf("unsupported execution error state: %s", rule.ExecErrState)
state.Annotations["Error"] = state.Error.Error()
err := fmt.Errorf("unsupported execution error state: %s", rule.ExecErrState)
state.SetError(err, state.StartsAt, nextEndsTime(rule.IntervalSeconds, result.EvaluatedAt))
state.Annotations["Error"] = err.Error()
}
}
@ -215,7 +260,7 @@ func resultNoData(state *State, rule *models.AlertRule, result eval.Result, _ lo
if state.StartsAt.IsZero() {
state.StartsAt = result.EvaluatedAt
}
state.setEndsAt(rule, result)
state.EndsAt = nextEndsTime(rule.IntervalSeconds, result.EvaluatedAt)
switch rule.NoDataState {
case models.Alerting:
@ -268,17 +313,13 @@ func (a *State) TrimResults(alertRule *models.AlertRule) {
a.Results = newResults
}
// setEndsAt sets the ending timestamp of the alert.
// The internal Alertmanager will use this time to know when it should automatically resolve the alert
// in case it hasn't received additional alerts. Under regular operations the scheduler will continue to send the
// alert with an updated EndsAt, if the alert is resolved then a last alert is sent with EndsAt = last evaluation time.
func (a *State) setEndsAt(alertRule *models.AlertRule, result eval.Result) {
func nextEndsTime(interval int64, evaluatedAt time.Time) time.Time {
ends := ResendDelay
if alertRule.IntervalSeconds > int64(ResendDelay.Seconds()) {
ends = time.Second * time.Duration(alertRule.IntervalSeconds)
intv := time.Second * time.Duration(interval)
if intv > ResendDelay {
ends = intv
}
a.EndsAt = result.EvaluatedAt.Add(ends * 3)
return evaluatedAt.Add(3 * ends)
}
func (a *State) GetLabels(opts ...models.LabelOption) map[string]string {

View File

@ -8,15 +8,341 @@ import (
"testing"
"time"
"github.com/benbjohnson/clock"
"github.com/golang/mock/gomock"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/screenshot"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
ptr "github.com/xorcare/pointer"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/screenshot"
)
// TestSetAlerting checks that SetAlerting overwrites the state, reason and
// timestamps, and clears any error left over from the previous state.
func TestSetAlerting(t *testing.T) {
	clk := clock.NewMock()
	// The mock clock does not advance on its own, so these are fixed instants.
	start := clk.Now()
	end := start.Add(time.Minute)

	type testCase struct {
		name     string
		state    State
		reason   string
		startsAt time.Time
		endsAt   time.Time
		expected State
	}
	cases := []testCase{
		{
			name:     "state is set to Alerting",
			reason:   "this is a reason",
			startsAt: start,
			endsAt:   end,
			expected: State{
				State:       eval.Alerting,
				StateReason: "this is a reason",
				StartsAt:    start,
				EndsAt:      end,
			},
		},
		{
			name: "previous state is removed",
			state: State{
				State:       eval.Normal,
				StateReason: "this is a reason",
				Error:       errors.New("this is an error"),
			},
			startsAt: start,
			endsAt:   end,
			expected: State{
				State:    eval.Alerting,
				StartsAt: start,
				EndsAt:   end,
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := tc.state
			got.SetAlerting(tc.reason, tc.startsAt, tc.endsAt)
			assert.Equal(t, tc.expected, got)
		})
	}
}
// TestSetPending checks that SetPending overwrites the state, reason and
// timestamps, and clears any error left over from the previous state.
func TestSetPending(t *testing.T) {
	clk := clock.NewMock()
	// The mock clock does not advance on its own, so these are fixed instants.
	start := clk.Now()
	end := start.Add(time.Minute)

	type testCase struct {
		name     string
		state    State
		reason   string
		startsAt time.Time
		endsAt   time.Time
		expected State
	}
	cases := []testCase{
		{
			name:     "state is set to Pending",
			reason:   "this is a reason",
			startsAt: start,
			endsAt:   end,
			expected: State{
				State:       eval.Pending,
				StateReason: "this is a reason",
				StartsAt:    start,
				EndsAt:      end,
			},
		},
		{
			name: "previous state is removed",
			state: State{
				State:       eval.Pending,
				StateReason: "this is a reason",
				Error:       errors.New("this is an error"),
			},
			startsAt: start,
			endsAt:   end,
			expected: State{
				State:    eval.Pending,
				StartsAt: start,
				EndsAt:   end,
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := tc.state
			got.SetPending(tc.reason, tc.startsAt, tc.endsAt)
			assert.Equal(t, tc.expected, got)
		})
	}
}
// TestNormal checks that SetNormal overwrites the state, reason and
// timestamps, and clears any error left over from the previous state.
func TestNormal(t *testing.T) {
	clk := clock.NewMock()
	// The mock clock does not advance on its own, so these are fixed instants.
	start := clk.Now()
	end := start.Add(time.Minute)

	type testCase struct {
		name     string
		state    State
		reason   string
		startsAt time.Time
		endsAt   time.Time
		expected State
	}
	cases := []testCase{
		{
			name:     "state is set to Normal",
			reason:   "this is a reason",
			startsAt: start,
			endsAt:   end,
			expected: State{
				State:       eval.Normal,
				StateReason: "this is a reason",
				StartsAt:    start,
				EndsAt:      end,
			},
		},
		{
			name: "previous state is removed",
			state: State{
				State:       eval.Normal,
				StateReason: "this is a reason",
				Error:       errors.New("this is an error"),
			},
			startsAt: start,
			endsAt:   end,
			expected: State{
				State:    eval.Normal,
				StartsAt: start,
				EndsAt:   end,
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := tc.state
			got.SetNormal(tc.reason, tc.startsAt, tc.endsAt)
			assert.Equal(t, tc.expected, got)
		})
	}
}
// TestNoData checks that SetNoData overwrites the state and timestamps, and
// clears the reason and error left over from the previous state when called
// with an empty reason.
func TestNoData(t *testing.T) {
	clk := clock.NewMock()
	// The mock clock does not advance on its own, so these are fixed instants.
	start := clk.Now()
	end := start.Add(time.Minute)

	type testCase struct {
		name     string
		state    State
		reason   string
		startsAt time.Time
		endsAt   time.Time
		expected State
	}
	cases := []testCase{
		{
			name:     "state is set to No Data",
			startsAt: start,
			endsAt:   end,
			expected: State{
				State:    eval.NoData,
				StartsAt: start,
				EndsAt:   end,
			},
		},
		{
			name: "previous state is removed",
			state: State{
				State:       eval.NoData,
				StateReason: "this is a reason",
				Error:       errors.New("this is an error"),
			},
			startsAt: start,
			endsAt:   end,
			expected: State{
				State:    eval.NoData,
				StartsAt: start,
				EndsAt:   end,
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := tc.state
			got.SetNoData(tc.reason, tc.startsAt, tc.endsAt)
			assert.Equal(t, tc.expected, got)
		})
	}
}
// TestSetError checks that SetError stores the supplied error, forces the
// reason to ngmodels.StateReasonError and overwrites both timestamps,
// replacing whatever reason and error the previous state carried.
func TestSetError(t *testing.T) {
	clk := clock.NewMock()
	// The mock clock does not advance on its own, so these are fixed instants.
	start := clk.Now()
	end := start.Add(time.Minute)

	type testCase struct {
		name     string
		state    State
		startsAt time.Time
		endsAt   time.Time
		err      error
		expected State
	}
	cases := []testCase{
		{
			name:     "state is set to Error",
			startsAt: start,
			endsAt:   end,
			err:      errors.New("this is an error"),
			expected: State{
				State:       eval.Error,
				StateReason: ngmodels.StateReasonError,
				Error:       errors.New("this is an error"),
				StartsAt:    start,
				EndsAt:      end,
			},
		},
		{
			name: "previous state is removed",
			state: State{
				State:       eval.Error,
				StateReason: "this is a reason",
				Error:       errors.New("this is an error"),
			},
			startsAt: start,
			endsAt:   end,
			err:      errors.New("this is another error"),
			expected: State{
				State:       eval.Error,
				StateReason: ngmodels.StateReasonError,
				Error:       errors.New("this is another error"),
				StartsAt:    start,
				EndsAt:      end,
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := tc.state
			got.SetError(tc.err, tc.startsAt, tc.endsAt)
			assert.Equal(t, tc.expected, got)
		})
	}
}
// TestMaintain checks that Maintain recomputes EndsAt from the evaluation
// time, for an interval below and an interval above the resend interval.
func TestMaintain(t *testing.T) {
	start := clock.NewMock().Now()
	evaluatedAt := start.Add(10 * time.Second)

	// Interval (10s) is less than the resend interval of 30 seconds:
	// 10 seconds + 3 x 30 seconds is 100 seconds.
	short := State{State: eval.Alerting, StartsAt: start, EndsAt: start.Add(time.Second)}
	short.Maintain(10, evaluatedAt)
	assert.Equal(t, start.Add(100*time.Second), short.EndsAt)

	// Interval (60s) is above the resend interval of 30 seconds:
	// 10 seconds + 3 x 60 seconds is 190 seconds.
	long := State{State: eval.Alerting, StartsAt: start, EndsAt: start.Add(time.Second)}
	long.Maintain(60, evaluatedAt)
	assert.Equal(t, start.Add(190*time.Second), long.EndsAt)
}
func TestEnd(t *testing.T) {
evaluationTime, _ := time.Parse("2006-01-02", "2021-03-25")
testCases := []struct {
name string
expected time.Time
testRule *ngmodels.AlertRule
testResult eval.Result
}{
{
name: "less than resend delay: for=unset,interval=10s - endsAt = resendDelay * 3",
expected: evaluationTime.Add(ResendDelay * 3),
testRule: &ngmodels.AlertRule{
IntervalSeconds: 10,
},
},
{
name: "less than resend delay: for=0s,interval=10s - endsAt = resendDelay * 3",
expected: evaluationTime.Add(ResendDelay * 3),
testRule: &ngmodels.AlertRule{
For: 0 * time.Second,
IntervalSeconds: 10,
},
},
{
name: "less than resend delay: for=10s,interval=10s - endsAt = resendDelay * 3",
expected: evaluationTime.Add(ResendDelay * 3),
testRule: &ngmodels.AlertRule{
For: 10 * time.Second,
IntervalSeconds: 10,
},
},
{
name: "less than resend delay: for=10s,interval=20s - endsAt = resendDelay * 3",
expected: evaluationTime.Add(ResendDelay * 3),
testRule: &ngmodels.AlertRule{
For: 10 * time.Second,
IntervalSeconds: 20,
},
},
{
name: "more than resend delay: for=unset,interval=1m - endsAt = interval * 3",
expected: evaluationTime.Add(time.Second * 60 * 3),
testRule: &ngmodels.AlertRule{
IntervalSeconds: 60,
},
},
{
name: "more than resend delay: for=0s,interval=1m - endsAt = resendDelay * 3",
expected: evaluationTime.Add(time.Second * 60 * 3),
testRule: &ngmodels.AlertRule{
For: 0 * time.Second,
IntervalSeconds: 60,
},
},
{
name: "more than resend delay: for=1m,interval=5m - endsAt = interval * 3",
expected: evaluationTime.Add(time.Second * 300 * 3),
testRule: &ngmodels.AlertRule{
For: time.Minute,
IntervalSeconds: 300,
},
},
{
name: "more than resend delay: for=5m,interval=1m - endsAt = interval * 3",
expected: evaluationTime.Add(time.Second * 60 * 3),
testRule: &ngmodels.AlertRule{
For: 300 * time.Second,
IntervalSeconds: 60,
},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
r := eval.Result{EvaluatedAt: evaluationTime}
assert.Equal(t, tc.expected, nextEndsTime(tc.testRule.IntervalSeconds, r.EvaluatedAt))
})
}
}
func TestNeedsSending(t *testing.T) {
evaluationTime, _ := time.Parse("2006-01-02", "2021-03-25")
testCases := []struct {
@ -144,88 +470,6 @@ func TestNeedsSending(t *testing.T) {
}
}
func TestSetEndsAt(t *testing.T) {
evaluationTime, _ := time.Parse("2006-01-02", "2021-03-25")
testCases := []struct {
name string
expected time.Time
testRule *ngmodels.AlertRule
testResult eval.Result
}{
{
name: "less than resend delay: for=unset,interval=10s - endsAt = resendDelay * 3",
expected: evaluationTime.Add(ResendDelay * 3),
testRule: &ngmodels.AlertRule{
IntervalSeconds: 10,
},
},
{
name: "less than resend delay: for=0s,interval=10s - endsAt = resendDelay * 3",
expected: evaluationTime.Add(ResendDelay * 3),
testRule: &ngmodels.AlertRule{
For: 0 * time.Second,
IntervalSeconds: 10,
},
},
{
name: "less than resend delay: for=10s,interval=10s - endsAt = resendDelay * 3",
expected: evaluationTime.Add(ResendDelay * 3),
testRule: &ngmodels.AlertRule{
For: 10 * time.Second,
IntervalSeconds: 10,
},
},
{
name: "less than resend delay: for=10s,interval=20s - endsAt = resendDelay * 3",
expected: evaluationTime.Add(ResendDelay * 3),
testRule: &ngmodels.AlertRule{
For: 10 * time.Second,
IntervalSeconds: 20,
},
},
{
name: "more than resend delay: for=unset,interval=1m - endsAt = interval * 3",
expected: evaluationTime.Add(time.Second * 60 * 3),
testRule: &ngmodels.AlertRule{
IntervalSeconds: 60,
},
},
{
name: "more than resend delay: for=0s,interval=1m - endsAt = resendDelay * 3",
expected: evaluationTime.Add(time.Second * 60 * 3),
testRule: &ngmodels.AlertRule{
For: 0 * time.Second,
IntervalSeconds: 60,
},
},
{
name: "more than resend delay: for=1m,interval=5m - endsAt = interval * 3",
expected: evaluationTime.Add(time.Second * 300 * 3),
testRule: &ngmodels.AlertRule{
For: time.Minute,
IntervalSeconds: 300,
},
},
{
name: "more than resend delay: for=5m,interval=1m - endsAt = interval * 3",
expected: evaluationTime.Add(time.Second * 60 * 3),
testRule: &ngmodels.AlertRule{
For: 300 * time.Second,
IntervalSeconds: 60,
},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
s := &State{}
r := eval.Result{EvaluatedAt: evaluationTime}
s.setEndsAt(tc.testRule, r)
assert.Equal(t, tc.expected, s.EndsAt)
})
}
}
func TestGetLastEvaluationValuesForCondition(t *testing.T) {
genState := func(results []Evaluation) *State {
return &State{