From ccfd9c89b2645fde4b12aad0819c178ef5afb979 Mon Sep 17 00:00:00 2001 From: bergquist Date: Thu, 1 Nov 2018 16:04:38 +0100 Subject: [PATCH] introduces hard coded deboucing for alerting --- pkg/services/alerting/eval_context.go | 21 ++- pkg/services/alerting/eval_context_test.go | 186 +++++++++++++-------- pkg/services/alerting/result_handler.go | 3 + pkg/services/alerting/rule.go | 5 + 4 files changed, 145 insertions(+), 70 deletions(-) diff --git a/pkg/services/alerting/eval_context.go b/pkg/services/alerting/eval_context.go index d0441d379b7..49e28bbf5ec 100644 --- a/pkg/services/alerting/eval_context.go +++ b/pkg/services/alerting/eval_context.go @@ -69,7 +69,7 @@ func (c *EvalContext) GetStateModel() *StateDescription { Text: "Alerting", } default: - panic("Unknown rule state " + c.Rule.State) + panic("Unknown rule state for alert notifications " + c.Rule.State) } } @@ -125,11 +125,26 @@ func (c *EvalContext) GetNewState() m.AlertStateType { return c.PrevAlertState } return c.Rule.ExecutionErrorState.ToAlertState() + } - } else if c.Firing { + if c.Firing && c.Rule.DebounceDuration != 0 { + since := time.Now().Sub(c.Rule.LastStateChange) + if since > c.Rule.DebounceDuration { + return m.AlertStateAlerting + } + + if c.PrevAlertState == m.AlertStateAlerting { + return m.AlertStateAlerting + } + + return m.AlertStatePending + } + + if c.Firing { return m.AlertStateAlerting + } - } else if c.NoDataFound { + if c.NoDataFound { c.log.Info("Alert Rule returned no data", "ruleId", c.Rule.Id, "name", c.Rule.Name, diff --git a/pkg/services/alerting/eval_context_test.go b/pkg/services/alerting/eval_context_test.go index 750fa959683..2abf581d830 100644 --- a/pkg/services/alerting/eval_context_test.go +++ b/pkg/services/alerting/eval_context_test.go @@ -2,11 +2,11 @@ package alerting import ( "context" - "fmt" + "errors" "testing" + "time" "github.com/grafana/grafana/pkg/models" - . "github.com/smartystreets/goconvey/convey" ) func TestStateIsUpdatedWhenNeeded(t *testing.T) { @@ -31,71 +31,123 @@ func TestStateIsUpdatedWhenNeeded(t *testing.T) { }) } -func TestAlertingEvalContext(t *testing.T) { - Convey("Should compute and replace properly new rule state", t, func() { +func TestGetStateFromEvalContext(t *testing.T) { + tcs := []struct { + name string + expected models.AlertStateType + applyFn func(ec *EvalContext) + focus bool + }{ + { + name: "ok -> alerting", + expected: models.AlertStateAlerting, + applyFn: func(ec *EvalContext) { + ec.Firing = true + ec.PrevAlertState = models.AlertStateOK + }, + }, + { + name: "ok -> error(alerting)", + expected: models.AlertStateAlerting, + applyFn: func(ec *EvalContext) { + ec.PrevAlertState = models.AlertStateOK + ec.Error = errors.New("test error") + ec.Rule.ExecutionErrorState = models.ExecutionErrorSetAlerting + }, + }, + { + name: "ok -> pending. since its been firing for less than FOR", + expected: models.AlertStatePending, + applyFn: func(ec *EvalContext) { + ec.PrevAlertState = models.AlertStateOK + ec.Firing = true + ec.Rule.LastStateChange = time.Now().Add(-time.Minute * 2) + ec.Rule.DebounceDuration = time.Minute * 5 + }, + }, + { + name: "ok -> alerting. since its been firing for more than FOR", + expected: models.AlertStateAlerting, + applyFn: func(ec *EvalContext) { + ec.PrevAlertState = models.AlertStateOK + ec.Firing = true + ec.Rule.LastStateChange = time.Now().Add(-(time.Hour * 5)) + ec.Rule.DebounceDuration = time.Minute * 2 + }, + }, + { + name: "alerting -> alerting. should not update regardless of FOR", + expected: models.AlertStateAlerting, + applyFn: func(ec *EvalContext) { + ec.PrevAlertState = models.AlertStateAlerting + ec.Firing = true + ec.Rule.LastStateChange = time.Now().Add(-time.Minute * 5) + ec.Rule.DebounceDuration = time.Minute * 2 + }, + }, + { + name: "ok -> ok. should not update regardless of FOR", + expected: models.AlertStateOK, + applyFn: func(ec *EvalContext) { + ec.PrevAlertState = models.AlertStateOK + ec.Rule.LastStateChange = time.Now().Add(-time.Minute * 5) + ec.Rule.DebounceDuration = time.Minute * 2 + }, + }, + { + name: "ok -> error(keep_last)", + expected: models.AlertStateOK, + applyFn: func(ec *EvalContext) { + ec.PrevAlertState = models.AlertStateOK + ec.Error = errors.New("test error") + ec.Rule.ExecutionErrorState = models.ExecutionErrorKeepState + }, + }, + { + name: "pending -> error(keep_last)", + expected: models.AlertStatePending, + applyFn: func(ec *EvalContext) { + ec.PrevAlertState = models.AlertStatePending + ec.Error = errors.New("test error") + ec.Rule.ExecutionErrorState = models.ExecutionErrorKeepState + }, + }, + { + name: "ok -> no_data(alerting)", + expected: models.AlertStateAlerting, + applyFn: func(ec *EvalContext) { + ec.PrevAlertState = models.AlertStateOK + ec.Rule.NoDataState = models.NoDataSetAlerting + ec.NoDataFound = true + }, + }, + { + name: "ok -> no_data(keep_last)", + expected: models.AlertStateOK, + applyFn: func(ec *EvalContext) { + ec.PrevAlertState = models.AlertStateOK + ec.Rule.NoDataState = models.NoDataKeepState + ec.NoDataFound = true + }, + }, + { + name: "pending -> no_data(keep_last)", + expected: models.AlertStatePending, + applyFn: func(ec *EvalContext) { + ec.PrevAlertState = models.AlertStatePending + ec.Rule.NoDataState = models.NoDataKeepState + ec.NoDataFound = true + }, + }, + } + + for _, tc := range tcs { ctx := NewEvalContext(context.TODO(), &Rule{Conditions: []Condition{&conditionStub{firing: true}}}) - dummieError := fmt.Errorf("dummie error") - Convey("ok -> alerting", func() { - ctx.PrevAlertState = models.AlertStateOK - ctx.Firing = true - - ctx.Rule.State = ctx.GetNewState() - So(ctx.Rule.State, ShouldEqual, models.AlertStateAlerting) - }) - - Convey("ok -> error(alerting)", func() { - ctx.PrevAlertState = models.AlertStateOK - ctx.Error = dummieError - ctx.Rule.ExecutionErrorState = models.ExecutionErrorSetAlerting - - ctx.Rule.State = ctx.GetNewState() - So(ctx.Rule.State, ShouldEqual, models.AlertStateAlerting) - }) - - Convey("ok -> error(keep_last)", func() { - ctx.PrevAlertState = models.AlertStateOK - ctx.Error = dummieError - ctx.Rule.ExecutionErrorState = models.ExecutionErrorKeepState - - ctx.Rule.State = ctx.GetNewState() - So(ctx.Rule.State, ShouldEqual, models.AlertStateOK) - }) - - Convey("pending -> error(keep_last)", func() { - ctx.PrevAlertState = models.AlertStatePending - ctx.Error = dummieError - ctx.Rule.ExecutionErrorState = models.ExecutionErrorKeepState - - ctx.Rule.State = ctx.GetNewState() - So(ctx.Rule.State, ShouldEqual, models.AlertStatePending) - }) - - Convey("ok -> no_data(alerting)", func() { - ctx.PrevAlertState = models.AlertStateOK - ctx.Rule.NoDataState = models.NoDataSetAlerting - ctx.NoDataFound = true - - ctx.Rule.State = ctx.GetNewState() - So(ctx.Rule.State, ShouldEqual, models.AlertStateAlerting) - }) - - Convey("ok -> no_data(keep_last)", func() { - ctx.PrevAlertState = models.AlertStateOK - ctx.Rule.NoDataState = models.NoDataKeepState - ctx.NoDataFound = true - - ctx.Rule.State = ctx.GetNewState() - So(ctx.Rule.State, ShouldEqual, models.AlertStateOK) - }) - - Convey("pending -> no_data(keep_last)", func() { - ctx.PrevAlertState = models.AlertStatePending - ctx.Rule.NoDataState = models.NoDataKeepState - ctx.NoDataFound = true - - ctx.Rule.State = ctx.GetNewState() - So(ctx.Rule.State, ShouldEqual, models.AlertStatePending) - }) - }) + tc.applyFn(ctx) + have := ctx.GetNewState() + if have != tc.expected { + t.Errorf("failed: %s \n expected '%s' have '%s'\n", tc.name, tc.expected, string(have)) + } + } } diff --git a/pkg/services/alerting/result_handler.go b/pkg/services/alerting/result_handler.go index 420ffeb9a55..ce12a8a6b96 100644 --- a/pkg/services/alerting/result_handler.go +++ b/pkg/services/alerting/result_handler.go @@ -73,6 +73,9 @@ func (handler *DefaultResultHandler) Handle(evalContext *EvalContext) error { // when two servers are raising. This makes sure that the server // with the last state change always sends a notification. evalContext.Rule.StateChanges = cmd.Result.StateChanges + + // Update the last state change of the alert rule in memory + evalContext.Rule.LastStateChange = time.Now() } // save annotation diff --git a/pkg/services/alerting/rule.go b/pkg/services/alerting/rule.go index 999611f15c4..3fb69b48f9f 100644 --- a/pkg/services/alerting/rule.go +++ b/pkg/services/alerting/rule.go @@ -4,6 +4,7 @@ import ( "fmt" "regexp" "strconv" + "time" "github.com/grafana/grafana/pkg/components/simplejson" @@ -18,6 +19,8 @@ type Rule struct { Frequency int64 Name string Message string + LastStateChange time.Time + DebounceDuration time.Duration NoDataState m.NoDataOption ExecutionErrorState m.ExecutionErrorOption State m.AlertStateType @@ -100,6 +103,8 @@ func NewRuleFromDBAlert(ruleDef *m.Alert) (*Rule, error) { model.Message = ruleDef.Message model.Frequency = ruleDef.Frequency model.State = ruleDef.State + model.LastStateChange = ruleDef.NewStateDate + model.DebounceDuration = time.Minute * 2 // hard coded for now model.NoDataState = m.NoDataOption(ruleDef.Settings.Get("noDataState").MustString("no_data")) model.ExecutionErrorState = m.ExecutionErrorOption(ruleDef.Settings.Get("executionErrorState").MustString("alerting")) model.StateChanges = ruleDef.StateChanges