diff --git a/pkg/services/ngalert/schedule/compat.go b/pkg/services/ngalert/schedule/compat.go
index a713cd3d434..850e4551a21 100644
--- a/pkg/services/ngalert/schedule/compat.go
+++ b/pkg/services/ngalert/schedule/compat.go
@@ -1,19 +1,21 @@
 package schedule
 
 import (
+	"time"
+
 	"github.com/go-openapi/strfmt"
 	apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
 	"github.com/prometheus/alertmanager/api/v2/models"
 
-	"github.com/grafana/grafana/pkg/services/ngalert/eval"
 	"github.com/grafana/grafana/pkg/services/ngalert/state"
 )
 
-func FromAlertStateToPostableAlerts(firingStates []*state.State) apimodels.PostableAlerts {
+func FromAlertStateToPostableAlerts(firingStates []*state.State, stateManager *state.Manager) apimodels.PostableAlerts {
 	alerts := apimodels.PostableAlerts{PostableAlerts: make([]models.PostableAlert, 0, len(firingStates))}
-
+	var sentAlerts []*state.State
+	ts := time.Now()
 	for _, alertState := range firingStates {
-		if alertState.State == eval.Alerting {
+		if alertState.NeedsSending(stateManager.ResendDelay) {
 			nL := alertState.Labels.Copy()
 			if len(alertState.Results) > 0 {
 				nL["__value__"] = alertState.Results[0].EvaluationString
@@ -26,7 +28,10 @@ func FromAlertStateToPostableAlerts(firingStates []*state.State) apimodels.Posta
 					Labels: models.LabelSet(nL),
 				},
 			})
+			alertState.LastSentAt = ts
+			sentAlerts = append(sentAlerts, alertState)
 		}
 	}
+	stateManager.Put(sentAlerts)
 	return alerts
 }
diff --git a/pkg/services/ngalert/schedule/schedule.go b/pkg/services/ngalert/schedule/schedule.go
index 40862f2fc81..1a07e51a7cc 100644
--- a/pkg/services/ngalert/schedule/schedule.go
+++ b/pkg/services/ngalert/schedule/schedule.go
@@ -88,7 +88,7 @@ func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key models.AlertRul
 			processedStates := stateManager.ProcessEvalResults(alertRule, results)
 			sch.saveAlertStates(processedStates)
-			alerts := FromAlertStateToPostableAlerts(processedStates)
+			alerts := FromAlertStateToPostableAlerts(processedStates, stateManager)
 			sch.log.Debug("sending alerts to notifier", "count", len(alerts.PostableAlerts), "alerts", alerts.PostableAlerts)
 			err = sch.sendAlerts(alerts)
 			if err != nil {
diff --git a/pkg/services/ngalert/state/manager.go b/pkg/services/ngalert/state/manager.go
index 3c7259143fa..c50d86f827d 100644
--- a/pkg/services/ngalert/state/manager.go
+++ b/pkg/services/ngalert/state/manager.go
@@ -11,18 +11,20 @@ import (
 )
 
 type Manager struct {
-	cache   *cache
-	quit    chan struct{}
-	Log     log.Logger
-	metrics *metrics.Metrics
+	cache       *cache
+	quit        chan struct{}
+	ResendDelay time.Duration
+	Log         log.Logger
+	metrics     *metrics.Metrics
 }
 
 func NewManager(logger log.Logger, metrics *metrics.Metrics) *Manager {
 	manager := &Manager{
-		cache:   newCache(logger, metrics),
-		quit:    make(chan struct{}),
-		Log:     logger,
-		metrics: metrics,
+		cache:       newCache(logger, metrics),
+		quit:        make(chan struct{}),
+		ResendDelay: 1 * time.Minute, // TODO: make this configurable
+		Log:         logger,
+		metrics:     metrics,
 	}
 	go manager.recordMetrics()
 	return manager
@@ -65,7 +67,6 @@ func (st *Manager) ProcessEvalResults(alertRule *ngModels.AlertRule, results eva
 	return states
 }
 
-//TODO: When calculating if an alert should not be firing anymore, we should take into account the re-send delay if any. We don't want to send every firing alert every time, we should have a fixed delay across all alerts to avoid saturating the notification system
 //Set the current state based on evaluation results
 func (st *Manager) setNextState(alertRule *ngModels.AlertRule, result eval.Result) *State {
 	currentState := st.getOrCreate(alertRule, result)
diff --git a/pkg/services/ngalert/state/state.go b/pkg/services/ngalert/state/state.go
index dc11b6a4a7e..f4ded8ba574 100644
--- a/pkg/services/ngalert/state/state.go
+++ b/pkg/services/ngalert/state/state.go
@@ -12,14 +12,15 @@ type State struct {
 	AlertRuleUID       string
 	OrgID              int64
 	CacheId            string
-	Labels             data.Labels
 	State              eval.State
 	Results            []Evaluation
 	StartsAt           time.Time
 	EndsAt             time.Time
 	LastEvaluationTime time.Time
 	EvaluationDuration time.Duration
+	LastSentAt         time.Time
 	Annotations        map[string]string
+	Labels             data.Labels
 	Error              error
 }
 
@@ -112,6 +113,16 @@ func (a *State) resultNoData(alertRule *ngModels.AlertRule, result eval.Result)
 	return a
 }
 
+func (a *State) NeedsSending(resendDelay time.Duration) bool {
+	if a.State != eval.Alerting {
+		return false
+	}
+
+	// send again if LastSentAt + resendDelay is before or equal to LastEvaluationTime
+	return a.LastSentAt.Add(resendDelay).Before(a.LastEvaluationTime) ||
+		a.LastSentAt.Add(resendDelay).Equal(a.LastEvaluationTime)
+}
+
 func (a *State) Equals(b *State) bool {
 	return a.AlertRuleUID == b.AlertRuleUID &&
 		a.OrgID == b.OrgID &&
diff --git a/pkg/services/ngalert/state/state_test.go b/pkg/services/ngalert/state/state_test.go
new file mode 100644
index 00000000000..46dcf517290
--- /dev/null
+++ b/pkg/services/ngalert/state/state_test.go
@@ -0,0 +1,75 @@
+package state
+
+import (
+	"testing"
+	"time"
+
+	"github.com/grafana/grafana/pkg/services/ngalert/eval"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestNeedsSending(t *testing.T) {
+	evaluationTime, _ := time.Parse("2006-01-02", "2021-03-25")
+	testCases := []struct {
+		name        string
+		resendDelay time.Duration
+		expected    bool
+		testState   *State
+	}{
+		{
+			name:        "state: alerting and LastSentAt + ResendDelay before LastEvaluationTime",
+			resendDelay: 1 * time.Minute,
+			expected:    true,
+			testState: &State{
+				State:              eval.Alerting,
+				LastEvaluationTime: evaluationTime,
+				LastSentAt:         evaluationTime.Add(-2 * time.Minute),
+			},
+		},
+		{
+			name:        "state: alerting and LastSentAt + ResendDelay after LastEvaluationTime",
+			resendDelay: 1 * time.Minute,
+			expected:    false,
+			testState: &State{
+				State:              eval.Alerting,
+				LastEvaluationTime: evaluationTime,
+				LastSentAt:         evaluationTime,
+			},
+		},
+		{
+			name:        "state: alerting and LastSentAt + ResendDelay equals LastEvaluationTime",
+			resendDelay: 1 * time.Minute,
+			expected:    true,
+			testState: &State{
+				State:              eval.Alerting,
+				LastEvaluationTime: evaluationTime,
+				LastSentAt:         evaluationTime.Add(-1 * time.Minute),
+			},
+		},
+		{
+			name:        "state: pending",
+			resendDelay: 1 * time.Minute,
+			expected:    false,
+			testState: &State{
+				State: eval.Pending,
+			},
+		},
+		{
+			name:        "state: alerting and ResendDelay is zero",
+			resendDelay: 0 * time.Minute,
+			expected:    true,
+			testState: &State{
+				State:              eval.Alerting,
+				LastEvaluationTime: evaluationTime,
+				LastSentAt:         evaluationTime,
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			assert.Equal(t, tc.expected, tc.testState.NeedsSending(tc.resendDelay))
+		})
+	}
+}
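Illustrative sketch (not part of the patch above): the snippet below walks a single firing State through four 30-second evaluations to show how NeedsSending throttles notifications to roughly one per ResendDelay. The Example name, the fixed timestamps, and recording the evaluation time as LastSentAt are assumptions made for this example; the patch itself stamps LastSentAt with time.Now() inside FromAlertStateToPostableAlerts.

package state_test

import (
	"fmt"
	"time"

	"github.com/grafana/grafana/pkg/services/ngalert/eval"
	"github.com/grafana/grafana/pkg/services/ngalert/state"
)

// Example_resendGate is a hypothetical example, not included in the change set.
func Example_resendGate() {
	resendDelay := 1 * time.Minute
	s := &state.State{State: eval.Alerting}

	start := time.Date(2021, 3, 25, 12, 0, 0, 0, time.UTC)
	for i := 0; i < 4; i++ {
		// One iteration stands in for one 30-second evaluation of a firing rule.
		s.LastEvaluationTime = start.Add(time.Duration(i) * 30 * time.Second)
		if s.NeedsSending(resendDelay) {
			// The patch records time.Now() here; using the evaluation time keeps this example deterministic.
			s.LastSentAt = s.LastEvaluationTime
			fmt.Println("sent at", s.LastSentAt.Format("15:04:05"))
		} else {
			fmt.Println("suppressed at", s.LastEvaluationTime.Format("15:04:05"))
		}
	}
	// Output:
	// sent at 12:00:00
	// suppressed at 12:00:30
	// sent at 12:01:00
	// suppressed at 12:01:30
}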