Alerting resend delay for sending to notifiers (#34312)

* adds a resend delay to avoid saturating the notifier

* corrects method signatures

* addresses PR feedback
David Parrott 2021-05-19 13:15:09 -07:00 committed by GitHub
parent ffa222b4d3
commit 7a83d1f9ff
5 changed files with 107 additions and 15 deletions
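Taken together, the change works as follows: each state.State now records LastSentAt, the state.Manager carries a ResendDelay (currently fixed at one minute), and FromAlertStateToPostableAlerts only forwards an alerting state to the notifier once that delay has elapsed since the last send. Below is a minimal, self-contained sketch of the gating rule using simplified stand-in types; it is for illustration only and is not the Grafana API itself.

package main

import (
	"fmt"
	"time"
)

// alertState is a simplified stand-in for state.State.
type alertState struct {
	firing             bool
	lastSentAt         time.Time
	lastEvaluationTime time.Time
}

// needsSending mirrors the rule introduced here: resend only when at least
// resendDelay has passed between the last send and the latest evaluation.
func (s *alertState) needsSending(resendDelay time.Duration) bool {
	if !s.firing {
		return false
	}
	next := s.lastSentAt.Add(resendDelay)
	return next.Before(s.lastEvaluationTime) || next.Equal(s.lastEvaluationTime)
}

func main() {
	now := time.Now()
	s := &alertState{firing: true, lastSentAt: now.Add(-30 * time.Second), lastEvaluationTime: now}
	fmt.Println(s.needsSending(time.Minute))      // false: only 30s since the last send
	fmt.Println(s.needsSending(30 * time.Second)) // true: the delay has elapsed
}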


@@ -1,19 +1,21 @@
package schedule
import (
"time"
"github.com/go-openapi/strfmt"
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/prometheus/alertmanager/api/v2/models"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/services/ngalert/state"
)
- func FromAlertStateToPostableAlerts(firingStates []*state.State) apimodels.PostableAlerts {
+ func FromAlertStateToPostableAlerts(firingStates []*state.State, stateManager *state.Manager) apimodels.PostableAlerts {
alerts := apimodels.PostableAlerts{PostableAlerts: make([]models.PostableAlert, 0, len(firingStates))}
+ var sentAlerts []*state.State
+ ts := time.Now()
for _, alertState := range firingStates {
- if alertState.State == eval.Alerting {
+ if alertState.NeedsSending(stateManager.ResendDelay) {
nL := alertState.Labels.Copy()
if len(alertState.Results) > 0 {
nL["__value__"] = alertState.Results[0].EvaluationString
@@ -26,7 +28,10 @@ func FromAlertStateToPostableAlerts(firingStates []*state.State) apimodels.PostableAlerts {
Labels: models.LabelSet(nL),
},
})
+ alertState.LastSentAt = ts
+ sentAlerts = append(sentAlerts, alertState)
}
}
+ stateManager.Put(sentAlerts)
return alerts
}
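In practice, with the default one-minute ResendDelay and, say, a 10-second evaluation interval (the interval is only an illustrative assumption), a continuously firing alert is forwarded on the first qualifying evaluation, suppressed for the following five ticks, and then forwarded again roughly once per minute: on each send the conversion stamps alertState.LastSentAt = ts and hands only the sent states back to stateManager.Put, so the cached state remembers when the alert last went out.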


@@ -88,7 +88,7 @@ func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key models.AlertRul
processedStates := stateManager.ProcessEvalResults(alertRule, results)
sch.saveAlertStates(processedStates)
- alerts := FromAlertStateToPostableAlerts(processedStates)
+ alerts := FromAlertStateToPostableAlerts(processedStates, stateManager)
sch.log.Debug("sending alerts to notifier", "count", len(alerts.PostableAlerts), "alerts", alerts.PostableAlerts)
err = sch.sendAlerts(alerts)
if err != nil {


@@ -11,18 +11,20 @@ import (
)
type Manager struct {
- cache   *cache
- quit    chan struct{}
- Log     log.Logger
- metrics *metrics.Metrics
+ cache       *cache
+ quit        chan struct{}
+ ResendDelay time.Duration
+ Log         log.Logger
+ metrics     *metrics.Metrics
}
func NewManager(logger log.Logger, metrics *metrics.Metrics) *Manager {
manager := &Manager{
- cache:   newCache(logger, metrics),
- quit:    make(chan struct{}),
- Log:     logger,
- metrics: metrics,
+ cache:       newCache(logger, metrics),
+ quit:        make(chan struct{}),
+ ResendDelay: 1 * time.Minute, // TODO: make this configurable
+ Log:         logger,
+ metrics:     metrics,
}
go manager.recordMetrics()
return manager
@@ -65,7 +67,6 @@ func (st *Manager) ProcessEvalResults(alertRule *ngModels.AlertRule, results eva
return states
}
- //TODO: When calculating if an alert should not be firing anymore, we should take into account the re-send delay if any. We don't want to send every firing alert every time, we should have a fixed delay across all alerts to avoid saturating the notification system
//Set the current state based on evaluation results
func (st *Manager) setNextState(alertRule *ngModels.AlertRule, result eval.Result) *State {
currentState := st.getOrCreate(alertRule, result)
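ResendDelay is hard-wired to one minute for now; the inline TODO flags it as a candidate for configuration. Because the field is exported, a caller could in principle override it after construction. A hedged sketch, assuming the NewManager signature shown above (the logger and metricsRegistry variables are placeholders, not names from this commit):

// Sketch only: build the state manager, then shorten the resend delay.
stateManager := state.NewManager(logger, metricsRegistry)
stateManager.ResendDelay = 30 * time.Second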


@@ -12,14 +12,15 @@ type State struct {
AlertRuleUID string
OrgID int64
CacheId string
Labels data.Labels
State eval.State
Results []Evaluation
StartsAt time.Time
EndsAt time.Time
LastEvaluationTime time.Time
EvaluationDuration time.Duration
+ LastSentAt time.Time
Annotations map[string]string
Labels data.Labels
Error error
}
@@ -112,6 +113,16 @@ func (a *State) resultNoData(alertRule *ngModels.AlertRule, result eval.Result)
return a
}
+ func (a *State) NeedsSending(resendDelay time.Duration) bool {
+ if a.State != eval.Alerting {
+ return false
+ }
+ // send again only once LastSentAt + resendDelay is before or equal to LastEvaluationTime
+ return a.LastSentAt.Add(resendDelay).Before(a.LastEvaluationTime) ||
+ a.LastSentAt.Add(resendDelay).Equal(a.LastEvaluationTime)
+ }
func (a *State) Equals(b *State) bool {
return a.AlertRuleUID == b.AlertRuleUID &&
a.OrgID == b.OrgID &&
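As a worked example of the boundary: with resendDelay = 1 minute, LastSentAt = 12:00:00 and LastEvaluationTime = 12:01:00, LastSentAt.Add(resendDelay) equals LastEvaluationTime exactly, so NeedsSending returns true; with LastEvaluationTime = 12:00:59 it returns false. The equality branch is exactly what the third test case below exercises.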


@@ -0,0 +1,75 @@
package state
import (
"testing"
"time"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/stretchr/testify/assert"
)
func TestNeedsSending(t *testing.T) {
evaluationTime, _ := time.Parse("2006-01-02", "2021-03-25")
testCases := []struct {
name string
resendDelay time.Duration
expected bool
testState *State
}{
{
name: "state: alerting and LastSentAt before LastEvaluationTime + ResendDelay",
resendDelay: 1 * time.Minute,
expected: true,
testState: &State{
State: eval.Alerting,
LastEvaluationTime: evaluationTime,
LastSentAt: evaluationTime.Add(-2 * time.Minute),
},
},
{
name: "state: alerting and LastSentAt after LastEvaluationTime + ResendDelay",
resendDelay: 1 * time.Minute,
expected: false,
testState: &State{
State: eval.Alerting,
LastEvaluationTime: evaluationTime,
LastSentAt: evaluationTime,
},
},
{
name: "state: alerting and LastSentAt equals LastEvaluationTime + ResendDelay",
resendDelay: 1 * time.Minute,
expected: true,
testState: &State{
State: eval.Alerting,
LastEvaluationTime: evaluationTime,
LastSentAt: evaluationTime.Add(-1 * time.Minute),
},
},
{
name: "state: pending",
resendDelay: 1 * time.Minute,
expected: false,
testState: &State{
State: eval.Pending,
},
},
{
name: "state: alerting and ResendDelay is zero",
resendDelay: 0 * time.Minute,
expected: true,
testState: &State{
State: eval.Alerting,
LastEvaluationTime: evaluationTime,
LastSentAt: evaluationTime,
},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
assert.Equal(t, tc.expected, tc.testState.NeedsSending(tc.resendDelay))
})
}
}
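Assuming the test file lives in the usual pkg/services/ngalert/state package, the table-driven cases above can be run in isolation with something like: go test -run TestNeedsSending ./pkg/services/ngalert/state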