mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting resend delay for sending to notifiers (#34312)
* Adds a resend delay to avoid saturating the notifier
* Corrects method signatures
* Addresses PR feedback
This commit is contained in:
parent
ffa222b4d3
commit
7a83d1f9ff
@ -1,19 +1,21 @@
|
||||
package schedule
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/go-openapi/strfmt"
|
||||
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
|
||||
"github.com/prometheus/alertmanager/api/v2/models"
|
||||
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/eval"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/state"
|
||||
)
|
||||
|
||||
func FromAlertStateToPostableAlerts(firingStates []*state.State) apimodels.PostableAlerts {
|
||||
func FromAlertStateToPostableAlerts(firingStates []*state.State, stateManager *state.Manager) apimodels.PostableAlerts {
|
||||
alerts := apimodels.PostableAlerts{PostableAlerts: make([]models.PostableAlert, 0, len(firingStates))}
|
||||
|
||||
var sentAlerts []*state.State
|
||||
ts := time.Now()
|
||||
for _, alertState := range firingStates {
|
||||
if alertState.State == eval.Alerting {
|
||||
if alertState.NeedsSending(stateManager.ResendDelay) {
|
||||
nL := alertState.Labels.Copy()
|
||||
if len(alertState.Results) > 0 {
|
||||
nL["__value__"] = alertState.Results[0].EvaluationString
|
||||
@ -26,7 +28,10 @@ func FromAlertStateToPostableAlerts(firingStates []*state.State) apimodels.Posta
|
||||
Labels: models.LabelSet(nL),
|
||||
},
|
||||
})
|
||||
alertState.LastSentAt = ts
|
||||
sentAlerts = append(sentAlerts, alertState)
|
||||
}
|
||||
}
|
||||
stateManager.Put(sentAlerts)
|
||||
return alerts
|
||||
}
|
||||
|
@ -88,7 +88,7 @@ func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key models.AlertRul
|
||||
|
||||
processedStates := stateManager.ProcessEvalResults(alertRule, results)
|
||||
sch.saveAlertStates(processedStates)
|
||||
alerts := FromAlertStateToPostableAlerts(processedStates)
|
||||
alerts := FromAlertStateToPostableAlerts(processedStates, stateManager)
|
||||
sch.log.Debug("sending alerts to notifier", "count", len(alerts.PostableAlerts), "alerts", alerts.PostableAlerts)
|
||||
err = sch.sendAlerts(alerts)
|
||||
if err != nil {
|
||||
|
@ -11,18 +11,20 @@ import (
|
||||
)
|
||||
|
||||
type Manager struct {
|
||||
cache *cache
|
||||
quit chan struct{}
|
||||
Log log.Logger
|
||||
metrics *metrics.Metrics
|
||||
cache *cache
|
||||
quit chan struct{}
|
||||
ResendDelay time.Duration
|
||||
Log log.Logger
|
||||
metrics *metrics.Metrics
|
||||
}
|
||||
|
||||
func NewManager(logger log.Logger, metrics *metrics.Metrics) *Manager {
|
||||
manager := &Manager{
|
||||
cache: newCache(logger, metrics),
|
||||
quit: make(chan struct{}),
|
||||
Log: logger,
|
||||
metrics: metrics,
|
||||
cache: newCache(logger, metrics),
|
||||
quit: make(chan struct{}),
|
||||
ResendDelay: 1 * time.Minute, // TODO: make this configurable
|
||||
Log: logger,
|
||||
metrics: metrics,
|
||||
}
|
||||
go manager.recordMetrics()
|
||||
return manager
|
||||
@ -65,7 +67,6 @@ func (st *Manager) ProcessEvalResults(alertRule *ngModels.AlertRule, results eva
|
||||
return states
|
||||
}
|
||||
|
||||
//TODO: When calculating if an alert should not be firing anymore, we should take into account the re-send delay if any. We don't want to send every firing alert every time, we should have a fixed delay across all alerts to avoid saturating the notification system
|
||||
//Set the current state based on evaluation results
|
||||
func (st *Manager) setNextState(alertRule *ngModels.AlertRule, result eval.Result) *State {
|
||||
currentState := st.getOrCreate(alertRule, result)
|
||||
|
@ -12,14 +12,15 @@ type State struct {
|
||||
AlertRuleUID string
|
||||
OrgID int64
|
||||
CacheId string
|
||||
Labels data.Labels
|
||||
State eval.State
|
||||
Results []Evaluation
|
||||
StartsAt time.Time
|
||||
EndsAt time.Time
|
||||
LastEvaluationTime time.Time
|
||||
EvaluationDuration time.Duration
|
||||
LastSentAt time.Time
|
||||
Annotations map[string]string
|
||||
Labels data.Labels
|
||||
Error error
|
||||
}
|
||||
|
||||
@ -112,6 +113,16 @@ func (a *State) resultNoData(alertRule *ngModels.AlertRule, result eval.Result)
|
||||
return a
|
||||
}
|
||||
|
||||
func (a *State) NeedsSending(resendDelay time.Duration) bool {
|
||||
if a.State != eval.Alerting {
|
||||
return false
|
||||
}
|
||||
|
||||
// if LastSentAt is before or equal to LastEvaluationTime + resendDelay, send again
|
||||
return a.LastSentAt.Add(resendDelay).Before(a.LastEvaluationTime) ||
|
||||
a.LastSentAt.Add(resendDelay).Equal(a.LastEvaluationTime)
|
||||
}
|
||||
|
||||
func (a *State) Equals(b *State) bool {
|
||||
return a.AlertRuleUID == b.AlertRuleUID &&
|
||||
a.OrgID == b.OrgID &&
|
||||
|
75
pkg/services/ngalert/state/state_test.go
Normal file
75
pkg/services/ngalert/state/state_test.go
Normal file
@ -0,0 +1,75 @@
|
||||
package state
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/eval"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestNeedsSending(t *testing.T) {
|
||||
evaluationTime, _ := time.Parse("2006-01-02", "2021-03-25")
|
||||
testCases := []struct {
|
||||
name string
|
||||
resendDelay time.Duration
|
||||
expected bool
|
||||
testState *State
|
||||
}{
|
||||
{
|
||||
name: "state: alerting and LastSentAt before LastEvaluationTime + ResendDelay",
|
||||
resendDelay: 1 * time.Minute,
|
||||
expected: true,
|
||||
testState: &State{
|
||||
State: eval.Alerting,
|
||||
LastEvaluationTime: evaluationTime,
|
||||
LastSentAt: evaluationTime.Add(-2 * time.Minute),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "state: alerting and LastSentAt after LastEvaluationTime + ResendDelay",
|
||||
resendDelay: 1 * time.Minute,
|
||||
expected: false,
|
||||
testState: &State{
|
||||
State: eval.Alerting,
|
||||
LastEvaluationTime: evaluationTime,
|
||||
LastSentAt: evaluationTime,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "state: alerting and LastSentAt equals LastEvaluationTime + ResendDelay",
|
||||
resendDelay: 1 * time.Minute,
|
||||
expected: true,
|
||||
testState: &State{
|
||||
State: eval.Alerting,
|
||||
LastEvaluationTime: evaluationTime,
|
||||
LastSentAt: evaluationTime.Add(-1 * time.Minute),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "state: pending",
|
||||
resendDelay: 1 * time.Minute,
|
||||
expected: false,
|
||||
testState: &State{
|
||||
State: eval.Pending,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "state: alerting and ResendDelay is zero",
|
||||
resendDelay: 0 * time.Minute,
|
||||
expected: true,
|
||||
testState: &State{
|
||||
State: eval.Alerting,
|
||||
LastEvaluationTime: evaluationTime,
|
||||
LastSentAt: evaluationTime,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
assert.Equal(t, tc.expected, tc.testState.NeedsSending(tc.resendDelay))
|
||||
})
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user