Alerting resend delay for sending to notifiers (#34312)

* adds resend delay to avoid saturating notifier
* correct method signatures
* pr feedback

This commit is contained in:
parent ffa222b4d3
commit 7a83d1f9ff
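Before the diff itself, a minimal, self-contained sketch of the gate this change introduces: a firing alert is handed to the notifier again only when at least one resend delay has elapsed between the last send and the latest evaluation. The types and names below are illustrative assumptions for the sketch only; the real implementation lives on state.State and checks eval.Alerting, as the hunks that follow show.

// Illustrative sketch, not the Grafana code: models the resend gate added in this commit.
package main

import (
	"fmt"
	"time"
)

// State is a stripped-down stand-in for the real state.State.
type State struct {
	Alerting           bool // stands in for comparing State against eval.Alerting
	LastEvaluationTime time.Time
	LastSentAt         time.Time
}

// NeedsSending reports whether the alert should be pushed to the notifier again:
// only firing alerts are sent, and only once LastSentAt + resendDelay no longer
// lies after LastEvaluationTime (!After is equivalent to the Before || Equal pair
// used in the diff).
func (a *State) NeedsSending(resendDelay time.Duration) bool {
	if !a.Alerting {
		return false
	}
	return !a.LastSentAt.Add(resendDelay).After(a.LastEvaluationTime)
}

func main() {
	now := time.Now()
	firing := &State{Alerting: true, LastEvaluationTime: now, LastSentAt: now.Add(-2 * time.Minute)}
	fmt.Println(firing.NeedsSending(time.Minute)) // true: the delay has elapsed since the last send

	firing.LastSentAt = now
	fmt.Println(firing.NeedsSending(time.Minute)) // false: sent too recently, skip this cycle
}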
@@ -1,19 +1,21 @@
 package schedule
 
 import (
+	"time"
 
 	"github.com/go-openapi/strfmt"
 	apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
 	"github.com/prometheus/alertmanager/api/v2/models"
 
-	"github.com/grafana/grafana/pkg/services/ngalert/eval"
 	"github.com/grafana/grafana/pkg/services/ngalert/state"
 )
 
-func FromAlertStateToPostableAlerts(firingStates []*state.State) apimodels.PostableAlerts {
+func FromAlertStateToPostableAlerts(firingStates []*state.State, stateManager *state.Manager) apimodels.PostableAlerts {
 	alerts := apimodels.PostableAlerts{PostableAlerts: make([]models.PostableAlert, 0, len(firingStates))}
+	var sentAlerts []*state.State
+	ts := time.Now()
 	for _, alertState := range firingStates {
-		if alertState.State == eval.Alerting {
+		if alertState.NeedsSending(stateManager.ResendDelay) {
 			nL := alertState.Labels.Copy()
 			if len(alertState.Results) > 0 {
 				nL["__value__"] = alertState.Results[0].EvaluationString
@@ -26,7 +28,10 @@ func FromAlertStateToPostableAlerts(firingStates []*state.State) apimodels.Posta
 					Labels: models.LabelSet(nL),
 				},
 			})
+			alertState.LastSentAt = ts
+			sentAlerts = append(sentAlerts, alertState)
 		}
 	}
+	stateManager.Put(sentAlerts)
 	return alerts
 }
@@ -88,7 +88,7 @@ func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key models.AlertRul
 
 				processedStates := stateManager.ProcessEvalResults(alertRule, results)
 				sch.saveAlertStates(processedStates)
-				alerts := FromAlertStateToPostableAlerts(processedStates)
+				alerts := FromAlertStateToPostableAlerts(processedStates, stateManager)
 				sch.log.Debug("sending alerts to notifier", "count", len(alerts.PostableAlerts), "alerts", alerts.PostableAlerts)
 				err = sch.sendAlerts(alerts)
 				if err != nil {
@@ -11,18 +11,20 @@ import (
 )
 
 type Manager struct {
 	cache *cache
 	quit  chan struct{}
-	Log     log.Logger
-	metrics *metrics.Metrics
+	ResendDelay time.Duration
+	Log         log.Logger
+	metrics     *metrics.Metrics
 }
 
 func NewManager(logger log.Logger, metrics *metrics.Metrics) *Manager {
 	manager := &Manager{
 		cache: newCache(logger, metrics),
 		quit:  make(chan struct{}),
-		Log:     logger,
-		metrics: metrics,
+		ResendDelay: 1 * time.Minute, // TODO: make this configurable
+		Log:         logger,
+		metrics:     metrics,
 	}
 	go manager.recordMetrics()
 	return manager
@@ -65,7 +67,6 @@ func (st *Manager) ProcessEvalResults(alertRule *ngModels.AlertRule, results eva
 	return states
 }
 
-//TODO: When calculating if an alert should not be firing anymore, we should take into account the re-send delay if any. We don't want to send every firing alert every time, we should have a fixed delay across all alerts to avoid saturating the notification system
 //Set the current state based on evaluation results
 func (st *Manager) setNextState(alertRule *ngModels.AlertRule, result eval.Result) *State {
 	currentState := st.getOrCreate(alertRule, result)
@@ -12,14 +12,15 @@ type State struct {
 	AlertRuleUID       string
 	OrgID              int64
 	CacheId            string
-	Labels             data.Labels
 	State              eval.State
 	Results            []Evaluation
 	StartsAt           time.Time
 	EndsAt             time.Time
 	LastEvaluationTime time.Time
 	EvaluationDuration time.Duration
+	LastSentAt         time.Time
 	Annotations        map[string]string
+	Labels             data.Labels
 	Error              error
 }
 
@@ -112,6 +113,16 @@ func (a *State) resultNoData(alertRule *ngModels.AlertRule, result eval.Result)
 	return a
 }
 
+func (a *State) NeedsSending(resendDelay time.Duration) bool {
+	if a.State != eval.Alerting {
+		return false
+	}
+
+	// if LastSentAt is before or equal to LastEvaluationTime + resendDelay, send again
+	return a.LastSentAt.Add(resendDelay).Before(a.LastEvaluationTime) ||
+		a.LastSentAt.Add(resendDelay).Equal(a.LastEvaluationTime)
+}
+
 func (a *State) Equals(b *State) bool {
 	return a.AlertRuleUID == b.AlertRuleUID &&
 		a.OrgID == b.OrgID &&
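Design note on the hunk above: NeedsSending treats the resend delay as a per-state rate limit rather than a global timer. A firing alert is pushed again only when at least resendDelay has elapsed between the last send and the latest evaluation (LastSentAt + resendDelay is before or equal to LastEvaluationTime), so a continuously firing rule reaches the notifier at most roughly once per ResendDelay (one minute by default, per the Manager change above) instead of on every evaluation tick, and non-Alerting states are never re-sent through this path.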
pkg/services/ngalert/state/state_test.go (new file, 75 lines)
@@ -0,0 +1,75 @@
+package state
+
+import (
+	"testing"
+	"time"
+
+	"github.com/grafana/grafana/pkg/services/ngalert/eval"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestNeedsSending(t *testing.T) {
+	evaluationTime, _ := time.Parse("2006-01-02", "2021-03-25")
+	testCases := []struct {
+		name        string
+		resendDelay time.Duration
+		expected    bool
+		testState   *State
+	}{
+		{
+			name:        "state: alerting and LastSentAt before LastEvaluationTime + ResendDelay",
+			resendDelay: 1 * time.Minute,
+			expected:    true,
+			testState: &State{
+				State:              eval.Alerting,
+				LastEvaluationTime: evaluationTime,
+				LastSentAt:         evaluationTime.Add(-2 * time.Minute),
+			},
+		},
+		{
+			name:        "state: alerting and LastSentAt after LastEvaluationTime + ResendDelay",
+			resendDelay: 1 * time.Minute,
+			expected:    false,
+			testState: &State{
+				State:              eval.Alerting,
+				LastEvaluationTime: evaluationTime,
+				LastSentAt:         evaluationTime,
+			},
+		},
+		{
+			name:        "state: alerting and LastSentAt equals LastEvaluationTime + ResendDelay",
+			resendDelay: 1 * time.Minute,
+			expected:    true,
+			testState: &State{
+				State:              eval.Alerting,
+				LastEvaluationTime: evaluationTime,
+				LastSentAt:         evaluationTime.Add(-1 * time.Minute),
+			},
+		},
+		{
+			name:        "state: pending",
+			resendDelay: 1 * time.Minute,
+			expected:    false,
+			testState: &State{
+				State: eval.Pending,
+			},
+		},
+		{
+			name:        "state: alerting and ResendDelay is zero",
+			resendDelay: 0 * time.Minute,
+			expected:    true,
+			testState: &State{
+				State:              eval.Alerting,
+				LastEvaluationTime: evaluationTime,
+				LastSentAt:         evaluationTime,
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			assert.Equal(t, tc.expected, tc.testState.NeedsSending(tc.resendDelay))
+		})
+	}
+}
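The added table-driven test can be exercised on its own with the standard Go tooling, for example:

go test ./pkg/services/ngalert/state -run TestNeedsSending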