mirror of
https://github.com/grafana/grafana.git
synced 2024-11-25 10:20:29 -06:00
Alerting: Refactor ruleRoutine to take an entire ruleInfo instance (#83858)
* Make stop a real method
* ruleRoutine takes a ruleInfo reference directly rather than pieces of it
* Fix whitespace
This commit is contained in:
parent
3121fce305
commit
f2a9d0a89d
@ -10,12 +10,17 @@ type alertRuleInfo struct {
|
||||
evalCh chan *evaluation
|
||||
updateCh chan ruleVersionAndPauseStatus
|
||||
ctx context.Context
|
||||
stop func(reason error)
|
||||
stopFn util.CancelCauseFunc
|
||||
}
|
||||
|
||||
func newAlertRuleInfo(parent context.Context) *alertRuleInfo {
|
||||
ctx, stop := util.WithCancelCause(parent)
|
||||
return &alertRuleInfo{evalCh: make(chan *evaluation), updateCh: make(chan ruleVersionAndPauseStatus), ctx: ctx, stop: stop}
|
||||
return &alertRuleInfo{
|
||||
evalCh: make(chan *evaluation),
|
||||
updateCh: make(chan ruleVersionAndPauseStatus),
|
||||
ctx: ctx,
|
||||
stopFn: stop,
|
||||
}
|
||||
}
|
||||
|
||||
// eval signals the rule evaluation routine to perform the evaluation of the rule. Does nothing if the loop is stopped.
|
||||
@ -58,3 +63,8 @@ func (a *alertRuleInfo) update(lastVersion ruleVersionAndPauseStatus) bool {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// stop sends an instruction to the rule evaluation routine to shut down. An optional shutdown reason can be given.
|
||||
func (a *alertRuleInfo) stop(reason error) {
|
||||
a.stopFn(reason)
|
||||
}
|
||||
|
@ -256,7 +256,7 @@ func (sch *schedule) processTick(ctx context.Context, dispatcherGroup *errgroup.
|
||||
|
||||
if newRoutine && !invalidInterval {
|
||||
dispatcherGroup.Go(func() error {
|
||||
return sch.ruleRoutine(ruleInfo.ctx, key, ruleInfo.evalCh, ruleInfo.updateCh)
|
||||
return sch.ruleRoutine(key, ruleInfo)
|
||||
})
|
||||
}
|
||||
|
||||
@ -345,8 +345,8 @@ func (sch *schedule) processTick(ctx context.Context, dispatcherGroup *errgroup.
|
||||
}
|
||||
|
||||
//nolint:gocyclo
|
||||
func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key ngmodels.AlertRuleKey, evalCh <-chan *evaluation, updateCh <-chan ruleVersionAndPauseStatus) error {
|
||||
grafanaCtx = ngmodels.WithRuleKey(grafanaCtx, key)
|
||||
func (sch *schedule) ruleRoutine(key ngmodels.AlertRuleKey, ruleInfo *alertRuleInfo) error {
|
||||
grafanaCtx := ngmodels.WithRuleKey(ruleInfo.ctx, key)
|
||||
logger := sch.log.FromContext(grafanaCtx)
|
||||
logger.Debug("Alert rule routine started")
|
||||
|
||||
@ -474,7 +474,7 @@ func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key ngmodels.AlertR
|
||||
for {
|
||||
select {
|
||||
// used by external services (API) to notify that rule is updated.
|
||||
case ctx := <-updateCh:
|
||||
case ctx := <-ruleInfo.updateCh:
|
||||
if currentFingerprint == ctx.Fingerprint {
|
||||
logger.Info("Rule's fingerprint has not changed. Skip resetting the state", "currentFingerprint", currentFingerprint)
|
||||
continue
|
||||
@ -485,7 +485,7 @@ func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key ngmodels.AlertR
|
||||
resetState(grafanaCtx, ctx.IsPaused)
|
||||
currentFingerprint = ctx.Fingerprint
|
||||
// evalCh - used by the scheduler to signal that evaluation is needed.
|
||||
case ctx, ok := <-evalCh:
|
||||
case ctx, ok := <-ruleInfo.evalCh:
|
||||
if !ok {
|
||||
logger.Debug("Evaluation channel has been closed. Exiting")
|
||||
return nil
|
||||
|
@ -384,22 +384,22 @@ func TestSchedule_ruleRoutine(t *testing.T) {
|
||||
for _, evalState := range normalStates {
|
||||
// TODO rewrite when we are able to mock/fake state manager
|
||||
t.Run(fmt.Sprintf("when rule evaluation happens (evaluation state %s)", evalState), func(t *testing.T) {
|
||||
evalChan := make(chan *evaluation)
|
||||
evalAppliedChan := make(chan time.Time)
|
||||
sch, ruleStore, instanceStore, reg := createSchedule(evalAppliedChan, nil)
|
||||
|
||||
rule := models.AlertRuleGen(withQueryForState(t, evalState))()
|
||||
ruleStore.PutRule(context.Background(), rule)
|
||||
folderTitle := ruleStore.getNamespaceTitle(rule.NamespaceUID)
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
t.Cleanup(cancel)
|
||||
ruleInfo := newAlertRuleInfo(ctx)
|
||||
go func() {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
t.Cleanup(cancel)
|
||||
_ = sch.ruleRoutine(ctx, rule.GetKey(), evalChan, make(chan ruleVersionAndPauseStatus))
|
||||
_ = sch.ruleRoutine(rule.GetKey(), ruleInfo)
|
||||
}()
|
||||
|
||||
expectedTime := time.UnixMicro(rand.Int63())
|
||||
|
||||
evalChan <- &evaluation{
|
||||
ruleInfo.evalCh <- &evaluation{
|
||||
scheduledAt: expectedTime,
|
||||
rule: rule,
|
||||
folderTitle: folderTitle,
|
||||
@ -540,8 +540,9 @@ func TestSchedule_ruleRoutine(t *testing.T) {
|
||||
require.NotEmpty(t, expectedStates)
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
ruleInfo := newAlertRuleInfo(ctx)
|
||||
go func() {
|
||||
err := sch.ruleRoutine(ctx, models.AlertRuleKey{}, make(chan *evaluation), make(chan ruleVersionAndPauseStatus))
|
||||
err := sch.ruleRoutine(models.AlertRuleKey{}, ruleInfo)
|
||||
stoppedChan <- err
|
||||
}()
|
||||
|
||||
@ -550,7 +551,7 @@ func TestSchedule_ruleRoutine(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, len(expectedStates), len(sch.stateManager.GetStatesForRuleUID(rule.OrgID, rule.UID)))
|
||||
})
|
||||
t.Run("and clean up the state if delete is cancellation reason ", func(t *testing.T) {
|
||||
t.Run("and clean up the state if delete is cancellation reason for inner context", func(t *testing.T) {
|
||||
stoppedChan := make(chan error)
|
||||
sch, _, _, _ := createSchedule(make(chan time.Time), nil)
|
||||
|
||||
@ -558,13 +559,13 @@ func TestSchedule_ruleRoutine(t *testing.T) {
|
||||
_ = sch.stateManager.ProcessEvalResults(context.Background(), sch.clock.Now(), rule, eval.GenerateResults(rand.Intn(5)+1, eval.ResultGen(eval.WithEvaluatedAt(sch.clock.Now()))), nil)
|
||||
require.NotEmpty(t, sch.stateManager.GetStatesForRuleUID(rule.OrgID, rule.UID))
|
||||
|
||||
ctx, cancel := util.WithCancelCause(context.Background())
|
||||
ruleInfo := newAlertRuleInfo(context.Background())
|
||||
go func() {
|
||||
err := sch.ruleRoutine(ctx, rule.GetKey(), make(chan *evaluation), make(chan ruleVersionAndPauseStatus))
|
||||
err := sch.ruleRoutine(rule.GetKey(), ruleInfo)
|
||||
stoppedChan <- err
|
||||
}()
|
||||
|
||||
cancel(errRuleDeleted)
|
||||
ruleInfo.stop(errRuleDeleted)
|
||||
err := waitForErrChannel(t, stoppedChan)
|
||||
require.NoError(t, err)
|
||||
|
||||
@ -577,9 +578,7 @@ func TestSchedule_ruleRoutine(t *testing.T) {
|
||||
folderTitle := "folderName"
|
||||
ruleFp := ruleWithFolder{rule, folderTitle}.Fingerprint()
|
||||
|
||||
evalChan := make(chan *evaluation)
|
||||
evalAppliedChan := make(chan time.Time)
|
||||
updateChan := make(chan ruleVersionAndPauseStatus)
|
||||
|
||||
sender := NewSyncAlertsSenderMock()
|
||||
sender.EXPECT().Send(mock.Anything, rule.GetKey(), mock.Anything).Return()
|
||||
@ -587,15 +586,16 @@ func TestSchedule_ruleRoutine(t *testing.T) {
|
||||
sch, ruleStore, _, _ := createSchedule(evalAppliedChan, sender)
|
||||
ruleStore.PutRule(context.Background(), rule)
|
||||
sch.schedulableAlertRules.set([]*models.AlertRule{rule}, map[models.FolderKey]string{rule.GetFolderKey(): folderTitle})
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
t.Cleanup(cancel)
|
||||
ruleInfo := newAlertRuleInfo(ctx)
|
||||
|
||||
go func() {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
t.Cleanup(cancel)
|
||||
_ = sch.ruleRoutine(ctx, rule.GetKey(), evalChan, updateChan)
|
||||
_ = sch.ruleRoutine(rule.GetKey(), ruleInfo)
|
||||
}()
|
||||
|
||||
// init evaluation loop so that it gets the rule version
|
||||
evalChan <- &evaluation{
|
||||
ruleInfo.evalCh <- &evaluation{
|
||||
scheduledAt: sch.clock.Now(),
|
||||
rule: rule,
|
||||
folderTitle: folderTitle,
|
||||
@ -631,8 +631,8 @@ func TestSchedule_ruleRoutine(t *testing.T) {
|
||||
require.Greaterf(t, expectedToBeSent, 0, "State manager was expected to return at least one state that can be expired")
|
||||
|
||||
t.Run("should do nothing if version in channel is the same", func(t *testing.T) {
|
||||
updateChan <- ruleVersionAndPauseStatus{ruleFp, false}
|
||||
updateChan <- ruleVersionAndPauseStatus{ruleFp, false} // second time just to make sure that previous messages were handled
|
||||
ruleInfo.updateCh <- ruleVersionAndPauseStatus{ruleFp, false}
|
||||
ruleInfo.updateCh <- ruleVersionAndPauseStatus{ruleFp, false} // second time just to make sure that previous messages were handled
|
||||
|
||||
actualStates := sch.stateManager.GetStatesForRuleUID(rule.OrgID, rule.UID)
|
||||
require.Len(t, actualStates, len(states))
|
||||
@ -641,7 +641,7 @@ func TestSchedule_ruleRoutine(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("should clear the state and expire firing alerts if version in channel is greater", func(t *testing.T) {
|
||||
updateChan <- ruleVersionAndPauseStatus{ruleFp + 1, false}
|
||||
ruleInfo.updateCh <- ruleVersionAndPauseStatus{ruleFp + 1, false}
|
||||
|
||||
require.Eventually(t, func() bool {
|
||||
return len(sender.Calls()) > 0
|
||||
@ -659,7 +659,6 @@ func TestSchedule_ruleRoutine(t *testing.T) {
|
||||
rule := models.AlertRuleGen(withQueryForState(t, eval.Error))()
|
||||
rule.ExecErrState = models.ErrorErrState
|
||||
|
||||
evalChan := make(chan *evaluation)
|
||||
evalAppliedChan := make(chan time.Time)
|
||||
|
||||
sender := NewSyncAlertsSenderMock()
|
||||
@ -668,14 +667,15 @@ func TestSchedule_ruleRoutine(t *testing.T) {
|
||||
sch, ruleStore, _, reg := createSchedule(evalAppliedChan, sender)
|
||||
sch.maxAttempts = 3
|
||||
ruleStore.PutRule(context.Background(), rule)
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
t.Cleanup(cancel)
|
||||
ruleInfo := newAlertRuleInfo(ctx)
|
||||
|
||||
go func() {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
t.Cleanup(cancel)
|
||||
_ = sch.ruleRoutine(ctx, rule.GetKey(), evalChan, make(chan ruleVersionAndPauseStatus))
|
||||
_ = sch.ruleRoutine(rule.GetKey(), ruleInfo)
|
||||
}()
|
||||
|
||||
evalChan <- &evaluation{
|
||||
ruleInfo.evalCh <- &evaluation{
|
||||
scheduledAt: sch.clock.Now(),
|
||||
rule: rule,
|
||||
}
|
||||
@ -765,7 +765,6 @@ func TestSchedule_ruleRoutine(t *testing.T) {
|
||||
// eval.Alerting makes the state manager create notifications for alertmanagers
|
||||
rule := models.AlertRuleGen(withQueryForState(t, eval.Alerting))()
|
||||
|
||||
evalChan := make(chan *evaluation)
|
||||
evalAppliedChan := make(chan time.Time)
|
||||
|
||||
sender := NewSyncAlertsSenderMock()
|
||||
@ -773,14 +772,15 @@ func TestSchedule_ruleRoutine(t *testing.T) {
|
||||
|
||||
sch, ruleStore, _, _ := createSchedule(evalAppliedChan, sender)
|
||||
ruleStore.PutRule(context.Background(), rule)
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
t.Cleanup(cancel)
|
||||
ruleInfo := newAlertRuleInfo(ctx)
|
||||
|
||||
go func() {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
t.Cleanup(cancel)
|
||||
_ = sch.ruleRoutine(ctx, rule.GetKey(), evalChan, make(chan ruleVersionAndPauseStatus))
|
||||
_ = sch.ruleRoutine(rule.GetKey(), ruleInfo)
|
||||
}()
|
||||
|
||||
evalChan <- &evaluation{
|
||||
ruleInfo.evalCh <- &evaluation{
|
||||
scheduledAt: sch.clock.Now(),
|
||||
rule: rule,
|
||||
}
|
||||
@ -798,7 +798,6 @@ func TestSchedule_ruleRoutine(t *testing.T) {
|
||||
t.Run("when there are no alerts to send it should not call notifiers", func(t *testing.T) {
|
||||
rule := models.AlertRuleGen(withQueryForState(t, eval.Normal))()
|
||||
|
||||
evalChan := make(chan *evaluation)
|
||||
evalAppliedChan := make(chan time.Time)
|
||||
|
||||
sender := NewSyncAlertsSenderMock()
|
||||
@ -806,14 +805,15 @@ func TestSchedule_ruleRoutine(t *testing.T) {
|
||||
|
||||
sch, ruleStore, _, _ := createSchedule(evalAppliedChan, sender)
|
||||
ruleStore.PutRule(context.Background(), rule)
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
t.Cleanup(cancel)
|
||||
ruleInfo := newAlertRuleInfo(ctx)
|
||||
|
||||
go func() {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
t.Cleanup(cancel)
|
||||
_ = sch.ruleRoutine(ctx, rule.GetKey(), evalChan, make(chan ruleVersionAndPauseStatus))
|
||||
_ = sch.ruleRoutine(rule.GetKey(), ruleInfo)
|
||||
}()
|
||||
|
||||
evalChan <- &evaluation{
|
||||
ruleInfo.evalCh <- &evaluation{
|
||||
scheduledAt: sch.clock.Now(),
|
||||
rule: rule,
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user