Alerting: Refactor ruleRoutine to take an entire ruleInfo instance (#83858)

* Make stop a real method

* ruleRoutine takes a ruleInfo reference directly rather than pieces of it

* Fix whitespace
Alexander Weaver 2024-03-04 15:15:01 -06:00 committed by GitHub
parent 3121fce305
commit f2a9d0a89d
3 changed files with 51 additions and 41 deletions

View File

@@ -10,12 +10,17 @@ type alertRuleInfo struct {
evalCh chan *evaluation
updateCh chan ruleVersionAndPauseStatus
ctx context.Context
stop func(reason error)
stopFn util.CancelCauseFunc
}
func newAlertRuleInfo(parent context.Context) *alertRuleInfo {
ctx, stop := util.WithCancelCause(parent)
return &alertRuleInfo{evalCh: make(chan *evaluation), updateCh: make(chan ruleVersionAndPauseStatus), ctx: ctx, stop: stop}
return &alertRuleInfo{
evalCh: make(chan *evaluation),
updateCh: make(chan ruleVersionAndPauseStatus),
ctx: ctx,
stopFn: stop,
}
}
// eval signals the rule evaluation routine to perform the evaluation of the rule. Does nothing if the loop is stopped.
@@ -58,3 +63,8 @@ func (a *alertRuleInfo) update(lastVersion ruleVersionAndPauseStatus) bool {
return false
}
}
// stop sends an instruction to the rule evaluation routine to shut down. An optional shutdown reason can be given.
func (a *alertRuleInfo) stop(reason error) {
a.stopFn(reason)
}
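
The stopFn/stop split above follows the cancel-with-cause pattern: the struct keeps the raw cancel function private and exposes a named stop method that records the shutdown reason. A minimal standalone sketch of that pattern, written against the standard library's context.WithCancelCause (Grafana's util.WithCancelCause is assumed here to behave the same way; the names are illustrative, not the scheduler's):

```go
package main

import (
	"context"
	"errors"
	"fmt"
)

var errRuleDeleted = errors.New("rule deleted")

// ruleInfo owns the routine's context and keeps the cancel function private,
// exposing it only through the stop method.
type ruleInfo struct {
	ctx    context.Context
	stopFn context.CancelCauseFunc
}

func newRuleInfo(parent context.Context) *ruleInfo {
	ctx, stop := context.WithCancelCause(parent)
	return &ruleInfo{ctx: ctx, stopFn: stop}
}

// stop shuts the routine down; the reason is retrievable later via context.Cause.
func (r *ruleInfo) stop(reason error) {
	r.stopFn(reason)
}

func main() {
	info := newRuleInfo(context.Background())
	info.stop(errRuleDeleted)

	<-info.ctx.Done()
	fmt.Println(context.Cause(info.ctx)) // prints: rule deleted
}
```

Making stop a real method keeps callers away from the cancel function itself and gives the routine (and the tests further down) a single shutdown reason to inspect.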

View File

@@ -256,7 +256,7 @@ func (sch *schedule) processTick(ctx context.Context, dispatcherGroup *errgroup.
if newRoutine && !invalidInterval {
dispatcherGroup.Go(func() error {
return sch.ruleRoutine(ruleInfo.ctx, key, ruleInfo.evalCh, ruleInfo.updateCh)
return sch.ruleRoutine(key, ruleInfo)
})
}
@@ -345,8 +345,8 @@ func (sch *schedule) processTick(ctx context.Context, dispatcherGroup *errgroup.
}
//nolint:gocyclo
func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key ngmodels.AlertRuleKey, evalCh <-chan *evaluation, updateCh <-chan ruleVersionAndPauseStatus) error {
grafanaCtx = ngmodels.WithRuleKey(grafanaCtx, key)
func (sch *schedule) ruleRoutine(key ngmodels.AlertRuleKey, ruleInfo *alertRuleInfo) error {
grafanaCtx := ngmodels.WithRuleKey(ruleInfo.ctx, key)
logger := sch.log.FromContext(grafanaCtx)
logger.Debug("Alert rule routine started")
@@ -474,7 +474,7 @@ func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key ngmodels.AlertR
for {
select {
// used by external services (API) to notify that rule is updated.
case ctx := <-updateCh:
case ctx := <-ruleInfo.updateCh:
if currentFingerprint == ctx.Fingerprint {
logger.Info("Rule's fingerprint has not changed. Skip resetting the state", "currentFingerprint", currentFingerprint)
continue
@@ -485,7 +485,7 @@ func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key ngmodels.AlertR
resetState(grafanaCtx, ctx.IsPaused)
currentFingerprint = ctx.Fingerprint
// evalCh - used by the scheduler to signal that evaluation is needed.
case ctx, ok := <-evalCh:
case ctx, ok := <-ruleInfo.evalCh:
if !ok {
logger.Debug("Evaluation channel has been closed. Exiting")
return nil
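
With the routine now receiving the whole alertRuleInfo, its context and both channels come from one place, which is why the call in processTick collapses to sch.ruleRoutine(key, ruleInfo). A simplified, self-contained sketch of that shape (generic names and a string key instead of the scheduler's real types):

```go
package main

import (
	"context"
	"fmt"
)

type evaluation struct{ version int }
type update struct{ fingerprint uint64 }

// ruleInfo bundles everything the routine needs: lifecycle plus both channels.
type ruleInfo struct {
	ctx      context.Context
	evalCh   chan *evaluation
	updateCh chan update
}

// ruleRoutine takes the whole ruleInfo instead of ctx/evalCh/updateCh separately.
func ruleRoutine(key string, info *ruleInfo) error {
	for {
		select {
		case u := <-info.updateCh:
			fmt.Println(key, "update received, fingerprint:", u.fingerprint)
		case e, ok := <-info.evalCh:
			if !ok {
				return nil // evaluation channel closed
			}
			fmt.Println(key, "evaluating, version:", e.version)
		case <-info.ctx.Done():
			return info.ctx.Err()
		}
	}
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	info := &ruleInfo{ctx: ctx, evalCh: make(chan *evaluation), updateCh: make(chan update)}

	done := make(chan error)
	go func() { done <- ruleRoutine("org-1/rule-a", info) }()

	info.evalCh <- &evaluation{version: 1}
	info.updateCh <- update{fingerprint: 42}
	cancel()
	fmt.Println("routine returned:", <-done)
}
```

Callers that used to own three loose arguments now hand over one struct, which is also what lets the test setup below shrink to a single newAlertRuleInfo call.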

View File

@@ -384,22 +384,22 @@ func TestSchedule_ruleRoutine(t *testing.T) {
for _, evalState := range normalStates {
// TODO rewrite when we are able to mock/fake state manager
t.Run(fmt.Sprintf("when rule evaluation happens (evaluation state %s)", evalState), func(t *testing.T) {
evalChan := make(chan *evaluation)
evalAppliedChan := make(chan time.Time)
sch, ruleStore, instanceStore, reg := createSchedule(evalAppliedChan, nil)
rule := models.AlertRuleGen(withQueryForState(t, evalState))()
ruleStore.PutRule(context.Background(), rule)
folderTitle := ruleStore.getNamespaceTitle(rule.NamespaceUID)
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
ruleInfo := newAlertRuleInfo(ctx)
go func() {
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
_ = sch.ruleRoutine(ctx, rule.GetKey(), evalChan, make(chan ruleVersionAndPauseStatus))
_ = sch.ruleRoutine(rule.GetKey(), ruleInfo)
}()
expectedTime := time.UnixMicro(rand.Int63())
evalChan <- &evaluation{
ruleInfo.evalCh <- &evaluation{
scheduledAt: expectedTime,
rule: rule,
folderTitle: folderTitle,
@@ -540,8 +540,9 @@ func TestSchedule_ruleRoutine(t *testing.T) {
require.NotEmpty(t, expectedStates)
ctx, cancel := context.WithCancel(context.Background())
ruleInfo := newAlertRuleInfo(ctx)
go func() {
err := sch.ruleRoutine(ctx, models.AlertRuleKey{}, make(chan *evaluation), make(chan ruleVersionAndPauseStatus))
err := sch.ruleRoutine(models.AlertRuleKey{}, ruleInfo)
stoppedChan <- err
}()
@@ -550,7 +551,7 @@ func TestSchedule_ruleRoutine(t *testing.T) {
require.NoError(t, err)
require.Equal(t, len(expectedStates), len(sch.stateManager.GetStatesForRuleUID(rule.OrgID, rule.UID)))
})
t.Run("and clean up the state if delete is cancellation reason ", func(t *testing.T) {
t.Run("and clean up the state if delete is cancellation reason for inner context", func(t *testing.T) {
stoppedChan := make(chan error)
sch, _, _, _ := createSchedule(make(chan time.Time), nil)
@@ -558,13 +559,13 @@ func TestSchedule_ruleRoutine(t *testing.T) {
_ = sch.stateManager.ProcessEvalResults(context.Background(), sch.clock.Now(), rule, eval.GenerateResults(rand.Intn(5)+1, eval.ResultGen(eval.WithEvaluatedAt(sch.clock.Now()))), nil)
require.NotEmpty(t, sch.stateManager.GetStatesForRuleUID(rule.OrgID, rule.UID))
ctx, cancel := util.WithCancelCause(context.Background())
ruleInfo := newAlertRuleInfo(context.Background())
go func() {
err := sch.ruleRoutine(ctx, rule.GetKey(), make(chan *evaluation), make(chan ruleVersionAndPauseStatus))
err := sch.ruleRoutine(rule.GetKey(), ruleInfo)
stoppedChan <- err
}()
cancel(errRuleDeleted)
ruleInfo.stop(errRuleDeleted)
err := waitForErrChannel(t, stoppedChan)
require.NoError(t, err)
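
This test now drives the shutdown path through ruleInfo.stop(errRuleDeleted) instead of cancelling a hand-built cancel-cause context; the routine presumably tells a deletion apart from a plain shutdown by inspecting the context's cancellation cause. A rough standalone sketch of that idiom (hypothetical worker and cleanup names, not the scheduler's code):

```go
package main

import (
	"context"
	"errors"
	"fmt"
)

var errRuleDeleted = errors.New("rule deleted")

// worker blocks until its context is cancelled, then uses the cancellation
// cause to decide whether state should be cleaned up.
func worker(ctx context.Context, cleanup func()) error {
	<-ctx.Done()
	if errors.Is(context.Cause(ctx), errRuleDeleted) {
		cleanup() // the rule was deleted: drop its state
	}
	return nil // a plain shutdown is not an error
}

func main() {
	ctx, stop := context.WithCancelCause(context.Background())

	stopped := make(chan error)
	go func() { stopped <- worker(ctx, func() { fmt.Println("state cleaned up") }) }()

	stop(errRuleDeleted) // same shape as ruleInfo.stop(errRuleDeleted) above
	fmt.Println("worker returned:", <-stopped)
}
```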
@@ -577,9 +578,7 @@ func TestSchedule_ruleRoutine(t *testing.T) {
folderTitle := "folderName"
ruleFp := ruleWithFolder{rule, folderTitle}.Fingerprint()
evalChan := make(chan *evaluation)
evalAppliedChan := make(chan time.Time)
updateChan := make(chan ruleVersionAndPauseStatus)
sender := NewSyncAlertsSenderMock()
sender.EXPECT().Send(mock.Anything, rule.GetKey(), mock.Anything).Return()
@@ -587,15 +586,16 @@ func TestSchedule_ruleRoutine(t *testing.T) {
sch, ruleStore, _, _ := createSchedule(evalAppliedChan, sender)
ruleStore.PutRule(context.Background(), rule)
sch.schedulableAlertRules.set([]*models.AlertRule{rule}, map[models.FolderKey]string{rule.GetFolderKey(): folderTitle})
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
ruleInfo := newAlertRuleInfo(ctx)
go func() {
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
_ = sch.ruleRoutine(ctx, rule.GetKey(), evalChan, updateChan)
_ = sch.ruleRoutine(rule.GetKey(), ruleInfo)
}()
// init evaluation loop so it gets the rule version
evalChan <- &evaluation{
ruleInfo.evalCh <- &evaluation{
scheduledAt: sch.clock.Now(),
rule: rule,
folderTitle: folderTitle,
@@ -631,8 +631,8 @@ func TestSchedule_ruleRoutine(t *testing.T) {
require.Greaterf(t, expectedToBeSent, 0, "State manager was expected to return at least one state that can be expired")
t.Run("should do nothing if version in channel is the same", func(t *testing.T) {
updateChan <- ruleVersionAndPauseStatus{ruleFp, false}
updateChan <- ruleVersionAndPauseStatus{ruleFp, false} // second time just to make sure that previous messages were handled
ruleInfo.updateCh <- ruleVersionAndPauseStatus{ruleFp, false}
ruleInfo.updateCh <- ruleVersionAndPauseStatus{ruleFp, false} // second time just to make sure that previous messages were handled
actualStates := sch.stateManager.GetStatesForRuleUID(rule.OrgID, rule.UID)
require.Len(t, actualStates, len(states))
@@ -641,7 +641,7 @@ func TestSchedule_ruleRoutine(t *testing.T) {
})
t.Run("should clear the state and expire firing alerts if version in channel is greater", func(t *testing.T) {
updateChan <- ruleVersionAndPauseStatus{ruleFp + 1, false}
ruleInfo.updateCh <- ruleVersionAndPauseStatus{ruleFp + 1, false}
require.Eventually(t, func() bool {
return len(sender.Calls()) > 0
@@ -659,7 +659,6 @@ func TestSchedule_ruleRoutine(t *testing.T) {
rule := models.AlertRuleGen(withQueryForState(t, eval.Error))()
rule.ExecErrState = models.ErrorErrState
evalChan := make(chan *evaluation)
evalAppliedChan := make(chan time.Time)
sender := NewSyncAlertsSenderMock()
@@ -668,14 +667,15 @@ func TestSchedule_ruleRoutine(t *testing.T) {
sch, ruleStore, _, reg := createSchedule(evalAppliedChan, sender)
sch.maxAttempts = 3
ruleStore.PutRule(context.Background(), rule)
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
ruleInfo := newAlertRuleInfo(ctx)
go func() {
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
_ = sch.ruleRoutine(ctx, rule.GetKey(), evalChan, make(chan ruleVersionAndPauseStatus))
_ = sch.ruleRoutine(rule.GetKey(), ruleInfo)
}()
evalChan <- &evaluation{
ruleInfo.evalCh <- &evaluation{
scheduledAt: sch.clock.Now(),
rule: rule,
}
@@ -765,7 +765,6 @@ func TestSchedule_ruleRoutine(t *testing.T) {
// eval.Alerting makes the state manager create notifications for alertmanagers
rule := models.AlertRuleGen(withQueryForState(t, eval.Alerting))()
evalChan := make(chan *evaluation)
evalAppliedChan := make(chan time.Time)
sender := NewSyncAlertsSenderMock()
@@ -773,14 +772,15 @@ func TestSchedule_ruleRoutine(t *testing.T) {
sch, ruleStore, _, _ := createSchedule(evalAppliedChan, sender)
ruleStore.PutRule(context.Background(), rule)
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
ruleInfo := newAlertRuleInfo(ctx)
go func() {
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
_ = sch.ruleRoutine(ctx, rule.GetKey(), evalChan, make(chan ruleVersionAndPauseStatus))
_ = sch.ruleRoutine(rule.GetKey(), ruleInfo)
}()
evalChan <- &evaluation{
ruleInfo.evalCh <- &evaluation{
scheduledAt: sch.clock.Now(),
rule: rule,
}
@@ -798,7 +798,6 @@ func TestSchedule_ruleRoutine(t *testing.T) {
t.Run("when there are no alerts to send it should not call notifiers", func(t *testing.T) {
rule := models.AlertRuleGen(withQueryForState(t, eval.Normal))()
evalChan := make(chan *evaluation)
evalAppliedChan := make(chan time.Time)
sender := NewSyncAlertsSenderMock()
@@ -806,14 +805,15 @@ func TestSchedule_ruleRoutine(t *testing.T) {
sch, ruleStore, _, _ := createSchedule(evalAppliedChan, sender)
ruleStore.PutRule(context.Background(), rule)
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
ruleInfo := newAlertRuleInfo(ctx)
go func() {
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
_ = sch.ruleRoutine(ctx, rule.GetKey(), evalChan, make(chan ruleVersionAndPauseStatus))
_ = sch.ruleRoutine(rule.GetKey(), ruleInfo)
}()
evalChan <- &evaluation{
ruleInfo.evalCh <- &evaluation{
scheduledAt: sch.clock.Now(),
rule: rule,
}