Alerting: Decouple rule routine from scheduler (#84018)

* create rule factory for more complicated dep injection into rules * Rules get direct access to metrics, logs, traces utilities, use factory in tests * Use clock internal to rule * Use sender, statemanager, evalfactory directly * evalApplied and stopApplied * use schedulableAlertRules behind interface * loaded metrics reader * 3 relevant config options * Drop unused scheduler parameter * Rename ruleRoutine to run * Update README * Handle long parameter lists * remove dead branch
2025-02-25 18:55:37 -06:00 · 2024-03-06 13:44:53 -06:00 · 2024-03-06 13:44:53 -06:00 · d5fda06147
commit d5fda06147
parent 8b9bc9a919
7 changed files with 208 additions and 76 deletions
--- a/pkg/services/ngalert/README.md
+++ b/pkg/services/ngalert/README.md
@ -30,7 +30,7 @@ The scheduler runs at a fixed interval, called its heartbeat, in which it does a
 3. Send an `*evaluation` event to the goroutine for each alert rule if its interval has elapsed
 4. Stop the goroutines for all alert rules that have been deleted since the last heartbeat

-The function that evaluates each alert rule is called `ruleRoutine`. It waits for an `*evaluation` event (sent each
+The function that evaluates each alert rule is called `run`. It waits for an `*evaluation` event (sent each
 interval seconds elapsed and is configurable per alert rule) and then evaluates the alert rule. To ensure that the
 scheduler is evaluating the latest version of the alert rule it compares its local version of the alert rule with that
 in the `*evaluation` event, fetching the latest version of the alert rule from the database if the version numbers
--- a/pkg/services/ngalert/schedule/alert_rule.go
+++ b/pkg/services/ngalert/schedule/alert_rule.go
@ -4,10 +4,15 @@ import (
 	context "context"
 	"errors"
 	"fmt"
+	"net/url"
 	"time"

+	"github.com/benbjohnson/clock"
+	"github.com/grafana/grafana/pkg/infra/log"
+	"github.com/grafana/grafana/pkg/infra/tracing"
 	"github.com/grafana/grafana/pkg/services/datasources"
 	"github.com/grafana/grafana/pkg/services/ngalert/eval"
+	"github.com/grafana/grafana/pkg/services/ngalert/metrics"
 	ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
 	"github.com/grafana/grafana/pkg/services/ngalert/state"
 	"github.com/grafana/grafana/pkg/services/org"
@ -18,20 +23,114 @@ import (
 	"go.opentelemetry.io/otel/trace"
 )

+// ruleFactoryFunc adapts a plain constructor function so it can be used
+// wherever a value with a new(context.Context) *alertRuleInfo method is needed.
+type ruleFactoryFunc func(context.Context) *alertRuleInfo
+
+// new calls the wrapped function with ctx and returns the rule it builds.
+func (f ruleFactoryFunc) new(ctx context.Context) *alertRuleInfo {
+	return f(ctx)
+}
+
+// newRuleFactory returns a factory that captures the given dependencies.
+// Each call to the factory forwards them unchanged, together with the context
+// supplied at that call, to newAlertRuleInfo.
+//
+// evalAppliedHook and stopAppliedHook may be nil; the rule checks for nil
+// before invoking them.
+func newRuleFactory(
+	appURL *url.URL,
+	disableGrafanaFolder bool,
+	maxAttempts int64,
+	sender AlertsSender,
+	stateManager *state.Manager,
+	evalFactory eval.EvaluatorFactory,
+	ruleProvider ruleProvider,
+	clock clock.Clock,
+	met *metrics.Scheduler,
+	logger log.Logger,
+	tracer tracing.Tracer,
+	evalAppliedHook evalAppliedFunc,
+	stopAppliedHook stopAppliedFunc,
+) ruleFactoryFunc {
+	return func(ctx context.Context) *alertRuleInfo {
+		return newAlertRuleInfo(
+			ctx,
+			appURL,
+			disableGrafanaFolder,
+			maxAttempts,
+			sender,
+			stateManager,
+			evalFactory,
+			ruleProvider,
+			clock,
+			met,
+			logger,
+			tracer,
+			evalAppliedHook,
+			stopAppliedHook,
+		)
+	}
+}
+
+// evalAppliedFunc is the signature of the hook that receives the rule key and
+// the scheduled time of an evaluation once that evaluation has been handled.
+type evalAppliedFunc = func(ngmodels.AlertRuleKey, time.Time)
+// stopAppliedFunc is the signature of the hook that receives the rule key
+// when the rule's run loop returns.
+type stopAppliedFunc = func(ngmodels.AlertRuleKey)
+
+// ruleProvider looks up an alert rule by its key. The rule's run loop uses it
+// to fetch the rule whose state is being reset after an update or pause.
+type ruleProvider interface {
+	// get returns the rule stored under the given key.
+	// NOTE(review): the result for an unknown key is not visible here — confirm.
+	get(ngmodels.AlertRuleKey) *ngmodels.AlertRule
+}
+
 type alertRuleInfo struct {
 	evalCh   chan *evaluation
 	updateCh chan ruleVersionAndPauseStatus
 	ctx      context.Context
 	stopFn   util.CancelCauseFunc
+
+	appURL               *url.URL
+	disableGrafanaFolder bool
+	maxAttempts          int64
+
+	clock        clock.Clock
+	sender       AlertsSender
+	stateManager *state.Manager
+	evalFactory  eval.EvaluatorFactory
+	ruleProvider ruleProvider
+
+	// Event hooks that are only used in tests.
+	evalAppliedHook evalAppliedFunc
+	stopAppliedHook stopAppliedFunc
+
+	metrics *metrics.Scheduler
+	logger  log.Logger
+	tracer  tracing.Tracer
 }

+// newAlertRuleInfo builds the per-rule state for a single alert rule.
+//
+// It derives a cancellable context from parent (cancelled through stopFn),
+// creates unbuffered evalCh and updateCh channels, and stores the supplied
+// dependencies on the struct so the rule does not need a scheduler reference.
+// evalAppliedHook and stopAppliedHook may be nil.
-func newAlertRuleInfo(parent context.Context) *alertRuleInfo {
+func newAlertRuleInfo(
+	parent context.Context,
+	appURL *url.URL,
+	disableGrafanaFolder bool,
+	maxAttempts int64,
+	sender AlertsSender,
+	stateManager *state.Manager,
+	evalFactory eval.EvaluatorFactory,
+	ruleProvider ruleProvider,
+	clock clock.Clock,
+	met *metrics.Scheduler,
+	logger log.Logger,
+	tracer tracing.Tracer,
+	evalAppliedHook evalAppliedFunc,
+	stopAppliedHook stopAppliedFunc,
+) *alertRuleInfo {
 	ctx, stop := util.WithCancelCause(parent)
 	return &alertRuleInfo{
-		evalCh:   make(chan *evaluation),
-		updateCh: make(chan ruleVersionAndPauseStatus),
-		ctx:      ctx,
-		stopFn:   stop,
+		evalCh:               make(chan *evaluation),
+		updateCh:             make(chan ruleVersionAndPauseStatus),
+		ctx:                  ctx,
+		stopFn:               stop,
+		appURL:               appURL,
+		disableGrafanaFolder: disableGrafanaFolder,
+		maxAttempts:          maxAttempts,
+		clock:                clock,
+		sender:               sender,
+		stateManager:         stateManager,
+		evalFactory:          evalFactory,
+		ruleProvider:         ruleProvider,
+		evalAppliedHook:      evalAppliedHook,
+		stopAppliedHook:      stopAppliedHook,
+		metrics:              met,
+		logger:               logger,
+		tracer:               tracer,
 	}
 }

@ -82,52 +181,49 @@ func (a *alertRuleInfo) stop(reason error) {
 }

 //nolint:gocyclo
-func (a *alertRuleInfo) ruleRoutine(key ngmodels.AlertRuleKey, sch *schedule) error {
+func (a *alertRuleInfo) run(key ngmodels.AlertRuleKey) error {
 	grafanaCtx := ngmodels.WithRuleKey(a.ctx, key)
-	logger := sch.log.FromContext(grafanaCtx)
+	logger := a.logger.FromContext(grafanaCtx)
 	logger.Debug("Alert rule routine started")

 	orgID := fmt.Sprint(key.OrgID)
-	evalTotal := sch.metrics.EvalTotal.WithLabelValues(orgID)
-	evalDuration := sch.metrics.EvalDuration.WithLabelValues(orgID)
-	evalTotalFailures := sch.metrics.EvalFailures.WithLabelValues(orgID)
-	processDuration := sch.metrics.ProcessDuration.WithLabelValues(orgID)
-	sendDuration := sch.metrics.SendDuration.WithLabelValues(orgID)
+	evalTotal := a.metrics.EvalTotal.WithLabelValues(orgID)
+	evalDuration := a.metrics.EvalDuration.WithLabelValues(orgID)
+	evalTotalFailures := a.metrics.EvalFailures.WithLabelValues(orgID)
+	processDuration := a.metrics.ProcessDuration.WithLabelValues(orgID)
+	sendDuration := a.metrics.SendDuration.WithLabelValues(orgID)

 	notify := func(states []state.StateTransition) {
-		expiredAlerts := state.FromAlertsStateToStoppedAlert(states, sch.appURL, sch.clock)
+		expiredAlerts := state.FromAlertsStateToStoppedAlert(states, a.appURL, a.clock)
 		if len(expiredAlerts.PostableAlerts) > 0 {
-			sch.alertsSender.Send(grafanaCtx, key, expiredAlerts)
+			a.sender.Send(grafanaCtx, key, expiredAlerts)
 		}
 	}

 	resetState := func(ctx context.Context, isPaused bool) {
-		rule := sch.schedulableAlertRules.get(key)
+		rule := a.ruleProvider.get(key)
 		reason := ngmodels.StateReasonUpdated
 		if isPaused {
 			reason = ngmodels.StateReasonPaused
 		}
-		states := sch.stateManager.ResetStateByRuleUID(ctx, rule, reason)
+		states := a.stateManager.ResetStateByRuleUID(ctx, rule, reason)
 		notify(states)
 	}

 	evaluate := func(ctx context.Context, f fingerprint, attempt int64, e *evaluation, span trace.Span, retry bool) error {
 		logger := logger.New("version", e.rule.Version, "fingerprint", f, "attempt", attempt, "now", e.scheduledAt).FromContext(ctx)
-		start := sch.clock.Now()
+		start := a.clock.Now()

-		evalCtx := eval.NewContextWithPreviousResults(ctx, SchedulerUserFor(e.rule.OrgID), sch.newLoadedMetricsReader(e.rule))
-		if sch.evaluatorFactory == nil {
-			panic("evalfactory nil")
-		}
-		ruleEval, err := sch.evaluatorFactory.Create(evalCtx, e.rule.GetEvalCondition())
+		evalCtx := eval.NewContextWithPreviousResults(ctx, SchedulerUserFor(e.rule.OrgID), a.newLoadedMetricsReader(e.rule))
+		ruleEval, err := a.evalFactory.Create(evalCtx, e.rule.GetEvalCondition())
 		var results eval.Results
 		var dur time.Duration
 		if err != nil {
-			dur = sch.clock.Now().Sub(start)
+			dur = a.clock.Now().Sub(start)
 			logger.Error("Failed to build rule evaluator", "error", err)
 		} else {
 			results, err = ruleEval.Evaluate(ctx, e.scheduledAt)
-			dur = sch.clock.Now().Sub(start)
+			dur = a.clock.Now().Sub(start)
 			if err != nil {
 				logger.Error("Failed to evaluate rule", "error", err, "duration", dur)
 			}
@ -181,33 +277,33 @@ func (a *alertRuleInfo) ruleRoutine(key ngmodels.AlertRuleKey, sch *schedule) er
 				attribute.Int64("results", int64(len(results))),
 			))
 		}
-		start = sch.clock.Now()
-		processedStates := sch.stateManager.ProcessEvalResults(
+		start = a.clock.Now()
+		processedStates := a.stateManager.ProcessEvalResults(
 			ctx,
 			e.scheduledAt,
 			e.rule,
 			results,
-			state.GetRuleExtraLabels(logger, e.rule, e.folderTitle, !sch.disableGrafanaFolder),
+			state.GetRuleExtraLabels(logger, e.rule, e.folderTitle, !a.disableGrafanaFolder),
 		)
-		processDuration.Observe(sch.clock.Now().Sub(start).Seconds())
+		processDuration.Observe(a.clock.Now().Sub(start).Seconds())

-		start = sch.clock.Now()
-		alerts := state.FromStateTransitionToPostableAlerts(processedStates, sch.stateManager, sch.appURL)
+		start = a.clock.Now()
+		alerts := state.FromStateTransitionToPostableAlerts(processedStates, a.stateManager, a.appURL)
 		span.AddEvent("results processed", trace.WithAttributes(
 			attribute.Int64("state_transitions", int64(len(processedStates))),
 			attribute.Int64("alerts_to_send", int64(len(alerts.PostableAlerts))),
 		))
 		if len(alerts.PostableAlerts) > 0 {
-			sch.alertsSender.Send(ctx, key, alerts)
+			a.sender.Send(ctx, key, alerts)
 		}
-		sendDuration.Observe(sch.clock.Now().Sub(start).Seconds())
+		sendDuration.Observe(a.clock.Now().Sub(start).Seconds())

 		return nil
 	}

 	evalRunning := false
 	var currentFingerprint fingerprint
-	defer sch.stopApplied(key)
+	defer a.stopApplied(key)
 	for {
 		select {
 		// used by external services (API) to notify that rule is updated.
@ -235,10 +331,10 @@ func (a *alertRuleInfo) ruleRoutine(key ngmodels.AlertRuleKey, sch *schedule) er
 				evalRunning = true
 				defer func() {
 					evalRunning = false
-					sch.evalApplied(key, ctx.scheduledAt)
+					a.evalApplied(key, ctx.scheduledAt)
 				}()

-				for attempt := int64(1); attempt <= sch.maxAttempts; attempt++ {
+				for attempt := int64(1); attempt <= a.maxAttempts; attempt++ {
 					isPaused := ctx.rule.IsPaused
 					f := ruleWithFolder{ctx.rule, ctx.folderTitle}.Fingerprint()
 					// Do not clean up state if the eval loop has just started.
@ -262,7 +358,7 @@ func (a *alertRuleInfo) ruleRoutine(key ngmodels.AlertRuleKey, sch *schedule) er

 					fpStr := currentFingerprint.String()
 					utcTick := ctx.scheduledAt.UTC().Format(time.RFC3339Nano)
-					tracingCtx, span := sch.tracer.Start(grafanaCtx, "alert rule execution", trace.WithAttributes(
+					tracingCtx, span := a.tracer.Start(grafanaCtx, "alert rule execution", trace.WithAttributes(
 						attribute.String("rule_uid", ctx.rule.UID),
 						attribute.Int64("org_id", ctx.rule.OrgID),
 						attribute.Int64("rule_version", ctx.rule.Version),
@ -278,7 +374,7 @@ func (a *alertRuleInfo) ruleRoutine(key ngmodels.AlertRuleKey, sch *schedule) er
 						return
 					}

-					retry := attempt < sch.maxAttempts
+					retry := attempt < a.maxAttempts
 					err := evaluate(tracingCtx, f, attempt, ctx, span, retry)
 					// This is extremely confusing - when we exhaust all retry attempts, or we have no retryable errors
 					// we return nil - so technically, this is meaningless to know whether the evaluation has errors or not.
@ -306,7 +402,7 @@ func (a *alertRuleInfo) ruleRoutine(key ngmodels.AlertRuleKey, sch *schedule) er
 				// cases.
 				ctx, cancelFunc := context.WithTimeout(context.Background(), time.Minute)
 				defer cancelFunc()
-				states := sch.stateManager.DeleteStateByRuleUID(ngmodels.WithRuleKey(ctx, key), key, ngmodels.StateReasonRuleDeleted)
+				states := a.stateManager.DeleteStateByRuleUID(ngmodels.WithRuleKey(ctx, key), key, ngmodels.StateReasonRuleDeleted)
 				notify(states)
 			}
 			logger.Debug("Stopping alert rule routine")
@ -316,21 +412,21 @@ func (a *alertRuleInfo) ruleRoutine(key ngmodels.AlertRuleKey, sch *schedule) er
 }

 // evalApplied is only used on tests.
-func (sch *schedule) evalApplied(alertDefKey ngmodels.AlertRuleKey, now time.Time) {
-	if sch.evalAppliedFunc == nil {
+func (a *alertRuleInfo) evalApplied(alertDefKey ngmodels.AlertRuleKey, now time.Time) {
+	if a.evalAppliedHook == nil {
 		return
 	}

-	sch.evalAppliedFunc(alertDefKey, now)
+	a.evalAppliedHook(alertDefKey, now)
 }

 // stopApplied is only used on tests.
-func (sch *schedule) stopApplied(alertDefKey ngmodels.AlertRuleKey) {
-	if sch.stopAppliedFunc == nil {
+func (a *alertRuleInfo) stopApplied(alertDefKey ngmodels.AlertRuleKey) {
+	if a.stopAppliedHook == nil {
 		return
 	}

-	sch.stopAppliedFunc(alertDefKey)
+	a.stopAppliedHook(alertDefKey)
 }

 func SchedulerUserFor(orgID int64) *user.SignedInUser {
--- a/pkg/services/ngalert/schedule/alert_rule_test.go
+++ b/pkg/services/ngalert/schedule/alert_rule_test.go
@ -34,7 +34,7 @@ func TestAlertRuleInfo(t *testing.T) {

 	t.Run("when rule evaluation is not stopped", func(t *testing.T) {
 		t.Run("update should send to updateCh", func(t *testing.T) {
-			r := newAlertRuleInfo(context.Background())
+			r := blankRuleInfoForTests(context.Background())
 			resultCh := make(chan bool)
 			go func() {
 				resultCh <- r.update(ruleVersionAndPauseStatus{fingerprint(rand.Uint64()), false})
@ -47,7 +47,7 @@ func TestAlertRuleInfo(t *testing.T) {
 			}
 		})
 		t.Run("update should drop any concurrent sending to updateCh", func(t *testing.T) {
-			r := newAlertRuleInfo(context.Background())
+			r := blankRuleInfoForTests(context.Background())
 			version1 := ruleVersionAndPauseStatus{fingerprint(rand.Uint64()), false}
 			version2 := ruleVersionAndPauseStatus{fingerprint(rand.Uint64()), false}

@ -73,7 +73,7 @@ func TestAlertRuleInfo(t *testing.T) {
 			}
 		})
 		t.Run("eval should send to evalCh", func(t *testing.T) {
-			r := newAlertRuleInfo(context.Background())
+			r := blankRuleInfoForTests(context.Background())
 			expected := time.Now()
 			resultCh := make(chan evalResponse)
 			data := &evaluation{
@ -96,7 +96,7 @@ func TestAlertRuleInfo(t *testing.T) {
 			}
 		})
 		t.Run("eval should drop any concurrent sending to evalCh", func(t *testing.T) {
-			r := newAlertRuleInfo(context.Background())
+			r := blankRuleInfoForTests(context.Background())
 			time1 := time.UnixMilli(rand.Int63n(math.MaxInt64))
 			time2 := time.UnixMilli(rand.Int63n(math.MaxInt64))
 			resultCh1 := make(chan evalResponse)
@ -142,7 +142,7 @@ func TestAlertRuleInfo(t *testing.T) {
 			}
 		})
 		t.Run("eval should exit when context is cancelled", func(t *testing.T) {
-			r := newAlertRuleInfo(context.Background())
+			r := blankRuleInfoForTests(context.Background())
 			resultCh := make(chan evalResponse)
 			data := &evaluation{
 				scheduledAt: time.Now(),
@ -166,13 +166,13 @@ func TestAlertRuleInfo(t *testing.T) {
 	})
 	t.Run("when rule evaluation is stopped", func(t *testing.T) {
 		t.Run("Update should do nothing", func(t *testing.T) {
-			r := newAlertRuleInfo(context.Background())
+			r := blankRuleInfoForTests(context.Background())
 			r.stop(errRuleDeleted)
 			require.ErrorIs(t, r.ctx.Err(), errRuleDeleted)
 			require.False(t, r.update(ruleVersionAndPauseStatus{fingerprint(rand.Uint64()), false}))
 		})
 		t.Run("eval should do nothing", func(t *testing.T) {
-			r := newAlertRuleInfo(context.Background())
+			r := blankRuleInfoForTests(context.Background())
 			r.stop(nil)
 			data := &evaluation{
 				scheduledAt: time.Now(),
@ -184,19 +184,19 @@ func TestAlertRuleInfo(t *testing.T) {
 			require.Nilf(t, dropped, "expected no dropped evaluations but got one")
 		})
 		t.Run("stop should do nothing", func(t *testing.T) {
-			r := newAlertRuleInfo(context.Background())
+			r := blankRuleInfoForTests(context.Background())
 			r.stop(nil)
 			r.stop(nil)
 		})
 		t.Run("stop should do nothing if parent context stopped", func(t *testing.T) {
 			ctx, cancelFn := context.WithCancel(context.Background())
-			r := newAlertRuleInfo(ctx)
+			r := blankRuleInfoForTests(ctx)
 			cancelFn()
 			r.stop(nil)
 		})
 	})
 	t.Run("should be thread-safe", func(t *testing.T) {
-		r := newAlertRuleInfo(context.Background())
+		r := blankRuleInfoForTests(context.Background())
 		wg := sync.WaitGroup{}
 		go func() {
 			for {
@ -240,6 +240,11 @@ func TestAlertRuleInfo(t *testing.T) {
 	})
 }

+// blankRuleInfoForTests returns an alertRuleInfo bound to ctx with every
+// dependency left nil or zero. It is meant for tests that only exercise the
+// eval/update channels and stop handling, not the run loop.
+func blankRuleInfoForTests(ctx context.Context) *alertRuleInfo {
+	factory := newRuleFactory(nil, false, 0, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil)
+	// Pass the caller's ctx through so tests that cancel the parent context
+	// actually affect the rule under test.
+	return factory.new(ctx)
+}
+
 func TestRuleRoutine(t *testing.T) {
 	createSchedule := func(
 		evalAppliedChan chan time.Time,
@ -269,11 +274,12 @@ func TestRuleRoutine(t *testing.T) {
 			rule := models.AlertRuleGen(withQueryForState(t, evalState))()
 			ruleStore.PutRule(context.Background(), rule)
 			folderTitle := ruleStore.getNamespaceTitle(rule.NamespaceUID)
+			factory := ruleFactoryFromScheduler(sch)
 			ctx, cancel := context.WithCancel(context.Background())
 			t.Cleanup(cancel)
-			ruleInfo := newAlertRuleInfo(ctx)
+			ruleInfo := factory.new(ctx)
 			go func() {
-				_ = ruleInfo.ruleRoutine(rule.GetKey(), sch)
+				_ = ruleInfo.run(rule.GetKey())
 			}()

 			expectedTime := time.UnixMicro(rand.Int63())
@ -418,10 +424,11 @@ func TestRuleRoutine(t *testing.T) {
 			expectedStates := sch.stateManager.GetStatesForRuleUID(rule.OrgID, rule.UID)
 			require.NotEmpty(t, expectedStates)

+			factory := ruleFactoryFromScheduler(sch)
 			ctx, cancel := context.WithCancel(context.Background())
-			ruleInfo := newAlertRuleInfo(ctx)
+			ruleInfo := factory.new(ctx)
 			go func() {
-				err := ruleInfo.ruleRoutine(models.AlertRuleKey{}, sch)
+				err := ruleInfo.run(models.AlertRuleKey{})
 				stoppedChan <- err
 			}()

@ -438,9 +445,10 @@ func TestRuleRoutine(t *testing.T) {
 			_ = sch.stateManager.ProcessEvalResults(context.Background(), sch.clock.Now(), rule, eval.GenerateResults(rand.Intn(5)+1, eval.ResultGen(eval.WithEvaluatedAt(sch.clock.Now()))), nil)
 			require.NotEmpty(t, sch.stateManager.GetStatesForRuleUID(rule.OrgID, rule.UID))

-			ruleInfo := newAlertRuleInfo(context.Background())
+			factory := ruleFactoryFromScheduler(sch)
+			ruleInfo := factory.new(context.Background())
 			go func() {
-				err := ruleInfo.ruleRoutine(rule.GetKey(), sch)
+				err := ruleInfo.run(rule.GetKey())
 				stoppedChan <- err
 			}()

@ -465,12 +473,13 @@ func TestRuleRoutine(t *testing.T) {
 		sch, ruleStore, _, _ := createSchedule(evalAppliedChan, sender)
 		ruleStore.PutRule(context.Background(), rule)
 		sch.schedulableAlertRules.set([]*models.AlertRule{rule}, map[models.FolderKey]string{rule.GetFolderKey(): folderTitle})
+		factory := ruleFactoryFromScheduler(sch)
 		ctx, cancel := context.WithCancel(context.Background())
 		t.Cleanup(cancel)
-		ruleInfo := newAlertRuleInfo(ctx)
+		ruleInfo := factory.new(ctx)

 		go func() {
-			_ = ruleInfo.ruleRoutine(rule.GetKey(), sch)
+			_ = ruleInfo.run(rule.GetKey())
 		}()

 		// init evaluation loop so it got the rule version
@ -546,12 +555,13 @@ func TestRuleRoutine(t *testing.T) {
 		sch, ruleStore, _, reg := createSchedule(evalAppliedChan, sender)
 		sch.maxAttempts = 3
 		ruleStore.PutRule(context.Background(), rule)
+		factory := ruleFactoryFromScheduler(sch)
 		ctx, cancel := context.WithCancel(context.Background())
 		t.Cleanup(cancel)
-		ruleInfo := newAlertRuleInfo(ctx)
+		ruleInfo := factory.new(ctx)

 		go func() {
-			_ = ruleInfo.ruleRoutine(rule.GetKey(), sch)
+			_ = ruleInfo.run(rule.GetKey())
 		}()

 		ruleInfo.evalCh <- &evaluation{
@ -651,12 +661,13 @@ func TestRuleRoutine(t *testing.T) {

 			sch, ruleStore, _, _ := createSchedule(evalAppliedChan, sender)
 			ruleStore.PutRule(context.Background(), rule)
+			factory := ruleFactoryFromScheduler(sch)
 			ctx, cancel := context.WithCancel(context.Background())
 			t.Cleanup(cancel)
-			ruleInfo := newAlertRuleInfo(ctx)
+			ruleInfo := factory.new(ctx)

 			go func() {
-				_ = ruleInfo.ruleRoutine(rule.GetKey(), sch)
+				_ = ruleInfo.run(rule.GetKey())
 			}()

 			ruleInfo.evalCh <- &evaluation{
@ -684,12 +695,13 @@ func TestRuleRoutine(t *testing.T) {

 		sch, ruleStore, _, _ := createSchedule(evalAppliedChan, sender)
 		ruleStore.PutRule(context.Background(), rule)
+		factory := ruleFactoryFromScheduler(sch)
 		ctx, cancel := context.WithCancel(context.Background())
 		t.Cleanup(cancel)
-		ruleInfo := newAlertRuleInfo(ctx)
+		ruleInfo := factory.new(ctx)

 		go func() {
-			_ = ruleInfo.ruleRoutine(rule.GetKey(), sch)
+			_ = ruleInfo.run(rule.GetKey())
 		}()

 		ruleInfo.evalCh <- &evaluation{
@ -704,3 +716,7 @@ func TestRuleRoutine(t *testing.T) {
 		require.NotEmpty(t, sch.stateManager.GetStatesForRuleUID(rule.OrgID, rule.UID))
 	})
 }
+
+// ruleFactoryFromScheduler builds a rule factory from the scheduler's fields,
+// passing the same arguments that processTick passes to newRuleFactory.
+// The field values are read when this function is called, so set fields such
+// as sch.maxAttempts before calling it.
+func ruleFactoryFromScheduler(sch *schedule) ruleFactory {
+	return newRuleFactory(sch.appURL, sch.disableGrafanaFolder, sch.maxAttempts, sch.alertsSender, sch.stateManager, sch.evaluatorFactory, &sch.schedulableAlertRules, sch.clock, sch.metrics, sch.log, sch.tracer, sch.evalAppliedFunc, sch.stopAppliedFunc)
+}
--- a/pkg/services/ngalert/schedule/loaded_metrics_reader.go
+++ b/pkg/services/ngalert/schedule/loaded_metrics_reader.go
@ -10,9 +10,9 @@ import (

 var _ eval.AlertingResultsReader = AlertingResultsFromRuleState{}

-func (sch *schedule) newLoadedMetricsReader(rule *ngmodels.AlertRule) eval.AlertingResultsReader {
+func (a *alertRuleInfo) newLoadedMetricsReader(rule *ngmodels.AlertRule) eval.AlertingResultsReader {
 	return &AlertingResultsFromRuleState{
-		Manager: sch.stateManager,
+		Manager: a.stateManager,
 		Rule:    rule,
 	}
 }
--- a/pkg/services/ngalert/schedule/registry.go
+++ b/pkg/services/ngalert/schedule/registry.go
@ -17,6 +17,10 @@ import (

 var errRuleDeleted = errors.New("rule deleted")

+// ruleFactory creates the per-rule state for an alert rule. The registry
+// calls it when no entry exists yet for a rule key.
+type ruleFactory interface {
+	// new returns a fresh alertRuleInfo for the given context.
+	new(context.Context) *alertRuleInfo
+}
+
 type alertRuleInfoRegistry struct {
 	mu            sync.Mutex
 	alertRuleInfo map[models.AlertRuleKey]*alertRuleInfo
@ -24,13 +28,13 @@ type alertRuleInfoRegistry struct {

 // getOrCreateInfo gets rule routine information from registry by the key. If it does not exist, it creates a new one.
 // Returns a pointer to the rule routine information and a flag that indicates whether it is a new struct or not.
-func (r *alertRuleInfoRegistry) getOrCreateInfo(context context.Context, key models.AlertRuleKey) (*alertRuleInfo, bool) {
+func (r *alertRuleInfoRegistry) getOrCreateInfo(context context.Context, key models.AlertRuleKey, factory ruleFactory) (*alertRuleInfo, bool) {
 	r.mu.Lock()
 	defer r.mu.Unlock()

 	info, ok := r.alertRuleInfo[key]
 	if !ok {
-		info = newAlertRuleInfo(context)
+		info = factory.new(context)
 		r.alertRuleInfo[key] = info
 	}
 	return info, !ok
--- a/pkg/services/ngalert/schedule/schedule.go
+++ b/pkg/services/ngalert/schedule/schedule.go
@ -235,9 +235,24 @@ func (sch *schedule) processTick(ctx context.Context, dispatcherGroup *errgroup.
 	readyToRun := make([]readyToRunItem, 0)
 	updatedRules := make([]ngmodels.AlertRuleKeyWithVersion, 0, len(updated)) // this is needed for tests only
 	missingFolder := make(map[string][]string)
+	ruleFactory := newRuleFactory(
+		sch.appURL,
+		sch.disableGrafanaFolder,
+		sch.maxAttempts,
+		sch.alertsSender,
+		sch.stateManager,
+		sch.evaluatorFactory,
+		&sch.schedulableAlertRules,
+		sch.clock,
+		sch.metrics,
+		sch.log,
+		sch.tracer,
+		sch.evalAppliedFunc,
+		sch.stopAppliedFunc,
+	)
 	for _, item := range alertRules {
 		key := item.GetKey()
-		ruleInfo, newRoutine := sch.registry.getOrCreateInfo(ctx, key)
+		ruleInfo, newRoutine := sch.registry.getOrCreateInfo(ctx, key, ruleFactory)

 		// enforce minimum evaluation interval
 		if item.IntervalSeconds < int64(sch.minRuleInterval.Seconds()) {
@ -249,7 +264,7 @@ func (sch *schedule) processTick(ctx context.Context, dispatcherGroup *errgroup.

 		if newRoutine && !invalidInterval {
 			dispatcherGroup.Go(func() error {
-				return ruleInfo.ruleRoutine(key, sch)
+				return ruleInfo.run(key)
 			})
 		}

--- a/pkg/services/ngalert/schedule/schedule_unit_test.go
+++ b/pkg/services/ngalert/schedule/schedule_unit_test.go
@ -360,9 +360,10 @@ func TestSchedule_deleteAlertRule(t *testing.T) {
 	t.Run("when rule exists", func(t *testing.T) {
 		t.Run("it should stop evaluation loop and remove the controller from registry", func(t *testing.T) {
 			sch := setupScheduler(t, nil, nil, nil, nil, nil)
+			ruleFactory := ruleFactoryFromScheduler(sch)
 			rule := models.AlertRuleGen()()
 			key := rule.GetKey()
-			info, _ := sch.registry.getOrCreateInfo(context.Background(), key)
+			info, _ := sch.registry.getOrCreateInfo(context.Background(), key, ruleFactory)
 			sch.deleteAlertRule(key)
 			require.ErrorIs(t, info.ctx.Err(), errRuleDeleted)
 			require.False(t, sch.registry.exists(key))