From a28328d7645dbbde185aae1bca6b620899621b61 Mon Sep 17 00:00:00 2001 From: Alexander Akhmetov Date: Tue, 28 Jan 2025 11:34:26 +0100 Subject: [PATCH] Alerting: Call the deletion reason provider even if the rule is no longer scheduled (#99571) Alerting: Call the deletion reason provider even if the rule is not scheduled anymore --- pkg/services/ngalert/schedule/alert_rule.go | 8 +++++++- pkg/services/ngalert/schedule/alert_rule_test.go | 8 ++++++++ pkg/services/ngalert/schedule/recording_rule.go | 14 +++++++++----- .../ngalert/schedule/recording_rule_test.go | 11 ++++++++++- pkg/services/ngalert/schedule/schedule.go | 10 +++++----- .../ngalert/schedule/schedule_unit_test.go | 6 +++--- 6 files changed, 42 insertions(+), 15 deletions(-) diff --git a/pkg/services/ngalert/schedule/alert_rule.go b/pkg/services/ngalert/schedule/alert_rule.go index 8f56fdfa8a9..f4a301aff78 100644 --- a/pkg/services/ngalert/schedule/alert_rule.go +++ b/pkg/services/ngalert/schedule/alert_rule.go @@ -42,6 +42,8 @@ type Rule interface { Type() ngmodels.RuleType // Status indicates the status of the evaluating rule. Status() ngmodels.RuleStatus + // Identifier returns the identifier of the rule. + Identifier() ngmodels.AlertRuleKeyWithGroup } type ruleFactoryFunc func(context.Context, *ngmodels.AlertRule) Rule @@ -71,7 +73,7 @@ func newRuleFactory( if rule.Type() == ngmodels.RuleTypeRecording { return newRecordingRule( ctx, - rule.GetKey(), + rule.GetKeyWithGroup(), maxAttempts, clock, evalFactory, @@ -178,6 +180,10 @@ func newAlertRule( } } +func (a *alertRule) Identifier() ngmodels.AlertRuleKeyWithGroup { + return a.key +} + func (a *alertRule) Type() ngmodels.RuleType { return ngmodels.RuleTypeAlerting } diff --git a/pkg/services/ngalert/schedule/alert_rule_test.go b/pkg/services/ngalert/schedule/alert_rule_test.go index bb9e648c291..4593a4052e0 100644 --- a/pkg/services/ngalert/schedule/alert_rule_test.go +++ b/pkg/services/ngalert/schedule/alert_rule_test.go @@ -269,6 +269,14 @@ func TestAlertRule(t *testing.T) { }) } +func TestAlertRuleIdentifier(t *testing.T) { + t.Run("should return correct identifier", func(t *testing.T) { + key := models.GenerateRuleKeyWithGroup(1) + r := blankRuleForTests(context.Background(), key) + require.Equal(t, key, r.Identifier()) + }) +} + func blankRuleForTests(ctx context.Context, key models.AlertRuleKeyWithGroup) *alertRule { return newAlertRule(ctx, key, nil, false, 0, nil, nil, nil, nil, nil, nil, log.NewNopLogger(), nil, nil, nil) } diff --git a/pkg/services/ngalert/schedule/recording_rule.go b/pkg/services/ngalert/schedule/recording_rule.go index 3abf44b6457..954b0869a4a 100644 --- a/pkg/services/ngalert/schedule/recording_rule.go +++ b/pkg/services/ngalert/schedule/recording_rule.go @@ -30,7 +30,7 @@ type RuleStatus struct { } type recordingRule struct { - key ngmodels.AlertRuleKey + key ngmodels.AlertRuleKeyWithGroup ctx context.Context evalCh chan *Evaluation @@ -56,8 +56,8 @@ type recordingRule struct { tracer tracing.Tracer } -func newRecordingRule(parent context.Context, key ngmodels.AlertRuleKey, maxAttempts int64, clock clock.Clock, evalFactory eval.EvaluatorFactory, cfg setting.RecordingRuleSettings, logger log.Logger, metrics *metrics.Scheduler, tracer tracing.Tracer, writer RecordingWriter, evalAppliedHook evalAppliedFunc, stopAppliedHook stopAppliedFunc) *recordingRule { - ctx, stop := util.WithCancelCause(ngmodels.WithRuleKey(parent, key)) +func newRecordingRule(parent context.Context, key ngmodels.AlertRuleKeyWithGroup, maxAttempts int64, clock clock.Clock, evalFactory eval.EvaluatorFactory, cfg setting.RecordingRuleSettings, logger log.Logger, metrics *metrics.Scheduler, tracer tracing.Tracer, writer RecordingWriter, evalAppliedHook evalAppliedFunc, stopAppliedHook stopAppliedFunc) *recordingRule { + ctx, stop := util.WithCancelCause(ngmodels.WithRuleKey(parent, key.AlertRuleKey)) return &recordingRule{ key: key, ctx: ctx, @@ -80,6 +80,10 @@ func newRecordingRule(parent context.Context, key ngmodels.AlertRuleKey, maxAtte } } +func (r *recordingRule) Identifier() ngmodels.AlertRuleKeyWithGroup { + return r.key +} + func (r *recordingRule) Type() ngmodels.RuleType { return ngmodels.RuleTypeRecording } @@ -301,7 +305,7 @@ func (r *recordingRule) evaluationDoneTestHook(ev *Evaluation) { return } - r.evalAppliedHook(r.key, ev.scheduledAt) + r.evalAppliedHook(r.key.AlertRuleKey, ev.scheduledAt) } // frameRef gets frames from a QueryDataResponse for a particular refID. It returns an error if the frames do not exist or have no data. @@ -328,5 +332,5 @@ func (r *recordingRule) stopApplied() { return } - r.stopAppliedHook(r.key) + r.stopAppliedHook(r.key.AlertRuleKey) } diff --git a/pkg/services/ngalert/schedule/recording_rule_test.go b/pkg/services/ngalert/schedule/recording_rule_test.go index 3a85921fdfe..7e72453bb9f 100644 --- a/pkg/services/ngalert/schedule/recording_rule_test.go +++ b/pkg/services/ngalert/schedule/recording_rule_test.go @@ -152,11 +152,20 @@ func TestRecordingRule(t *testing.T) { }) } +func TestRecordingRuleIdentifier(t *testing.T) { + t.Run("should return correct identifier", func(t *testing.T) { + key := models.GenerateRuleKeyWithGroup(1) + r := blankRecordingRuleForTests(context.Background()) + r.key = key + require.Equal(t, key, r.Identifier()) + }) +} + func blankRecordingRuleForTests(ctx context.Context) *recordingRule { st := setting.RecordingRuleSettings{ Enabled: true, } - return newRecordingRule(context.Background(), models.AlertRuleKey{}, 0, nil, nil, st, log.NewNopLogger(), nil, nil, writer.FakeWriter{}, nil, nil) + return newRecordingRule(context.Background(), models.AlertRuleKeyWithGroup{}, 0, nil, nil, st, log.NewNopLogger(), nil, nil, writer.FakeWriter{}, nil, nil) } func TestRecordingRule_Integration(t *testing.T) { diff --git a/pkg/services/ngalert/schedule/schedule.go b/pkg/services/ngalert/schedule/schedule.go index e02755eb0ec..3bc21115ca0 100644 --- a/pkg/services/ngalert/schedule/schedule.go +++ b/pkg/services/ngalert/schedule/schedule.go @@ -198,7 +198,7 @@ func (sch *schedule) deleteAlertRule(ctx context.Context, keys ...ngmodels.Alert // It can happen that the scheduler has deleted the alert rule before the // Ruler API has called DeleteAlertRule. This can happen as requests to // the Ruler API do not hold an exclusive lock over all scheduler operations. - rule, ok := sch.schedulableAlertRules.del(key) + _, ok := sch.schedulableAlertRules.del(key) if !ok { sch.log.Info("Alert rule cannot be removed from the scheduler as it is not scheduled", key.LogContext()...) } @@ -210,7 +210,7 @@ func (sch *schedule) deleteAlertRule(ctx context.Context, keys ...ngmodels.Alert } // stop rule evaluation - reason := sch.getRuleStopReason(ctx, key, rule) + reason := sch.getRuleStopReason(ctx, ruleRoutine.Identifier()) ruleRoutine.Stop(reason) } // Our best bet at this point is that we update the metrics with what we hope to schedule in the next tick. @@ -218,14 +218,14 @@ func (sch *schedule) deleteAlertRule(ctx context.Context, keys ...ngmodels.Alert sch.updateRulesMetrics(alertRules) } -func (sch *schedule) getRuleStopReason(ctx context.Context, key ngmodels.AlertRuleKey, rule *ngmodels.AlertRule) error { +func (sch *schedule) getRuleStopReason(ctx context.Context, key ngmodels.AlertRuleKeyWithGroup) error { // If the ruleStopReasonProvider is defined, we will use it to get the reason why the // alert rule was stopped. If it returns an error, we will use the default reason. - if sch.ruleStopReasonProvider == nil || rule == nil { + if sch.ruleStopReasonProvider == nil { return errRuleDeleted } - stopReason, err := sch.ruleStopReasonProvider.FindReason(ctx, sch.log, rule.GetKeyWithGroup()) + stopReason, err := sch.ruleStopReasonProvider.FindReason(ctx, sch.log, key) if err != nil { sch.log.New(key.LogContext()...).Error("Failed to get stop reason", "error", err) return errRuleDeleted diff --git a/pkg/services/ngalert/schedule/schedule_unit_test.go b/pkg/services/ngalert/schedule/schedule_unit_test.go index 311a419f88b..575f19cda27 100644 --- a/pkg/services/ngalert/schedule/schedule_unit_test.go +++ b/pkg/services/ngalert/schedule/schedule_unit_test.go @@ -991,7 +991,7 @@ func TestSchedule_deleteAlertRule(t *testing.T) { require.False(t, sch.registry.exists(key)) }) - t.Run("it should not call ruleStopReasonProvider if the rule is not found in the registry", func(t *testing.T) { + t.Run("it should still call ruleStopReasonProvider if the rule is not found in the registry", func(t *testing.T) { mockReasonProvider := new(mockAlertRuleStopReasonProvider) expectedReason := errors.New("some rule deletion reason") mockReasonProvider.On("FindReason", mock.Anything, mock.Anything, mock.Anything).Return(expectedReason, nil) @@ -1008,9 +1008,9 @@ func TestSchedule_deleteAlertRule(t *testing.T) { sch.deleteAlertRule(ctx, key) - mockReasonProvider.AssertNotCalled(t, "FindReason") + mockReasonProvider.AssertCalled(t, "FindReason", mock.Anything, mock.Anything, rule.GetKeyWithGroup()) - require.ErrorIs(t, info.(*alertRule).ctx.Err(), errRuleDeleted) + require.ErrorIs(t, info.(*alertRule).ctx.Err(), expectedReason) require.False(t, sch.registry.exists(key)) }) })