Alerting: Call the deletion reason provider even if the rule is no longer scheduled (#99571)

Alerting: Call the deletion reason provider even if the rule is not scheduled anymore
This commit is contained in:
Alexander Akhmetov 2025-01-28 11:34:26 +01:00 committed by GitHub
parent f6fc39e71f
commit a28328d764
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 42 additions and 15 deletions

View File

@ -42,6 +42,8 @@ type Rule interface {
Type() ngmodels.RuleType Type() ngmodels.RuleType
// Status indicates the status of the evaluating rule. // Status indicates the status of the evaluating rule.
Status() ngmodels.RuleStatus Status() ngmodels.RuleStatus
// Identifier returns the identifier of the rule.
Identifier() ngmodels.AlertRuleKeyWithGroup
} }
type ruleFactoryFunc func(context.Context, *ngmodels.AlertRule) Rule type ruleFactoryFunc func(context.Context, *ngmodels.AlertRule) Rule
@ -71,7 +73,7 @@ func newRuleFactory(
if rule.Type() == ngmodels.RuleTypeRecording { if rule.Type() == ngmodels.RuleTypeRecording {
return newRecordingRule( return newRecordingRule(
ctx, ctx,
rule.GetKey(), rule.GetKeyWithGroup(),
maxAttempts, maxAttempts,
clock, clock,
evalFactory, evalFactory,
@ -178,6 +180,10 @@ func newAlertRule(
} }
} }
func (a *alertRule) Identifier() ngmodels.AlertRuleKeyWithGroup {
return a.key
}
func (a *alertRule) Type() ngmodels.RuleType { func (a *alertRule) Type() ngmodels.RuleType {
return ngmodels.RuleTypeAlerting return ngmodels.RuleTypeAlerting
} }

View File

@ -269,6 +269,14 @@ func TestAlertRule(t *testing.T) {
}) })
} }
func TestAlertRuleIdentifier(t *testing.T) {
t.Run("should return correct identifier", func(t *testing.T) {
key := models.GenerateRuleKeyWithGroup(1)
r := blankRuleForTests(context.Background(), key)
require.Equal(t, key, r.Identifier())
})
}
func blankRuleForTests(ctx context.Context, key models.AlertRuleKeyWithGroup) *alertRule { func blankRuleForTests(ctx context.Context, key models.AlertRuleKeyWithGroup) *alertRule {
return newAlertRule(ctx, key, nil, false, 0, nil, nil, nil, nil, nil, nil, log.NewNopLogger(), nil, nil, nil) return newAlertRule(ctx, key, nil, false, 0, nil, nil, nil, nil, nil, nil, log.NewNopLogger(), nil, nil, nil)
} }

View File

@ -30,7 +30,7 @@ type RuleStatus struct {
} }
type recordingRule struct { type recordingRule struct {
key ngmodels.AlertRuleKey key ngmodels.AlertRuleKeyWithGroup
ctx context.Context ctx context.Context
evalCh chan *Evaluation evalCh chan *Evaluation
@ -56,8 +56,8 @@ type recordingRule struct {
tracer tracing.Tracer tracer tracing.Tracer
} }
func newRecordingRule(parent context.Context, key ngmodels.AlertRuleKey, maxAttempts int64, clock clock.Clock, evalFactory eval.EvaluatorFactory, cfg setting.RecordingRuleSettings, logger log.Logger, metrics *metrics.Scheduler, tracer tracing.Tracer, writer RecordingWriter, evalAppliedHook evalAppliedFunc, stopAppliedHook stopAppliedFunc) *recordingRule { func newRecordingRule(parent context.Context, key ngmodels.AlertRuleKeyWithGroup, maxAttempts int64, clock clock.Clock, evalFactory eval.EvaluatorFactory, cfg setting.RecordingRuleSettings, logger log.Logger, metrics *metrics.Scheduler, tracer tracing.Tracer, writer RecordingWriter, evalAppliedHook evalAppliedFunc, stopAppliedHook stopAppliedFunc) *recordingRule {
ctx, stop := util.WithCancelCause(ngmodels.WithRuleKey(parent, key)) ctx, stop := util.WithCancelCause(ngmodels.WithRuleKey(parent, key.AlertRuleKey))
return &recordingRule{ return &recordingRule{
key: key, key: key,
ctx: ctx, ctx: ctx,
@ -80,6 +80,10 @@ func newRecordingRule(parent context.Context, key ngmodels.AlertRuleKey, maxAtte
} }
} }
func (r *recordingRule) Identifier() ngmodels.AlertRuleKeyWithGroup {
return r.key
}
func (r *recordingRule) Type() ngmodels.RuleType { func (r *recordingRule) Type() ngmodels.RuleType {
return ngmodels.RuleTypeRecording return ngmodels.RuleTypeRecording
} }
@ -301,7 +305,7 @@ func (r *recordingRule) evaluationDoneTestHook(ev *Evaluation) {
return return
} }
r.evalAppliedHook(r.key, ev.scheduledAt) r.evalAppliedHook(r.key.AlertRuleKey, ev.scheduledAt)
} }
// frameRef gets frames from a QueryDataResponse for a particular refID. It returns an error if the frames do not exist or have no data. // frameRef gets frames from a QueryDataResponse for a particular refID. It returns an error if the frames do not exist or have no data.
@ -328,5 +332,5 @@ func (r *recordingRule) stopApplied() {
return return
} }
r.stopAppliedHook(r.key) r.stopAppliedHook(r.key.AlertRuleKey)
} }

View File

@ -152,11 +152,20 @@ func TestRecordingRule(t *testing.T) {
}) })
} }
func TestRecordingRuleIdentifier(t *testing.T) {
t.Run("should return correct identifier", func(t *testing.T) {
key := models.GenerateRuleKeyWithGroup(1)
r := blankRecordingRuleForTests(context.Background())
r.key = key
require.Equal(t, key, r.Identifier())
})
}
func blankRecordingRuleForTests(ctx context.Context) *recordingRule { func blankRecordingRuleForTests(ctx context.Context) *recordingRule {
st := setting.RecordingRuleSettings{ st := setting.RecordingRuleSettings{
Enabled: true, Enabled: true,
} }
return newRecordingRule(context.Background(), models.AlertRuleKey{}, 0, nil, nil, st, log.NewNopLogger(), nil, nil, writer.FakeWriter{}, nil, nil) return newRecordingRule(context.Background(), models.AlertRuleKeyWithGroup{}, 0, nil, nil, st, log.NewNopLogger(), nil, nil, writer.FakeWriter{}, nil, nil)
} }
func TestRecordingRule_Integration(t *testing.T) { func TestRecordingRule_Integration(t *testing.T) {

View File

@ -198,7 +198,7 @@ func (sch *schedule) deleteAlertRule(ctx context.Context, keys ...ngmodels.Alert
// It can happen that the scheduler has deleted the alert rule before the // It can happen that the scheduler has deleted the alert rule before the
// Ruler API has called DeleteAlertRule. This can happen as requests to // Ruler API has called DeleteAlertRule. This can happen as requests to
// the Ruler API do not hold an exclusive lock over all scheduler operations. // the Ruler API do not hold an exclusive lock over all scheduler operations.
rule, ok := sch.schedulableAlertRules.del(key) _, ok := sch.schedulableAlertRules.del(key)
if !ok { if !ok {
sch.log.Info("Alert rule cannot be removed from the scheduler as it is not scheduled", key.LogContext()...) sch.log.Info("Alert rule cannot be removed from the scheduler as it is not scheduled", key.LogContext()...)
} }
@ -210,7 +210,7 @@ func (sch *schedule) deleteAlertRule(ctx context.Context, keys ...ngmodels.Alert
} }
// stop rule evaluation // stop rule evaluation
reason := sch.getRuleStopReason(ctx, key, rule) reason := sch.getRuleStopReason(ctx, ruleRoutine.Identifier())
ruleRoutine.Stop(reason) ruleRoutine.Stop(reason)
} }
// Our best bet at this point is that we update the metrics with what we hope to schedule in the next tick. // Our best bet at this point is that we update the metrics with what we hope to schedule in the next tick.
@ -218,14 +218,14 @@ func (sch *schedule) deleteAlertRule(ctx context.Context, keys ...ngmodels.Alert
sch.updateRulesMetrics(alertRules) sch.updateRulesMetrics(alertRules)
} }
func (sch *schedule) getRuleStopReason(ctx context.Context, key ngmodels.AlertRuleKey, rule *ngmodels.AlertRule) error { func (sch *schedule) getRuleStopReason(ctx context.Context, key ngmodels.AlertRuleKeyWithGroup) error {
// If the ruleStopReasonProvider is defined, we will use it to get the reason why the // If the ruleStopReasonProvider is defined, we will use it to get the reason why the
// alert rule was stopped. If it returns an error, we will use the default reason. // alert rule was stopped. If it returns an error, we will use the default reason.
if sch.ruleStopReasonProvider == nil || rule == nil { if sch.ruleStopReasonProvider == nil {
return errRuleDeleted return errRuleDeleted
} }
stopReason, err := sch.ruleStopReasonProvider.FindReason(ctx, sch.log, rule.GetKeyWithGroup()) stopReason, err := sch.ruleStopReasonProvider.FindReason(ctx, sch.log, key)
if err != nil { if err != nil {
sch.log.New(key.LogContext()...).Error("Failed to get stop reason", "error", err) sch.log.New(key.LogContext()...).Error("Failed to get stop reason", "error", err)
return errRuleDeleted return errRuleDeleted

View File

@ -991,7 +991,7 @@ func TestSchedule_deleteAlertRule(t *testing.T) {
require.False(t, sch.registry.exists(key)) require.False(t, sch.registry.exists(key))
}) })
t.Run("it should not call ruleStopReasonProvider if the rule is not found in the registry", func(t *testing.T) { t.Run("it should still call ruleStopReasonProvider if the rule is not found in the registry", func(t *testing.T) {
mockReasonProvider := new(mockAlertRuleStopReasonProvider) mockReasonProvider := new(mockAlertRuleStopReasonProvider)
expectedReason := errors.New("some rule deletion reason") expectedReason := errors.New("some rule deletion reason")
mockReasonProvider.On("FindReason", mock.Anything, mock.Anything, mock.Anything).Return(expectedReason, nil) mockReasonProvider.On("FindReason", mock.Anything, mock.Anything, mock.Anything).Return(expectedReason, nil)
@ -1008,9 +1008,9 @@ func TestSchedule_deleteAlertRule(t *testing.T) {
sch.deleteAlertRule(ctx, key) sch.deleteAlertRule(ctx, key)
mockReasonProvider.AssertNotCalled(t, "FindReason") mockReasonProvider.AssertCalled(t, "FindReason", mock.Anything, mock.Anything, rule.GetKeyWithGroup())
require.ErrorIs(t, info.(*alertRule).ctx.Err(), errRuleDeleted) require.ErrorIs(t, info.(*alertRule).ctx.Err(), expectedReason)
require.False(t, sch.registry.exists(key)) require.False(t, sch.registry.exists(key))
}) })
}) })