Alerting: Call the deletion reason provider even if the rule is no longer scheduled (#99571)

Alerting: Call the deletion reason provider even if the rule is not scheduled anymore
This commit is contained in:
Alexander Akhmetov 2025-01-28 11:34:26 +01:00 committed by GitHub
parent f6fc39e71f
commit a28328d764
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 42 additions and 15 deletions

View File

@ -42,6 +42,8 @@ type Rule interface {
Type() ngmodels.RuleType
// Status indicates the status of the evaluating rule.
Status() ngmodels.RuleStatus
// Identifier returns the identifier of the rule.
Identifier() ngmodels.AlertRuleKeyWithGroup
}
type ruleFactoryFunc func(context.Context, *ngmodels.AlertRule) Rule
@ -71,7 +73,7 @@ func newRuleFactory(
if rule.Type() == ngmodels.RuleTypeRecording {
return newRecordingRule(
ctx,
rule.GetKey(),
rule.GetKeyWithGroup(),
maxAttempts,
clock,
evalFactory,
@ -178,6 +180,10 @@ func newAlertRule(
}
}
func (a *alertRule) Identifier() ngmodels.AlertRuleKeyWithGroup {
return a.key
}
func (a *alertRule) Type() ngmodels.RuleType {
return ngmodels.RuleTypeAlerting
}

View File

@ -269,6 +269,14 @@ func TestAlertRule(t *testing.T) {
})
}
func TestAlertRuleIdentifier(t *testing.T) {
t.Run("should return correct identifier", func(t *testing.T) {
key := models.GenerateRuleKeyWithGroup(1)
r := blankRuleForTests(context.Background(), key)
require.Equal(t, key, r.Identifier())
})
}
func blankRuleForTests(ctx context.Context, key models.AlertRuleKeyWithGroup) *alertRule {
return newAlertRule(ctx, key, nil, false, 0, nil, nil, nil, nil, nil, nil, log.NewNopLogger(), nil, nil, nil)
}

View File

@ -30,7 +30,7 @@ type RuleStatus struct {
}
type recordingRule struct {
key ngmodels.AlertRuleKey
key ngmodels.AlertRuleKeyWithGroup
ctx context.Context
evalCh chan *Evaluation
@ -56,8 +56,8 @@ type recordingRule struct {
tracer tracing.Tracer
}
func newRecordingRule(parent context.Context, key ngmodels.AlertRuleKey, maxAttempts int64, clock clock.Clock, evalFactory eval.EvaluatorFactory, cfg setting.RecordingRuleSettings, logger log.Logger, metrics *metrics.Scheduler, tracer tracing.Tracer, writer RecordingWriter, evalAppliedHook evalAppliedFunc, stopAppliedHook stopAppliedFunc) *recordingRule {
ctx, stop := util.WithCancelCause(ngmodels.WithRuleKey(parent, key))
func newRecordingRule(parent context.Context, key ngmodels.AlertRuleKeyWithGroup, maxAttempts int64, clock clock.Clock, evalFactory eval.EvaluatorFactory, cfg setting.RecordingRuleSettings, logger log.Logger, metrics *metrics.Scheduler, tracer tracing.Tracer, writer RecordingWriter, evalAppliedHook evalAppliedFunc, stopAppliedHook stopAppliedFunc) *recordingRule {
ctx, stop := util.WithCancelCause(ngmodels.WithRuleKey(parent, key.AlertRuleKey))
return &recordingRule{
key: key,
ctx: ctx,
@ -80,6 +80,10 @@ func newRecordingRule(parent context.Context, key ngmodels.AlertRuleKey, maxAtte
}
}
func (r *recordingRule) Identifier() ngmodels.AlertRuleKeyWithGroup {
return r.key
}
func (r *recordingRule) Type() ngmodels.RuleType {
return ngmodels.RuleTypeRecording
}
@ -301,7 +305,7 @@ func (r *recordingRule) evaluationDoneTestHook(ev *Evaluation) {
return
}
r.evalAppliedHook(r.key, ev.scheduledAt)
r.evalAppliedHook(r.key.AlertRuleKey, ev.scheduledAt)
}
// frameRef gets frames from a QueryDataResponse for a particular refID. It returns an error if the frames do not exist or have no data.
@ -328,5 +332,5 @@ func (r *recordingRule) stopApplied() {
return
}
r.stopAppliedHook(r.key)
r.stopAppliedHook(r.key.AlertRuleKey)
}

View File

@ -152,11 +152,20 @@ func TestRecordingRule(t *testing.T) {
})
}
func TestRecordingRuleIdentifier(t *testing.T) {
t.Run("should return correct identifier", func(t *testing.T) {
key := models.GenerateRuleKeyWithGroup(1)
r := blankRecordingRuleForTests(context.Background())
r.key = key
require.Equal(t, key, r.Identifier())
})
}
func blankRecordingRuleForTests(ctx context.Context) *recordingRule {
st := setting.RecordingRuleSettings{
Enabled: true,
}
return newRecordingRule(context.Background(), models.AlertRuleKey{}, 0, nil, nil, st, log.NewNopLogger(), nil, nil, writer.FakeWriter{}, nil, nil)
return newRecordingRule(context.Background(), models.AlertRuleKeyWithGroup{}, 0, nil, nil, st, log.NewNopLogger(), nil, nil, writer.FakeWriter{}, nil, nil)
}
func TestRecordingRule_Integration(t *testing.T) {

View File

@ -198,7 +198,7 @@ func (sch *schedule) deleteAlertRule(ctx context.Context, keys ...ngmodels.Alert
// It can happen that the scheduler has deleted the alert rule before the
// Ruler API has called DeleteAlertRule. This can happen as requests to
// the Ruler API do not hold an exclusive lock over all scheduler operations.
rule, ok := sch.schedulableAlertRules.del(key)
_, ok := sch.schedulableAlertRules.del(key)
if !ok {
sch.log.Info("Alert rule cannot be removed from the scheduler as it is not scheduled", key.LogContext()...)
}
@ -210,7 +210,7 @@ func (sch *schedule) deleteAlertRule(ctx context.Context, keys ...ngmodels.Alert
}
// stop rule evaluation
reason := sch.getRuleStopReason(ctx, key, rule)
reason := sch.getRuleStopReason(ctx, ruleRoutine.Identifier())
ruleRoutine.Stop(reason)
}
// Our best bet at this point is that we update the metrics with what we hope to schedule in the next tick.
@ -218,14 +218,14 @@ func (sch *schedule) deleteAlertRule(ctx context.Context, keys ...ngmodels.Alert
sch.updateRulesMetrics(alertRules)
}
func (sch *schedule) getRuleStopReason(ctx context.Context, key ngmodels.AlertRuleKey, rule *ngmodels.AlertRule) error {
func (sch *schedule) getRuleStopReason(ctx context.Context, key ngmodels.AlertRuleKeyWithGroup) error {
// If the ruleStopReasonProvider is defined, we will use it to get the reason why the
// alert rule was stopped. If it returns an error, we will use the default reason.
if sch.ruleStopReasonProvider == nil || rule == nil {
if sch.ruleStopReasonProvider == nil {
return errRuleDeleted
}
stopReason, err := sch.ruleStopReasonProvider.FindReason(ctx, sch.log, rule.GetKeyWithGroup())
stopReason, err := sch.ruleStopReasonProvider.FindReason(ctx, sch.log, key)
if err != nil {
sch.log.New(key.LogContext()...).Error("Failed to get stop reason", "error", err)
return errRuleDeleted

View File

@ -991,7 +991,7 @@ func TestSchedule_deleteAlertRule(t *testing.T) {
require.False(t, sch.registry.exists(key))
})
t.Run("it should not call ruleStopReasonProvider if the rule is not found in the registry", func(t *testing.T) {
t.Run("it should still call ruleStopReasonProvider if the rule is not found in the registry", func(t *testing.T) {
mockReasonProvider := new(mockAlertRuleStopReasonProvider)
expectedReason := errors.New("some rule deletion reason")
mockReasonProvider.On("FindReason", mock.Anything, mock.Anything, mock.Anything).Return(expectedReason, nil)
@ -1008,9 +1008,9 @@ func TestSchedule_deleteAlertRule(t *testing.T) {
sch.deleteAlertRule(ctx, key)
mockReasonProvider.AssertNotCalled(t, "FindReason")
mockReasonProvider.AssertCalled(t, "FindReason", mock.Anything, mock.Anything, rule.GetKeyWithGroup())
require.ErrorIs(t, info.(*alertRule).ctx.Err(), errRuleDeleted)
require.ErrorIs(t, info.(*alertRule).ctx.Err(), expectedReason)
require.False(t, sch.registry.exists(key))
})
})