From 05d6813a09eae9fdc9e97585e64245f9a1964451 Mon Sep 17 00:00:00 2001 From: Yuri Tseretyan Date: Fri, 17 May 2024 11:38:19 -0400 Subject: [PATCH] Alerting: Fix scheduler to sort rules before evaluation (#88006) sort rules scheduled for evaluation to make sure that the order is stable between evaluations. This is especially important in HA mode. --- pkg/services/ngalert/schedule/schedule.go | 5 +++ .../ngalert/schedule/schedule_unit_test.go | 42 +++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/pkg/services/ngalert/schedule/schedule.go b/pkg/services/ngalert/schedule/schedule.go index f6e4ca710a1..b8759a5220d 100644 --- a/pkg/services/ngalert/schedule/schedule.go +++ b/pkg/services/ngalert/schedule/schedule.go @@ -4,6 +4,8 @@ import ( "context" "fmt" "net/url" + "slices" + "strings" "time" "github.com/benbjohnson/clock" @@ -325,6 +327,9 @@ func (sch *schedule) processTick(ctx context.Context, dispatcherGroup *errgroup. step = sch.baseInterval.Nanoseconds() / int64(len(readyToRun)) } + slices.SortFunc(readyToRun, func(a, b readyToRunItem) int { + return strings.Compare(a.rule.UID, b.rule.UID) + }) for i := range readyToRun { item := readyToRun[i] diff --git a/pkg/services/ngalert/schedule/schedule_unit_test.go b/pkg/services/ngalert/schedule/schedule_unit_test.go index 50258cb2247..b71da365056 100644 --- a/pkg/services/ngalert/schedule/schedule_unit_test.go +++ b/pkg/services/ngalert/schedule/schedule_unit_test.go @@ -7,6 +7,7 @@ import ( "fmt" "math/rand" "net/url" + "slices" "testing" "time" @@ -354,6 +355,47 @@ func TestProcessTicks(t *testing.T) { require.Len(t, updated, 1) require.Equal(t, expectedUpdated, updated[0]) }) + t.Run("on 12th tick all rules should be stopped", func(t *testing.T) { + expectedToBeStopped, err := ruleStore.GetAlertRulesKeysForScheduling(ctx) + require.NoError(t, err) + + ruleStore.rules = map[string]*models.AlertRule{} + tick = tick.Add(cfg.BaseInterval) + scheduled, stopped, updated := sched.processTick(ctx, dispatcherGroup, tick) + + require.Emptyf(t, scheduled, "None rules should be scheduled") + + require.Len(t, stopped, len(expectedToBeStopped)) + + require.Emptyf(t, updated, "No rules should be updated") + }) + + t.Run("scheduled rules should be sorted", func(t *testing.T) { + rules := gen.With(gen.WithOrgID(mainOrgID), gen.WithInterval(cfg.BaseInterval)).GenerateManyRef(10, 20) + ruleStore.rules = map[string]*models.AlertRule{} + ruleStore.PutRule(context.Background(), rules...) + + expectedUids := make([]string, 0, len(rules)) + for _, rule := range rules { + expectedUids = append(expectedUids, rule.UID) + } + slices.Sort(expectedUids) + + tick = tick.Add(cfg.BaseInterval) + + scheduled, stopped, updated := sched.processTick(ctx, dispatcherGroup, tick) + require.Emptyf(t, stopped, "None rules are expected to be stopped") + require.Emptyf(t, updated, "None rules are expected to be updated") + + actualUids := make([]string, 0, len(scheduled)) + for _, rule := range scheduled { + actualUids = append(actualUids, rule.rule.UID) + } + + require.Len(t, scheduled, len(rules)) + assert.Truef(t, slices.IsSorted(actualUids), "The scheduler rules should be sorted by UID but they aren't") + require.Equal(t, expectedUids, actualUids) + }) } func TestSchedule_deleteAlertRule(t *testing.T) {