package schedule

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"math/rand"
	"net/url"
	"slices"
	"testing"
	"time"

	"github.com/benbjohnson/clock"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/mock"
	"github.com/stretchr/testify/require"
	"golang.org/x/sync/errgroup"

	"github.com/grafana/grafana/pkg/expr"
	"github.com/grafana/grafana/pkg/infra/log"
	"github.com/grafana/grafana/pkg/infra/tracing"
	datasources "github.com/grafana/grafana/pkg/services/datasources/fakes"
	"github.com/grafana/grafana/pkg/services/featuremgmt"
	"github.com/grafana/grafana/pkg/services/ngalert/eval"
	"github.com/grafana/grafana/pkg/services/ngalert/metrics"
	"github.com/grafana/grafana/pkg/services/ngalert/models"
	"github.com/grafana/grafana/pkg/services/ngalert/state"
	"github.com/grafana/grafana/pkg/services/ngalert/writer"
	"github.com/grafana/grafana/pkg/setting"
)
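// evalAppliedInfo records which rule the scheduler applied an evaluation to
// and at which tick, so tests can assert on evaluation events.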
type evalAppliedInfo struct {
	alertDefKey models.AlertRuleKey
	now         time.Time
}
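// TestProcessTicks drives the scheduler tick by tick with a mocked clock and
// verifies, at each tick, which rules are scheduled, stopped, and updated, as
// well as the rule metrics and the per-rule status the scheduler exposes.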
func TestProcessTicks(t *testing.T) {
	testTracer := tracing.InitializeTracerForTest()
	reg := prometheus.NewPedanticRegistry()
	testMetrics := metrics.NewNGAlert(reg)
	ctx := context.Background()
	dispatcherGroup, ctx := errgroup.WithContext(ctx)

	ruleStore := newFakeRulesStore()

	cfg := setting.UnifiedAlertingSettings{
		BaseInterval:            1 * time.Second,
		AdminConfigPollInterval: 10 * time.Minute, // do not poll in unit tests.
	}

	const mainOrgID int64 = 1

	mockedClock := clock.NewMock()

	notifier := NewSyncAlertsSenderMock()
	notifier.EXPECT().Send(mock.Anything, mock.Anything, mock.Anything).Return()

	appUrl := &url.URL{
		Scheme: "http",
		Host:   "localhost",
	}

	cacheServ := &datasources.FakeCacheService{}
	evaluator := eval.NewEvaluatorFactory(setting.UnifiedAlertingSettings{}, cacheServ, expr.ProvideService(&setting.Cfg{ExpressionsEnabled: true}, nil, nil, featuremgmt.WithFeatures(), nil, tracing.InitializeTracerForTest()))
	rrSet := setting.RecordingRuleSettings{
		Enabled: true,
	}

	schedCfg := SchedulerCfg{
		BaseInterval:      cfg.BaseInterval,
		C:                 mockedClock,
		AppURL:            appUrl,
		EvaluatorFactory:  evaluator,
		RuleStore:         ruleStore,
		Metrics:           testMetrics.GetSchedulerMetrics(),
		AlertSender:       notifier,
		RecordingRulesCfg: rrSet,
		Tracer:            testTracer,
		Log:               log.New("ngalert.scheduler"),
	}
	managerCfg := state.ManagerCfg{
		Metrics:       testMetrics.GetStateMetrics(),
		ExternalURL:   nil,
		InstanceStore: nil,
		Images:        &state.NoopImageService{},
		Clock:         mockedClock,
		Historian:     &state.FakeHistorian{},
		Tracer:        testTracer,
		Log:           log.New("ngalert.state.manager"),
	}
	st := state.NewManager(managerCfg, state.NewNoopPersister())

	sched := NewScheduler(schedCfg, st)

	evalAppliedCh := make(chan evalAppliedInfo, 1)
	stopAppliedCh := make(chan models.AlertRuleKey, 1)

	sched.evalAppliedFunc = func(alertDefKey models.AlertRuleKey, now time.Time) {
		evalAppliedCh <- evalAppliedInfo{alertDefKey: alertDefKey, now: now}
	}
	sched.stopAppliedFunc = func(alertDefKey models.AlertRuleKey) {
		stopAppliedCh <- alertDefKey
	}

	tick := time.Time{}
	gen := models.RuleGen
	// Create an alert rule under the main org with a one-second (base) interval.
	alertRule1 := gen.With(gen.WithOrgID(mainOrgID), gen.WithInterval(cfg.BaseInterval), gen.WithTitle("rule-1")).GenerateRef()
	ruleStore.PutRule(ctx, alertRule1)

	folderWithRuleGroup1 := fmt.Sprintf("%s;%s", ruleStore.getNamespaceTitle(alertRule1.NamespaceUID), alertRule1.RuleGroup)

	t.Run("before 1st tick status should not be available", func(t *testing.T) {
		_, ok := sched.Status(alertRule1.GetKey())
		require.False(t, ok, "status for a rule should not be present before the scheduler has created it")
	})

	t.Run("on 1st tick alert rule should be evaluated", func(t *testing.T) {
		tick = tick.Add(cfg.BaseInterval)

		scheduled, stopped, updated := sched.processTick(ctx, dispatcherGroup, tick)

		require.Len(t, scheduled, 1)
		require.Equal(t, alertRule1, scheduled[0].rule)
		require.Equal(t, tick, scheduled[0].scheduledAt)
		require.Emptyf(t, stopped, "No rules are expected to be stopped")
		require.Emptyf(t, updated, "No rules are expected to be updated")
		assertEvalRun(t, evalAppliedCh, tick, alertRule1.GetKey())
	})

	t.Run("after 1st tick rule metrics should report one active alert rule", func(t *testing.T) {
		expectedMetric := fmt.Sprintf(
			`# HELP grafana_alerting_rule_group_rules The number of alert rules that are scheduled, by type and state.
# TYPE grafana_alerting_rule_group_rules gauge
grafana_alerting_rule_group_rules{org="%[1]d",rule_group="%[2]s",state="active",type="alerting"} 1
`, alertRule1.OrgID, folderWithRuleGroup1)

		err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_rule_group_rules")
		require.NoError(t, err)
	})

	t.Run("after 1st tick status for rule should be available", func(t *testing.T) {
		_, ok := sched.Status(alertRule1.GetKey())
		require.True(t, ok, "status for a rule that just evaluated was not available")
		// Interestingly, the rules in this test are randomised and are sometimes
		// invalid, so we can't reliably assert anything about the actual health:
		// it might be error or not, depending on the randomness. We are only
		// testing that things were scheduled, not that the rule routine worked
		// internally.
	})

	// Add an alert rule under the main org with an interval of three base intervals.
	alertRule2 := gen.With(gen.WithOrgID(mainOrgID), gen.WithInterval(3*cfg.BaseInterval), gen.WithTitle("rule-2")).GenerateRef()
	ruleStore.PutRule(ctx, alertRule2)

	folderWithRuleGroup2 := fmt.Sprintf("%s;%s", ruleStore.getNamespaceTitle(alertRule2.NamespaceUID), alertRule2.RuleGroup)

	t.Run("before 2nd tick status for rule should not be available", func(t *testing.T) {
		_, ok := sched.Status(alertRule2.GetKey())
		require.False(t, ok, "status for a rule should not be present before the scheduler has created it")
	})

	t.Run("on 2nd tick first alert rule should be evaluated", func(t *testing.T) {
		tick = tick.Add(cfg.BaseInterval)
		scheduled, stopped, updated := sched.processTick(ctx, dispatcherGroup, tick)

		require.Len(t, scheduled, 1)
		require.Equal(t, alertRule1, scheduled[0].rule)
		require.Equal(t, tick, scheduled[0].scheduledAt)
		require.Emptyf(t, stopped, "No rules are expected to be stopped")
		require.Emptyf(t, updated, "No rules are expected to be updated")
		assertEvalRun(t, evalAppliedCh, tick, alertRule1.GetKey())
	})

	t.Run("after 2nd tick rule metrics should report two active alert rules in two groups", func(t *testing.T) {
		expectedMetric := fmt.Sprintf(
			`# HELP grafana_alerting_rule_group_rules The number of alert rules that are scheduled, by type and state.
# TYPE grafana_alerting_rule_group_rules gauge
grafana_alerting_rule_group_rules{org="%[1]d",rule_group="%[2]s",state="active",type="alerting"} 1
grafana_alerting_rule_group_rules{org="%[1]d",rule_group="%[3]s",state="active",type="alerting"} 1
`, alertRule1.OrgID, folderWithRuleGroup1, folderWithRuleGroup2)

		err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_rule_group_rules")
		require.NoError(t, err)
	})
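	// alertRule2's interval is 3*BaseInterval, so it becomes eligible only on
	// every third tick; the 3rd tick is the first on which both rules run. The
	// scheduler's eligibility test is essentially a divisibility check on the
	// tick number; a rough sketch of the idea (not the verbatim implementation):
	//
	//	tickNum := tick.Unix() / int64(cfg.BaseInterval.Seconds())
	//	frequency := rule.IntervalSeconds / int64(cfg.BaseInterval.Seconds())
	//	eligible := frequency != 0 && tickNum%frequency == 0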
t.Run("on 3rd tick two alert rules should be evaluated", func(t *testing.T) {
|
|
tick = tick.Add(cfg.BaseInterval)
|
|
scheduled, stopped, updated := sched.processTick(ctx, dispatcherGroup, tick)
|
|
require.Len(t, scheduled, 2)
|
|
var keys []models.AlertRuleKey
|
|
for _, item := range scheduled {
|
|
keys = append(keys, item.rule.GetKey())
|
|
require.Equal(t, tick, item.scheduledAt)
|
|
}
|
|
require.Contains(t, keys, alertRule1.GetKey())
|
|
require.Contains(t, keys, alertRule2.GetKey())
|
|
|
|
require.Emptyf(t, stopped, "None rules are expected to be stopped")
|
|
require.Emptyf(t, updated, "None rules are expected to be updated")
|
|
assertEvalRun(t, evalAppliedCh, tick, keys...)
|
|
})
|
|
|
|
t.Run("after 3rd tick status for both rules should be available", func(t *testing.T) {
|
|
_, ok := sched.Status(alertRule1.GetKey())
|
|
require.True(t, ok, "status for a rule that just evaluated was not available")
|
|
_, ok = sched.Status(alertRule2.GetKey())
|
|
require.True(t, ok, "status for a rule that just evaluated was not available")
|
|
// Interestingly, the rules in this test are randomised, and are sometimes invalid.
|
|
// Therefore, we can't reliably assert anything about the actual health. It might be error, it might not, depending on randomness.
|
|
// We are only testing that things were scheduled, not that the rule routine worked internally.
|
|
})
|
|
|
|
t.Run("on 4th tick only one alert rule should be evaluated", func(t *testing.T) {
|
|
tick = tick.Add(cfg.BaseInterval)
|
|
scheduled, stopped, updated := sched.processTick(ctx, dispatcherGroup, tick)
|
|
|
|
require.Len(t, scheduled, 1)
|
|
require.Equal(t, alertRule1, scheduled[0].rule)
|
|
require.Equal(t, tick, scheduled[0].scheduledAt)
|
|
require.Emptyf(t, stopped, "None rules are expected to be stopped")
|
|
require.Emptyf(t, updated, "None rules are expected to be updated")
|
|
assertEvalRun(t, evalAppliedCh, tick, alertRule1.GetKey())
|
|
})
|
|
|
|
t.Run("on 5th tick an alert rule is paused (it still enters evaluation but it is early skipped)", func(t *testing.T) {
|
|
tick = tick.Add(cfg.BaseInterval)
|
|
|
|
alertRule1.IsPaused = true
|
|
|
|
scheduled, stopped, updated := sched.processTick(ctx, dispatcherGroup, tick)
|
|
|
|
require.Len(t, scheduled, 1)
|
|
require.Equal(t, alertRule1, scheduled[0].rule)
|
|
require.Equal(t, tick, scheduled[0].scheduledAt)
|
|
require.Emptyf(t, stopped, "None rules are expected to be stopped")
|
|
require.Emptyf(t, updated, "None rules are expected to be updated")
|
|
assertEvalRun(t, evalAppliedCh, tick, alertRule1.GetKey())
|
|
})
|
|
|
|
t.Run("after 5th tick rule metrics should report one active and one paused alert rules in two groups", func(t *testing.T) {
|
|
expectedMetric := fmt.Sprintf(
|
|
`# HELP grafana_alerting_rule_group_rules The number of alert rules that are scheduled, by type and state.
|
|
# TYPE grafana_alerting_rule_group_rules gauge
|
|
grafana_alerting_rule_group_rules{org="%[1]d",rule_group="%[2]s",state="paused",type="alerting"} 1
|
|
grafana_alerting_rule_group_rules{org="%[1]d",rule_group="%[3]s",state="active",type="alerting"} 1
|
|
`, alertRule1.OrgID, folderWithRuleGroup1, folderWithRuleGroup2)
|
|
|
|
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_rule_group_rules")
|
|
require.NoError(t, err)
|
|
})
|
|
|
|
t.Run("after 5th tick status for both rules should be available regardless of pause state", func(t *testing.T) {
|
|
_, ok := sched.Status(alertRule1.GetKey())
|
|
require.True(t, ok, "status for a rule that just evaluated was not available")
|
|
_, ok = sched.Status(alertRule2.GetKey())
|
|
require.True(t, ok, "status for a rule that just evaluated was not available")
|
|
// Interestingly, the rules in this test are randomised, and are sometimes invalid.
|
|
// Therefore, we can't reliably assert anything about the actual health. It might be error, it might not, depending on randomness.
|
|
// We are only testing that things were scheduled, not that the rule routine worked internally.
|
|
})
|
|
|
|
t.Run("on 6th tick all alert rule are paused (it still enters evaluation but it is early skipped)", func(t *testing.T) {
|
|
tick = tick.Add(cfg.BaseInterval)
|
|
|
|
alertRule2.IsPaused = true
|
|
|
|
scheduled, stopped, updated := sched.processTick(ctx, dispatcherGroup, tick)
|
|
|
|
require.Len(t, scheduled, 2)
|
|
var keys []models.AlertRuleKey
|
|
for _, item := range scheduled {
|
|
keys = append(keys, item.rule.GetKey())
|
|
require.Equal(t, tick, item.scheduledAt)
|
|
}
|
|
require.Contains(t, keys, alertRule1.GetKey())
|
|
require.Contains(t, keys, alertRule2.GetKey())
|
|
|
|
require.Emptyf(t, stopped, "None rules are expected to be stopped")
|
|
require.Emptyf(t, updated, "None rules are expected to be updated")
|
|
assertEvalRun(t, evalAppliedCh, tick, keys...)
|
|
})
|
|
|
|
t.Run("after 6th tick rule metrics should report two paused alert rules in two groups", func(t *testing.T) {
|
|
expectedMetric := fmt.Sprintf(
|
|
`# HELP grafana_alerting_rule_group_rules The number of alert rules that are scheduled, by type and state.
|
|
# TYPE grafana_alerting_rule_group_rules gauge
|
|
grafana_alerting_rule_group_rules{org="%[1]d",rule_group="%[2]s",state="paused",type="alerting"} 1
|
|
grafana_alerting_rule_group_rules{org="%[1]d",rule_group="%[3]s",state="paused",type="alerting"} 1
|
|
|
|
`, alertRule1.OrgID, folderWithRuleGroup1, folderWithRuleGroup2)
|
|
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_rule_group_rules")
|
|
require.NoError(t, err)
|
|
})
|
|
|
|
t.Run("on 7th tick unpause all alert rules", func(t *testing.T) {
|
|
tick = tick.Add(cfg.BaseInterval)
|
|
|
|
alertRule1.IsPaused = false
|
|
alertRule2.IsPaused = false
|
|
|
|
scheduled, stopped, updated := sched.processTick(ctx, dispatcherGroup, tick)
|
|
|
|
require.Len(t, scheduled, 1)
|
|
require.Equal(t, alertRule1, scheduled[0].rule)
|
|
require.Equal(t, tick, scheduled[0].scheduledAt)
|
|
require.Emptyf(t, stopped, "None rules are expected to be stopped")
|
|
require.Emptyf(t, updated, "None rules are expected to be updated")
|
|
assertEvalRun(t, evalAppliedCh, tick, alertRule1.GetKey())
|
|
})
|
|
|
|
t.Run("after 7th tick rule metrics should report two active alert rules in two groups", func(t *testing.T) {
|
|
expectedMetric := fmt.Sprintf(
|
|
`# HELP grafana_alerting_rule_group_rules The number of alert rules that are scheduled, by type and state.
|
|
# TYPE grafana_alerting_rule_group_rules gauge
|
|
grafana_alerting_rule_group_rules{org="%[1]d",rule_group="%[2]s",state="active",type="alerting"} 1
|
|
grafana_alerting_rule_group_rules{org="%[1]d",rule_group="%[3]s",state="active",type="alerting"} 1
|
|
`, alertRule1.OrgID, folderWithRuleGroup1, folderWithRuleGroup2)
|
|
|
|
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_rule_group_rules")
|
|
require.NoError(t, err)
|
|
})
|
|
|
|
t.Run("on 8th tick deleted rule should not be evaluated but stopped", func(t *testing.T) {
|
|
tick = tick.Add(cfg.BaseInterval)
|
|
|
|
ruleStore.DeleteRule(alertRule1)
|
|
|
|
scheduled, stopped, updated := sched.processTick(ctx, dispatcherGroup, tick)
|
|
|
|
require.Empty(t, scheduled)
|
|
require.Len(t, stopped, 1)
|
|
require.Emptyf(t, updated, "None rules are expected to be updated")
|
|
require.Contains(t, stopped, alertRule1.GetKey())
|
|
|
|
assertStopRun(t, stopAppliedCh, alertRule1.GetKey())
|
|
})
|
|
|
|
t.Run("after 8th tick rule metrics should report one active alert rule", func(t *testing.T) {
|
|
expectedMetric := fmt.Sprintf(
|
|
`# HELP grafana_alerting_rule_group_rules The number of alert rules that are scheduled, by type and state.
|
|
# TYPE grafana_alerting_rule_group_rules gauge
|
|
grafana_alerting_rule_group_rules{org="%[1]d",rule_group="%[2]s",state="active",type="alerting"} 1
|
|
`, alertRule1.OrgID, folderWithRuleGroup2)
|
|
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_rule_group_rules")
|
|
require.NoError(t, err)
|
|
})
|
|
|
|
t.Run("after 8th tick status for deleted rule should not be available", func(t *testing.T) {
|
|
_, ok := sched.Status(alertRule1.GetKey())
|
|
require.False(t, ok, "status for a rule that was deleted should not be available")
|
|
_, ok = sched.Status(alertRule2.GetKey())
|
|
require.True(t, ok, "status for a rule that just evaluated was not available")
|
|
})
|
|
|
|
t.Run("on 9th tick one alert rule should be evaluated", func(t *testing.T) {
|
|
tick = tick.Add(cfg.BaseInterval)
|
|
|
|
scheduled, stopped, updated := sched.processTick(ctx, dispatcherGroup, tick)
|
|
|
|
require.Len(t, scheduled, 1)
|
|
require.Equal(t, alertRule2, scheduled[0].rule)
|
|
require.Equal(t, tick, scheduled[0].scheduledAt)
|
|
require.Emptyf(t, stopped, "None rules are expected to be stopped")
|
|
require.Emptyf(t, updated, "None rules are expected to be updated")
|
|
assertEvalRun(t, evalAppliedCh, tick, alertRule2.GetKey())
|
|
})
|
|
|
|
// create alert rule with one base interval
|
|
alertRule3 := gen.With(gen.WithOrgID(mainOrgID), gen.WithInterval(cfg.BaseInterval), gen.WithTitle("rule-3")).GenerateRef()
|
|
ruleStore.PutRule(ctx, alertRule3)
|
|
|
|
t.Run("on 10th tick a new alert rule should be evaluated", func(t *testing.T) {
|
|
tick = tick.Add(cfg.BaseInterval)
|
|
|
|
scheduled, stopped, updated := sched.processTick(ctx, dispatcherGroup, tick)
|
|
|
|
require.Len(t, scheduled, 1)
|
|
require.Equal(t, alertRule3, scheduled[0].rule)
|
|
require.Equal(t, tick, scheduled[0].scheduledAt)
|
|
require.Emptyf(t, stopped, "None rules are expected to be stopped")
|
|
require.Emptyf(t, updated, "None rules are expected to be updated")
|
|
assertEvalRun(t, evalAppliedCh, tick, alertRule3.GetKey())
|
|
})
|
|
t.Run("after 10th tick status for remaining rules should be available", func(t *testing.T) {
|
|
_, ok := sched.Status(alertRule1.GetKey())
|
|
require.False(t, ok, "status for a rule that was deleted should not be available")
|
|
_, ok = sched.Status(alertRule2.GetKey())
|
|
require.True(t, ok, "status for a rule that just evaluated was not available")
|
|
_, ok = sched.Status(alertRule3.GetKey())
|
|
require.True(t, ok, "status for a rule that just evaluated was not available")
|
|
})
|
|
t.Run("on 11th tick rule2 should be updated", func(t *testing.T) {
|
|
newRule2 := models.CopyRule(alertRule2)
|
|
newRule2.Version++
|
|
expectedUpdated := models.AlertRuleKeyWithVersion{
|
|
Version: newRule2.Version,
|
|
AlertRuleKey: newRule2.GetKey(),
|
|
}
|
|
|
|
ruleStore.PutRule(context.Background(), newRule2)
|
|
|
|
tick = tick.Add(cfg.BaseInterval)
|
|
scheduled, stopped, updated := sched.processTick(ctx, dispatcherGroup, tick)
|
|
|
|
require.Len(t, scheduled, 1)
|
|
require.Equal(t, alertRule3, scheduled[0].rule)
|
|
require.Equal(t, tick, scheduled[0].scheduledAt)
|
|
|
|
require.Emptyf(t, stopped, "None rules are expected to be stopped")
|
|
|
|
require.Len(t, updated, 1)
|
|
require.Equal(t, expectedUpdated, updated[0])
|
|
})
|
|
|
|
// Add a recording rule with 2 * base interval.
|
|
recordingRule1 := gen.With(gen.WithOrgID(mainOrgID), gen.WithInterval(2*cfg.BaseInterval), gen.WithTitle("recording-1"), gen.WithAllRecordingRules()).GenerateRef()
|
|
ruleStore.PutRule(ctx, recordingRule1)
|
|
|
|
t.Run("on 12th tick recording rule and alert rules should be evaluated", func(t *testing.T) {
|
|
tick = tick.Add(cfg.BaseInterval)
|
|
|
|
scheduled, stopped, updated := sched.processTick(ctx, dispatcherGroup, tick)
|
|
|
|
require.Len(t, scheduled, 3)
|
|
require.Emptyf(t, stopped, "No rules are expected to be stopped")
|
|
require.Emptyf(t, updated, "No rules are expected to be updated")
|
|
contains := false
|
|
for _, sch := range scheduled {
|
|
if sch.rule.Title == recordingRule1.Title {
|
|
contains = true
|
|
}
|
|
}
|
|
require.True(t, contains, "Expected a scheduled rule with title %s but didn't get one, scheduled rules were %v", recordingRule1.Title, scheduled)
|
|
})
|
|
|
|
// Update the recording rule.
|
|
recordingRule1 = models.CopyRule(recordingRule1)
|
|
recordingRule1.Version++
|
|
expectedUpdated := models.AlertRuleKeyWithVersion{
|
|
Version: recordingRule1.Version,
|
|
AlertRuleKey: recordingRule1.GetKey(),
|
|
}
|
|
ruleStore.PutRule(context.Background(), recordingRule1)
|
|
|
|
t.Run("on 13th tick recording rule should be updated", func(t *testing.T) {
|
|
// It has 2 * base interval - so normally it would not have been scheduled for evaluation this tick.
|
|
tick = tick.Add(cfg.BaseInterval)
|
|
scheduled, stopped, updated := sched.processTick(ctx, dispatcherGroup, tick)
|
|
|
|
require.Len(t, scheduled, 1)
|
|
require.Emptyf(t, stopped, "No rules are expected to be stopped")
|
|
require.Len(t, updated, 1)
|
|
require.Equal(t, expectedUpdated, updated[0])
|
|
assertScheduledContains(t, scheduled, alertRule3)
|
|
})
|
|
|
|
t.Run("on 14th tick both 1-tick alert rule and 2-tick recording rule should be evaluated", func(t *testing.T) {
|
|
tick = tick.Add(cfg.BaseInterval)
|
|
|
|
scheduled, stopped, updated := sched.processTick(ctx, dispatcherGroup, tick)
|
|
|
|
require.Len(t, scheduled, 2)
|
|
require.Emptyf(t, stopped, "No rules are expected to be stopped")
|
|
require.Emptyf(t, updated, "No rules are expected to be updated")
|
|
assertScheduledContains(t, scheduled, alertRule3)
|
|
assertScheduledContains(t, scheduled, recordingRule1)
|
|
})
|
|
|
|
// Convert an alerting rule to a recording rule.
|
|
models.ConvertToRecordingRule(alertRule3)
|
|
alertRule3.Version++
|
|
ruleStore.PutRule(ctx, alertRule3)
|
|
|
|
t.Run("prior to 15th tick alertRule3 should still be scheduled as alerting rule", func(t *testing.T) {
|
|
require.Equal(t, models.RuleTypeAlerting, sched.registry.rules[alertRule3.GetKey()].Type())
|
|
})
|
|
|
|
t.Run("on 15th tick converted rule and 3-tick alert rule should be evaluated", func(t *testing.T) {
|
|
tick = tick.Add(cfg.BaseInterval)
|
|
scheduled, stopped, updated := sched.processTick(ctx, dispatcherGroup, tick)
|
|
|
|
require.Len(t, scheduled, 2)
|
|
require.Emptyf(t, stopped, "No rules are expected to be stopped")
|
|
// We never sent the Updated command to the restarted rule, so this should be empty.
|
|
require.Emptyf(t, updated, "No rules are expected to be updated")
|
|
|
|
assertScheduledContains(t, scheduled, alertRule2)
|
|
assertScheduledContains(t, scheduled, alertRule3) // converted
|
|
// Rule in registry should be updated to the correct type.
|
|
require.Equal(t, models.RuleTypeRecording, sched.registry.rules[alertRule3.GetKey()].Type())
|
|
})
|
|
|
|
t.Run("on 16th tick converted rule and 2-tick recording rule should be evaluated", func(t *testing.T) {
|
|
tick = tick.Add(cfg.BaseInterval)
|
|
scheduled, stopped, updated := sched.processTick(ctx, dispatcherGroup, tick)
|
|
|
|
require.Len(t, scheduled, 2)
|
|
require.Emptyf(t, stopped, "No rules are expected to be stopped")
|
|
require.Emptyf(t, updated, "No rules are expected to be updated")
|
|
assertScheduledContains(t, scheduled, recordingRule1)
|
|
assertScheduledContains(t, scheduled, alertRule3)
|
|
})
|
|
|
|
t.Run("on 17th tick all rules should be stopped", func(t *testing.T) {
|
|
expectedToBeStopped, err := ruleStore.GetAlertRulesKeysForScheduling(ctx)
|
|
require.NoError(t, err)
|
|
|
|
// Remove all rules from store.
|
|
ruleStore.rules = map[string]*models.AlertRule{}
|
|
tick = tick.Add(cfg.BaseInterval)
|
|
scheduled, stopped, updated := sched.processTick(ctx, dispatcherGroup, tick)
|
|
|
|
require.Emptyf(t, scheduled, "None rules should be scheduled")
|
|
|
|
require.Len(t, stopped, len(expectedToBeStopped))
|
|
|
|
require.Emptyf(t, updated, "No rules should be updated")
|
|
})
|
|
t.Run("after 12th tick no status should be available", func(t *testing.T) {
|
|
_, ok := sched.Status(alertRule1.GetKey())
|
|
require.False(t, ok, "status for a rule that was deleted should not be available")
|
|
_, ok = sched.Status(alertRule2.GetKey())
|
|
require.False(t, ok, "status for a rule that just evaluated was not available")
|
|
_, ok = sched.Status(alertRule3.GetKey())
|
|
require.False(t, ok, "status for a rule that just evaluated was not available")
|
|
})
|
|
|
|
t.Run("scheduled rules should be sorted", func(t *testing.T) {
|
|
rules := gen.With(gen.WithOrgID(mainOrgID), gen.WithInterval(cfg.BaseInterval)).GenerateManyRef(10, 20)
|
|
ruleStore.rules = map[string]*models.AlertRule{}
|
|
ruleStore.PutRule(context.Background(), rules...)
|
|
|
|
expectedUids := make([]string, 0, len(rules))
|
|
for _, rule := range rules {
|
|
expectedUids = append(expectedUids, rule.UID)
|
|
}
|
|
slices.Sort(expectedUids)
|
|
|
|
tick = tick.Add(cfg.BaseInterval)
|
|
|
|
scheduled, stopped, updated := sched.processTick(ctx, dispatcherGroup, tick)
|
|
require.Emptyf(t, stopped, "None rules are expected to be stopped")
|
|
require.Emptyf(t, updated, "None rules are expected to be updated")
|
|
|
|
actualUids := make([]string, 0, len(scheduled))
|
|
for _, rule := range scheduled {
|
|
actualUids = append(actualUids, rule.rule.UID)
|
|
}
|
|
|
|
require.Len(t, scheduled, len(rules))
|
|
assert.Truef(t, slices.IsSorted(actualUids), "The scheduler rules should be sorted by UID but they aren't")
|
|
require.Equal(t, expectedUids, actualUids)
|
|
})
|
|
}
|
|
|
|
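// TestSchedule_updateRulesMetrics verifies that the scheduler's rule metrics
// (grafana_alerting_rule_group_rules, grafana_alerting_rule_groups,
// grafana_alerting_simple_routing_rules, and
// grafana_alerting_simplified_editor_rules) reflect exactly the set of rules
// passed to updateRulesMetrics.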
func TestSchedule_updateRulesMetrics(t *testing.T) {
	ruleStore := newFakeRulesStore()
	reg := prometheus.NewPedanticRegistry()
	sch := setupScheduler(t, ruleStore, nil, reg, nil, nil)
	ctx := context.Background()
	const firstOrgID int64 = 1

	t.Run("grafana_alerting_rule_group_rules metric should reflect the current state", func(t *testing.T) {
		// Without any rules there are no metrics.
		t.Run("it should not show metrics", func(t *testing.T) {
			sch.updateRulesMetrics([]*models.AlertRule{})

			expectedMetric := ""
			err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_rule_group_rules")
			require.ErrorContains(t, err, fmt.Sprintf("expected metric name(s) not found: [%v]", "grafana_alerting_rule_group_rules"))
		})

		alertRule1 := models.RuleGen.With(models.RuleGen.WithOrgID(firstOrgID)).GenerateRef()
		folderWithRuleGroup1 := fmt.Sprintf("%s;%s", ruleStore.getNamespaceTitle(alertRule1.NamespaceUID), alertRule1.RuleGroup)
		ruleStore.PutRule(ctx, alertRule1)

		_, err := sch.updateSchedulableAlertRules(ctx) // to update folderTitles
		require.NoError(t, err)

		t.Run("it should show one active rule in a single group", func(t *testing.T) {
			sch.updateRulesMetrics([]*models.AlertRule{alertRule1})

			expectedMetric := fmt.Sprintf(
				`# HELP grafana_alerting_rule_group_rules The number of alert rules that are scheduled, by type and state.
# TYPE grafana_alerting_rule_group_rules gauge
grafana_alerting_rule_group_rules{org="%[1]d",rule_group="%[2]s",state="active",type="alerting"} 1
`, alertRule1.OrgID, folderWithRuleGroup1)

			err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_rule_group_rules")
			require.NoError(t, err)
		})

		// Add a new rule alertRule2 and check that it is reflected in the metrics.
		alertRule2 := models.RuleGen.With(models.RuleGen.WithOrgID(firstOrgID)).GenerateRef()
		folderWithRuleGroup2 := fmt.Sprintf("%s;%s", ruleStore.getNamespaceTitle(alertRule2.NamespaceUID), alertRule2.RuleGroup)
		ruleStore.PutRule(ctx, alertRule2)

		_, err = sch.updateSchedulableAlertRules(ctx) // to update folderTitles
		require.NoError(t, err)

		t.Run("it should show two active rules in two groups", func(t *testing.T) {
			sch.updateRulesMetrics([]*models.AlertRule{alertRule1, alertRule2})

			expectedMetric := fmt.Sprintf(
				`# HELP grafana_alerting_rule_group_rules The number of alert rules that are scheduled, by type and state.
# TYPE grafana_alerting_rule_group_rules gauge
grafana_alerting_rule_group_rules{org="%[1]d",rule_group="%[2]s",state="active",type="alerting"} 1
grafana_alerting_rule_group_rules{org="%[1]d",rule_group="%[3]s",state="active",type="alerting"} 1
`, alertRule1.OrgID, folderWithRuleGroup1, folderWithRuleGroup2)

			err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_rule_group_rules")
			require.NoError(t, err)
		})

		// Updating with the same two rules again should leave the metrics unchanged.
		t.Run("it should still show two active rules in two groups", func(t *testing.T) {
			sch.updateRulesMetrics([]*models.AlertRule{alertRule1, alertRule2})

			expectedMetric := fmt.Sprintf(
				`# HELP grafana_alerting_rule_group_rules The number of alert rules that are scheduled, by type and state.
# TYPE grafana_alerting_rule_group_rules gauge
grafana_alerting_rule_group_rules{org="%[1]d",rule_group="%[2]s",state="active",type="alerting"} 1
grafana_alerting_rule_group_rules{org="%[1]d",rule_group="%[3]s",state="active",type="alerting"} 1
`, alertRule1.OrgID, folderWithRuleGroup1, folderWithRuleGroup2)

			err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_rule_group_rules")
			require.NoError(t, err)
		})

		// Now remove all rules; the metric should disappear.
		t.Run("it should not show any metrics after all rules are removed", func(t *testing.T) {
			sch.updateRulesMetrics([]*models.AlertRule{})

			expectedMetric := ""
			err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_rule_group_rules")
			require.ErrorContains(t, err, "expected metric name(s) not found: [grafana_alerting_rule_group_rules]")
		})
	})
t.Run("rule_groups metric should reflect the current state", func(t *testing.T) {
|
|
const firstOrgID int64 = 1
|
|
const secondOrgID int64 = 2
|
|
|
|
// Without any rules there are no metrics
|
|
t.Run("it should not show metrics", func(t *testing.T) {
|
|
sch.updateRulesMetrics([]*models.AlertRule{})
|
|
|
|
expectedMetric := ""
|
|
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_rule_groups")
|
|
require.ErrorContains(t, err, "expected metric name(s) not found: [grafana_alerting_rule_groups]")
|
|
})
|
|
|
|
alertRule1 := models.RuleGen.With(models.RuleGen.WithOrgID(firstOrgID)).GenerateRef()
|
|
|
|
t.Run("it should show one rule group in a single org", func(t *testing.T) {
|
|
sch.updateRulesMetrics([]*models.AlertRule{alertRule1})
|
|
|
|
expectedMetric := fmt.Sprintf(
|
|
`# HELP grafana_alerting_rule_groups The number of alert rule groups
|
|
# TYPE grafana_alerting_rule_groups gauge
|
|
grafana_alerting_rule_groups{org="%[1]d"} 1
|
|
`, alertRule1.OrgID)
|
|
|
|
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_rule_groups")
|
|
require.NoError(t, err)
|
|
})
|
|
|
|
alertRule2 := models.RuleGen.With(models.RuleGen.WithOrgID(secondOrgID)).GenerateRef()
|
|
|
|
t.Run("it should show two rule groups in two orgs", func(t *testing.T) {
|
|
sch.updateRulesMetrics([]*models.AlertRule{alertRule1, alertRule2})
|
|
|
|
expectedMetric := fmt.Sprintf(
|
|
`# HELP grafana_alerting_rule_groups The number of alert rule groups
|
|
# TYPE grafana_alerting_rule_groups gauge
|
|
grafana_alerting_rule_groups{org="%[1]d"} 1
|
|
grafana_alerting_rule_groups{org="%[2]d"} 1
|
|
`, alertRule1.OrgID, alertRule2.OrgID)
|
|
|
|
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_rule_groups")
|
|
require.NoError(t, err)
|
|
})
|
|
|
|
t.Run("when the first rule is removed it should show one rule group", func(t *testing.T) {
|
|
sch.updateRulesMetrics([]*models.AlertRule{alertRule2})
|
|
|
|
expectedMetric := fmt.Sprintf(
|
|
`# HELP grafana_alerting_rule_groups The number of alert rule groups
|
|
# TYPE grafana_alerting_rule_groups gauge
|
|
grafana_alerting_rule_groups{org="%[1]d"} 1
|
|
`, alertRule2.OrgID)
|
|
|
|
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_rule_groups")
|
|
require.NoError(t, err)
|
|
})
|
|
})
|
|
|
|
t.Run("simple_routing_rules metric should reflect the current state", func(t *testing.T) {
|
|
const firstOrgID int64 = 1
|
|
const secondOrgID int64 = 2
|
|
|
|
// Has no NotificationSettings, should not be in the metrics
|
|
alertRuleWithoutNotificationSettings := models.RuleGen.With(
|
|
models.RuleGen.WithOrgID(firstOrgID),
|
|
models.RuleGen.WithNoNotificationSettings(),
|
|
).GenerateRef()
|
|
|
|
// Without any rules there are no metrics
|
|
t.Run("it should not show metrics", func(t *testing.T) {
|
|
sch.updateRulesMetrics([]*models.AlertRule{alertRuleWithoutNotificationSettings})
|
|
|
|
expectedMetric := ""
|
|
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_simple_routing_rules")
|
|
require.ErrorContains(t, err, "expected metric name(s) not found: [grafana_alerting_simple_routing_rules]")
|
|
})
|
|
|
|
alertRule1 := models.RuleGen.With(
|
|
models.RuleGen.WithOrgID(firstOrgID),
|
|
models.RuleGen.WithNotificationSettingsGen(models.NotificationSettingsGen()),
|
|
).GenerateRef()
|
|
|
|
t.Run("it should show one rule in a single org", func(t *testing.T) {
|
|
sch.updateRulesMetrics([]*models.AlertRule{alertRuleWithoutNotificationSettings, alertRule1})
|
|
|
|
expectedMetric := fmt.Sprintf(
|
|
`# HELP grafana_alerting_simple_routing_rules The number of alert rules using simplified routing.
|
|
# TYPE grafana_alerting_simple_routing_rules gauge
|
|
grafana_alerting_simple_routing_rules{org="%[1]d"} 1
|
|
`, alertRule1.OrgID)
|
|
|
|
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_simple_routing_rules")
|
|
require.NoError(t, err)
|
|
})
|
|
|
|
alertRule2 := models.RuleGen.With(
|
|
models.RuleGen.WithOrgID(secondOrgID),
|
|
models.RuleGen.WithNotificationSettingsGen(models.NotificationSettingsGen()),
|
|
).GenerateRef()
|
|
|
|
t.Run("it should show two rules in two orgs", func(t *testing.T) {
|
|
sch.updateRulesMetrics([]*models.AlertRule{alertRuleWithoutNotificationSettings, alertRule1, alertRule2})
|
|
|
|
expectedMetric := fmt.Sprintf(
|
|
`# HELP grafana_alerting_simple_routing_rules The number of alert rules using simplified routing.
|
|
# TYPE grafana_alerting_simple_routing_rules gauge
|
|
grafana_alerting_simple_routing_rules{org="%[1]d"} 1
|
|
grafana_alerting_simple_routing_rules{org="%[2]d"} 1
|
|
`, alertRule1.OrgID, alertRule2.OrgID)
|
|
|
|
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_simple_routing_rules")
|
|
require.NoError(t, err)
|
|
})
|
|
|
|
t.Run("after removing one of the rules it should show one present rule and two org", func(t *testing.T) {
|
|
sch.updateRulesMetrics([]*models.AlertRule{alertRuleWithoutNotificationSettings, alertRule2})
|
|
|
|
// Because alertRuleWithoutNotificationSettings.orgID is present,
|
|
// the metric is also present but set to 0 because the org has no rules with NotificationSettings.
|
|
expectedMetric := fmt.Sprintf(
|
|
`# HELP grafana_alerting_simple_routing_rules The number of alert rules using simplified routing.
|
|
# TYPE grafana_alerting_simple_routing_rules gauge
|
|
grafana_alerting_simple_routing_rules{org="%[2]d"} 1
|
|
`, alertRuleWithoutNotificationSettings.OrgID, alertRule2.OrgID)
|
|
|
|
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_simple_routing_rules")
|
|
require.NoError(t, err)
|
|
})
|
|
|
|
t.Run("after removing all rules it should not show any metrics", func(t *testing.T) {
|
|
sch.updateRulesMetrics([]*models.AlertRule{})
|
|
|
|
expectedMetric := ""
|
|
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_simple_routing_rules")
|
|
require.ErrorContains(t, err, "expected metric name(s) not found: [grafana_alerting_simple_routing_rules]")
|
|
})
|
|
})
|
|
|
|
t.Run("simplified_editor_rules metric should reflect the current state", func(t *testing.T) {
|
|
const firstOrgID int64 = 1
|
|
const secondOrgID int64 = 2
|
|
|
|
alertRuleWithAdvancedSettings := models.RuleGen.With(
|
|
models.RuleGen.WithOrgID(firstOrgID),
|
|
models.RuleGen.WithEditorSettingsSimplifiedQueryAndExpressionsSection(false),
|
|
).GenerateRef()
|
|
|
|
// The rule does not have simplified editor enabled, should not be in the metrics
|
|
t.Run("it should not show metrics", func(t *testing.T) {
|
|
sch.updateRulesMetrics([]*models.AlertRule{alertRuleWithAdvancedSettings})
|
|
|
|
expectedMetric := ""
|
|
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_simplified_editor_rules")
|
|
require.ErrorContains(t, err, "expected metric name(s) not found: [grafana_alerting_simplified_editor_rules]")
|
|
})
|
|
|
|
alertRule1 := models.RuleGen.With(
|
|
models.RuleGen.WithOrgID(firstOrgID),
|
|
models.RuleGen.WithEditorSettingsSimplifiedQueryAndExpressionsSection(true),
|
|
).GenerateRef()
|
|
|
|
t.Run("it should show one rule in a single org", func(t *testing.T) {
|
|
sch.updateRulesMetrics([]*models.AlertRule{alertRuleWithAdvancedSettings, alertRule1})
|
|
|
|
expectedMetric := fmt.Sprintf(
|
|
`# HELP grafana_alerting_simplified_editor_rules The number of alert rules using simplified editor settings.
|
|
# TYPE grafana_alerting_simplified_editor_rules gauge
|
|
grafana_alerting_simplified_editor_rules{org="%[1]d",setting="simplified_query_and_expressions_section"} 1
|
|
`, alertRule1.OrgID)
|
|
|
|
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_simplified_editor_rules")
|
|
require.NoError(t, err)
|
|
})
|
|
|
|
alertRule2 := models.RuleGen.With(
|
|
models.RuleGen.WithOrgID(secondOrgID),
|
|
models.RuleGen.WithEditorSettingsSimplifiedQueryAndExpressionsSection(true),
|
|
).GenerateRef()
|
|
|
|
t.Run("it should show two rules in two orgs", func(t *testing.T) {
|
|
sch.updateRulesMetrics([]*models.AlertRule{alertRuleWithAdvancedSettings, alertRule1, alertRule2})
|
|
|
|
expectedMetric := fmt.Sprintf(
|
|
`# HELP grafana_alerting_simplified_editor_rules The number of alert rules using simplified editor settings.
|
|
# TYPE grafana_alerting_simplified_editor_rules gauge
|
|
grafana_alerting_simplified_editor_rules{org="%[1]d",setting="simplified_query_and_expressions_section"} 1
|
|
grafana_alerting_simplified_editor_rules{org="%[2]d",setting="simplified_query_and_expressions_section"} 1
|
|
`, alertRule1.OrgID, alertRule2.OrgID)
|
|
|
|
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_simplified_editor_rules")
|
|
require.NoError(t, err)
|
|
})
|
|
|
|
t.Run("after removing one of the rules it should show one present rule and one org", func(t *testing.T) {
|
|
sch.updateRulesMetrics([]*models.AlertRule{alertRuleWithAdvancedSettings, alertRule2})
|
|
|
|
expectedMetric := fmt.Sprintf(
|
|
`# HELP grafana_alerting_simplified_editor_rules The number of alert rules using simplified editor settings.
|
|
# TYPE grafana_alerting_simplified_editor_rules gauge
|
|
grafana_alerting_simplified_editor_rules{org="%d",setting="simplified_query_and_expressions_section"} 1
|
|
`, alertRule2.OrgID)
|
|
|
|
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_simplified_editor_rules")
|
|
require.NoError(t, err)
|
|
})
|
|
|
|
t.Run("after removing all rules it should not show any metrics", func(t *testing.T) {
|
|
sch.updateRulesMetrics([]*models.AlertRule{})
|
|
|
|
expectedMetric := ""
|
|
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_simplified_editor_rules")
|
|
require.ErrorContains(t, err, "expected metric name(s) not found: [grafana_alerting_simplified_editor_rules]")
|
|
})
|
|
})
|
|
|
|
t.Run("rule_groups metric should reflect the current state", func(t *testing.T) {
|
|
const firstOrgID int64 = 1
|
|
const secondOrgID int64 = 2
|
|
|
|
// Without any rules there are no metrics
|
|
t.Run("it should not show metrics", func(t *testing.T) {
|
|
sch.updateRulesMetrics([]*models.AlertRule{})
|
|
|
|
expectedMetric := ""
|
|
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_rule_groups")
|
|
require.ErrorContains(t, err, "expected metric name(s) not found: [grafana_alerting_rule_groups]")
|
|
})
|
|
|
|
alertRule1 := models.RuleGen.With(models.RuleGen.WithOrgID(firstOrgID)).GenerateRef()
|
|
|
|
t.Run("it should show one rule group in a single org", func(t *testing.T) {
|
|
sch.updateRulesMetrics([]*models.AlertRule{alertRule1})
|
|
|
|
expectedMetric := fmt.Sprintf(
|
|
`# HELP grafana_alerting_rule_groups The number of alert rule groups
|
|
# TYPE grafana_alerting_rule_groups gauge
|
|
grafana_alerting_rule_groups{org="%[1]d"} 1
|
|
`, alertRule1.OrgID)
|
|
|
|
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_rule_groups")
|
|
require.NoError(t, err)
|
|
})
|
|
|
|
alertRule2 := models.RuleGen.With(models.RuleGen.WithOrgID(secondOrgID)).GenerateRef()
|
|
|
|
t.Run("it should show two rule groups in two orgs", func(t *testing.T) {
|
|
sch.updateRulesMetrics([]*models.AlertRule{alertRule1, alertRule2})
|
|
|
|
expectedMetric := fmt.Sprintf(
|
|
`# HELP grafana_alerting_rule_groups The number of alert rule groups
|
|
# TYPE grafana_alerting_rule_groups gauge
|
|
grafana_alerting_rule_groups{org="%[1]d"} 1
|
|
grafana_alerting_rule_groups{org="%[2]d"} 1
|
|
`, alertRule1.OrgID, alertRule2.OrgID)
|
|
|
|
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_rule_groups")
|
|
require.NoError(t, err)
|
|
})
|
|
|
|
t.Run("when the first rule is removed it should show one rule group", func(t *testing.T) {
|
|
sch.updateRulesMetrics([]*models.AlertRule{alertRule2})
|
|
|
|
expectedMetric := fmt.Sprintf(
|
|
`# HELP grafana_alerting_rule_groups The number of alert rule groups
|
|
# TYPE grafana_alerting_rule_groups gauge
|
|
grafana_alerting_rule_groups{org="%[1]d"} 1
|
|
`, alertRule2.OrgID)
|
|
|
|
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_rule_groups")
|
|
require.NoError(t, err)
|
|
})
|
|
})
|
|
}
|
|
|
|
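// TestSchedule_deleteAlertRule verifies that deleting an existing rule stops
// its evaluation loop and removes it from the registry, and that deleting an
// unknown rule is a no-op.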
func TestSchedule_deleteAlertRule(t *testing.T) {
	t.Run("when rule exists", func(t *testing.T) {
		t.Run("it should stop evaluation loop and remove the controller from registry", func(t *testing.T) {
			sch := setupScheduler(t, nil, nil, nil, nil, nil)
			ruleFactory := ruleFactoryFromScheduler(sch)
			rule := models.RuleGen.GenerateRef()
			key := rule.GetKey()

			info, _ := sch.registry.getOrCreate(context.Background(), rule, ruleFactory)
			sch.deleteAlertRule(key)
			require.ErrorIs(t, info.(*alertRule).ctx.Err(), errRuleDeleted)
			require.False(t, sch.registry.exists(key))
		})
	})
	t.Run("when rule does not exist", func(t *testing.T) {
		t.Run("should exit", func(t *testing.T) {
			sch := setupScheduler(t, nil, nil, nil, nil, nil)
			key := models.GenerateRuleKey(rand.Int63())
			sch.deleteAlertRule(key)
		})
	})
}
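// setupScheduler builds a scheduler for tests, wired to a mocked clock and
// test doubles. Every nil argument is replaced with a default fake: a fresh
// rules store, instance store, pedantic Prometheus registry, alerts-sender
// mock, or an evaluator factory backed by a fake data-source cache.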
func setupScheduler(t *testing.T, rs *fakeRulesStore, is *state.FakeInstanceStore, registry *prometheus.Registry, senderMock *SyncAlertsSenderMock, evalMock eval.EvaluatorFactory) *schedule {
	t.Helper()
	testTracer := tracing.InitializeTracerForTest()

	mockedClock := clock.NewMock()

	if rs == nil {
		rs = newFakeRulesStore()
	}

	if is == nil {
		is = &state.FakeInstanceStore{}
	}

	var evaluator = evalMock
	if evalMock == nil {
		evaluator = eval.NewEvaluatorFactory(setting.UnifiedAlertingSettings{}, &datasources.FakeCacheService{}, expr.ProvideService(&setting.Cfg{ExpressionsEnabled: true}, nil, nil, featuremgmt.WithFeatures(), nil, tracing.InitializeTracerForTest()))
	}

	if registry == nil {
		registry = prometheus.NewPedanticRegistry()
	}
	m := metrics.NewNGAlert(registry)

	appUrl := &url.URL{
		Scheme: "http",
		Host:   "localhost",
	}

	if senderMock == nil {
		senderMock = NewSyncAlertsSenderMock()
		senderMock.EXPECT().Send(mock.Anything, mock.Anything, mock.Anything).Return()
	}

	cfg := setting.UnifiedAlertingSettings{
		BaseInterval: time.Second,
		MaxAttempts:  1,
		RecordingRules: setting.RecordingRuleSettings{
			Enabled: true,
		},
	}

	fakeRecordingWriter := writer.FakeWriter{}

	schedCfg := SchedulerCfg{
		BaseInterval:      cfg.BaseInterval,
		MaxAttempts:       cfg.MaxAttempts,
		C:                 mockedClock,
		AppURL:            appUrl,
		EvaluatorFactory:  evaluator,
		RuleStore:         rs,
		RecordingRulesCfg: cfg.RecordingRules,
		Metrics:           m.GetSchedulerMetrics(),
		AlertSender:       senderMock,
		Tracer:            testTracer,
		Log:               log.New("ngalert.scheduler"),
		RecordingWriter:   fakeRecordingWriter,
	}
	managerCfg := state.ManagerCfg{
		Metrics:                 m.GetStateMetrics(),
		ExternalURL:             nil,
		InstanceStore:           is,
		Images:                  &state.NoopImageService{},
		Clock:                   mockedClock,
		Historian:               &state.FakeHistorian{},
		Tracer:                  testTracer,
		Log:                     log.New("ngalert.state.manager"),
		MaxStateSaveConcurrency: 1,
	}
	syncStatePersister := state.NewSyncStatePersisiter(log.New("ngalert.state.manager.persist"), managerCfg)
	st := state.NewManager(managerCfg, syncStatePersister)

	return NewScheduler(schedCfg, st)
}
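// withQueryForState returns a rule mutator whose single math expression
// deterministically produces the desired evaluation state: a false condition
// for Normal, a true condition for Alerting and Pending (Pending additionally
// gets a non-zero For duration), and an invalid expression for Error.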
func withQueryForState(t *testing.T, evalResult eval.State) models.AlertRuleMutator {
	var expression string
	var forMultiplier int64 = 0
	switch evalResult {
	case eval.Normal:
		expression = `{
			"datasourceUid": "__expr__",
			"type": "math",
			"expression": "2 + 1 < 1"
		}`
	case eval.Pending, eval.Alerting:
		expression = `{
			"datasourceUid": "__expr__",
			"type": "math",
			"expression": "2 + 2 > 1"
		}`
		if evalResult == eval.Pending {
			forMultiplier = rand.Int63n(9) + 1
		}
	case eval.Error:
		expression = `{
			"datasourceUid": "__expr__",
			"type": "math",
			"expression": "$A"
		}`
	default:
		require.Fail(t, fmt.Sprintf("Alert rule with desired evaluation result '%s' is not supported yet", evalResult))
	}

	return func(rule *models.AlertRule) {
		rule.Condition = "A"
		rule.Data = []models.AlertQuery{
			{
				DatasourceUID: expr.DatasourceUID,
				Model:         json.RawMessage(expression),
				RelativeTimeRange: models.RelativeTimeRange{
					From: models.Duration(5 * time.Hour),
					To:   models.Duration(3 * time.Hour),
				},
				RefID: "A",
			},
		}
		rule.For = time.Duration(rule.IntervalSeconds*forMultiplier) * time.Second
	}
}
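// assertEvalRun waits up to one second for an evaluation event for each of the
// given rule keys, asserting that every event happened at the expected tick
// and that no unexpected rule was evaluated.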
func assertEvalRun(t *testing.T, ch <-chan evalAppliedInfo, tick time.Time, keys ...models.AlertRuleKey) {
	timeout := time.After(time.Second)

	expected := make(map[models.AlertRuleKey]struct{}, len(keys))
	for _, k := range keys {
		expected[k] = struct{}{}
	}

	for {
		select {
		case info := <-ch:
			_, ok := expected[info.alertDefKey]
			if !ok {
				t.Fatalf("alert rule: %v should not have been evaluated at: %v", info.alertDefKey, info.now)
			}
			t.Logf("alert rule: %v evaluated at: %v", info.alertDefKey, info.now)
			assert.Equal(t, tick, info.now)
			delete(expected, info.alertDefKey)
			if len(expected) == 0 {
				return
			}
		case <-timeout:
			if len(expected) == 0 {
				return
			}
			t.Fatal("timed out waiting for all expected evaluations")
		}
	}
}
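// assertStopRun waits up to one second for a stop event for each of the given
// rule keys, asserting that no unexpected rule was stopped.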
func assertStopRun(t *testing.T, ch <-chan models.AlertRuleKey, keys ...models.AlertRuleKey) {
	timeout := time.After(time.Second)

	expected := make(map[models.AlertRuleKey]struct{}, len(keys))
	for _, k := range keys {
		expected[k] = struct{}{}
	}

	for {
		select {
		case alertDefKey := <-ch:
			_, ok := expected[alertDefKey]
			t.Logf("alert rule: %v stopped", alertDefKey)
			assert.True(t, ok)
			delete(expected, alertDefKey)
			if len(expected) == 0 {
				return
			}
		case <-timeout:
			if len(expected) == 0 {
				return
			}
			t.Fatal("timed out waiting for all expected rules to be stopped")
		}
	}
}
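// assertScheduledContains fails the test unless scheduled contains an item
// whose key matches the given rule's key.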
func assertScheduledContains(t *testing.T, scheduled []readyToRunItem, rule *models.AlertRule) {
	t.Helper()

	contains := false
	for _, sch := range scheduled {
		if sch.rule.GetKey() == rule.GetKey() {
			contains = true
		}
	}
	require.True(t, contains, "Expected a scheduled rule with key %s title %s but didn't get one, scheduled rules were %v", rule.GetKey(), rule.Title, scheduled)
}