mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting: Add ability to control scheduler tick interval via config (#71980)
* add ability to control scheduler interval via config * add feature flag `configurableSchedulerTick`
This commit is contained in:
parent
8415dd40d7
commit
c7598cc6fb
@ -117,4 +117,5 @@ export interface FeatureToggles {
|
||||
splitScopes?: boolean;
|
||||
azureMonitorDataplane?: boolean;
|
||||
prometheusConfigOverhaulAuth?: boolean;
|
||||
configurableSchedulerTick?: boolean;
|
||||
}
|
||||
|
@ -681,5 +681,14 @@ var (
|
||||
Stage: FeatureStageExperimental,
|
||||
Owner: grafanaObservabilityMetricsSquad,
|
||||
},
|
||||
{
|
||||
Name: "configurableSchedulerTick",
|
||||
Description: "Enable changing the scheduler base interval via configuration option unified_alerting.scheduler_tick_interval",
|
||||
Stage: FeatureStageExperimental,
|
||||
FrontendOnly: false,
|
||||
Owner: grafanaAlertingSquad,
|
||||
RequiresRestart: true,
|
||||
HideFromDocs: true,
|
||||
},
|
||||
}
|
||||
)
|
||||
|
@ -98,3 +98,4 @@ awsAsyncQueryCaching,experimental,@grafana/aws-datasources,false,false,false,fal
|
||||
splitScopes,preview,@grafana/grafana-authnz-team,false,false,true,false
|
||||
azureMonitorDataplane,GA,@grafana/partner-datasources,false,false,false,false
|
||||
prometheusConfigOverhaulAuth,experimental,@grafana/observability-metrics,false,false,false,false
|
||||
configurableSchedulerTick,experimental,@grafana/alerting-squad,false,false,true,false
|
||||
|
|
@ -402,4 +402,8 @@ const (
|
||||
// FlagPrometheusConfigOverhaulAuth
|
||||
// Update the Prometheus configuration page with the new auth component
|
||||
FlagPrometheusConfigOverhaulAuth = "prometheusConfigOverhaulAuth"
|
||||
|
||||
// FlagConfigurableSchedulerTick
|
||||
// Enable changing the scheduler base interval via configuration option unified_alerting.scheduler_tick_interval
|
||||
FlagConfigurableSchedulerTick = "configurableSchedulerTick"
|
||||
)
|
||||
|
@ -130,6 +130,7 @@ func NewScheduler(cfg SchedulerCfg, stateManager *state.Manager) *schedule {
|
||||
}
|
||||
|
||||
func (sch *schedule) Run(ctx context.Context) error {
|
||||
sch.log.Info("Starting scheduler", "tickInterval", sch.baseInterval)
|
||||
t := ticker.New(sch.clock, sch.baseInterval, sch.metrics.Ticker)
|
||||
defer t.Stop()
|
||||
|
||||
|
@ -287,6 +287,27 @@ func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
|
||||
|
||||
uaCfg.BaseInterval = SchedulerBaseInterval
|
||||
|
||||
// The base interval of the scheduler for evaluating alerts.
|
||||
// 1. It is used by the internal scheduler's timer to tick at this interval.
|
||||
// 2. It is used to spread the evaluations of rules that need to be evaluated at the current tick T. In other words, the evaluation of rules at the tick T will be evenly spread in the interval from T to T+scheduler_tick_interval.
|
||||
// For example, if there are 100 rules that need to be evaluated at tick T, and the base interval is 10s, rules will be evaluated every 100ms.
|
||||
// 3. It increases delay between rule updates and state reset.
|
||||
// NOTE:
|
||||
// 1. All alert rule intervals should be multiples of this interval. Otherwise, the rules will not be evaluated. It is not recommended to set it lower than 10s or to odd numbers. Recommended: 10s, 30s, 1m
|
||||
// 2. Increasing the interval slows down how quickly alert rule updates reset the state, and therefore the notifications. The higher the interval, the slower the propagation of changes.
|
||||
baseInterval, err := gtime.ParseDuration(valueAsString(ua, "scheduler_tick_interval", SchedulerBaseInterval.String()))
|
||||
if cfg.IsFeatureToggleEnabled("configurableSchedulerTick") { // use literal to avoid cycle imports
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse setting 'scheduler_tick_interval' as duration: %w", err)
|
||||
}
|
||||
if baseInterval != SchedulerBaseInterval {
|
||||
cfg.Logger.Warn("Scheduler tick interval is changed to non-default", "interval", baseInterval, "default", SchedulerBaseInterval)
|
||||
}
|
||||
uaCfg.BaseInterval = baseInterval
|
||||
} else if baseInterval != SchedulerBaseInterval {
|
||||
cfg.Logger.Warn("Scheduler tick interval is changed to non-default but the feature flag is not enabled. Using default.", "interval", baseInterval, "default", SchedulerBaseInterval)
|
||||
}
|
||||
|
||||
uaMinInterval, err := gtime.ParseDuration(valueAsString(ua, "min_interval", uaCfg.BaseInterval.String()))
|
||||
if err != nil || uaMinInterval == uaCfg.BaseInterval { // unified option is invalid duration or equals the default
|
||||
// if the legacy option is invalid, fallback to 10 (unified alerting min interval default)
|
||||
|
@ -39,6 +39,39 @@ func TestCfg_ReadUnifiedAlertingSettings(t *testing.T) {
|
||||
require.Len(t, cfg.UnifiedAlerting.HAPeers, 3)
|
||||
require.ElementsMatch(t, []string{"hostname1:9090", "hostname2:9090", "hostname3:9090"}, cfg.UnifiedAlerting.HAPeers)
|
||||
}
|
||||
|
||||
t.Run("should read 'scheduler_tick_interval'", func(t *testing.T) {
|
||||
tmp := cfg.IsFeatureToggleEnabled
|
||||
t.Cleanup(func() {
|
||||
cfg.IsFeatureToggleEnabled = tmp
|
||||
})
|
||||
cfg.IsFeatureToggleEnabled = func(key string) bool { return key == "configurableSchedulerTick" }
|
||||
|
||||
s, err := cfg.Raw.NewSection("unified_alerting")
|
||||
require.NoError(t, err)
|
||||
_, err = s.NewKey("scheduler_tick_interval", "1m")
|
||||
require.NoError(t, err)
|
||||
_, err = s.NewKey("min_interval", "3m")
|
||||
require.NoError(t, err)
|
||||
|
||||
require.NoError(t, cfg.ReadUnifiedAlertingSettings(cfg.Raw))
|
||||
require.Equal(t, time.Minute, cfg.UnifiedAlerting.BaseInterval)
|
||||
require.Equal(t, 3*time.Minute, cfg.UnifiedAlerting.MinInterval)
|
||||
|
||||
t.Run("and fail if it is wrong", func(t *testing.T) {
|
||||
_, err = s.NewKey("scheduler_tick_interval", "test")
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Error(t, cfg.ReadUnifiedAlertingSettings(cfg.Raw))
|
||||
})
|
||||
|
||||
t.Run("and use default if not specified", func(t *testing.T) {
|
||||
s.DeleteKey("scheduler_tick_interval")
|
||||
require.NoError(t, cfg.ReadUnifiedAlertingSettings(cfg.Raw))
|
||||
|
||||
require.Equal(t, SchedulerBaseInterval, cfg.UnifiedAlerting.BaseInterval)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func TestUnifiedAlertingSettings(t *testing.T) {
|
||||
|
Loading…
Reference in New Issue
Block a user