From 5bbe9c6e6117ab5fa585133fdb9a37a736ee9d4a Mon Sep 17 00:00:00 2001 From: Alexander Weaver Date: Fri, 9 Feb 2024 15:53:58 -0600 Subject: [PATCH] Alerting: Enable group-level rule evaluation jittering by default, remove feature toggle (#82212) * remove jitter feature flag * Add an out so users can manually disable jitter * Pass in cfg * Add TODO to remove knob in future --- conf/defaults.ini | 4 ++++ conf/sample.ini | 4 ++++ .../configure-grafana/feature-toggles/index.md | 1 - .../grafana-data/src/types/featureToggles.gen.ts | 1 - pkg/services/featuremgmt/registry.go | 12 ------------ pkg/services/featuremgmt/toggles_gen.csv | 1 - pkg/services/featuremgmt/toggles_gen.go | 4 ---- pkg/services/ngalert/ngalert.go | 2 +- pkg/services/ngalert/schedule/jitter.go | 11 ++++++----- pkg/setting/setting_unified_alerting.go | 5 +++++ 10 files changed, 20 insertions(+), 25 deletions(-) diff --git a/conf/defaults.ini b/conf/defaults.ini index c15a21eb6e6..8bcb30a11e5 100644 --- a/conf/defaults.ini +++ b/conf/defaults.ini @@ -1203,6 +1203,10 @@ max_state_save_concurrency = 1 # The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m. state_periodic_save_interval = 5m +# Disables the smoothing of alert evaluations across their evaluation window. +# Rules will evaluate in sync. +disable_jitter = false + [unified_alerting.screenshots] # Enable screenshots in notifications. You must have either installed the Grafana image rendering # plugin, or set up Grafana to use a remote rendering service. diff --git a/conf/sample.ini b/conf/sample.ini index 17ad690d76e..e938b40ff58 100644 --- a/conf/sample.ini +++ b/conf/sample.ini @@ -1130,6 +1130,10 @@ # The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m. ;state_periodic_save_interval = 5m +# Disables the smoothing of alert evaluations across their evaluation window. +# Rules will evaluate in sync. +;disable_jitter = false + [unified_alerting.reserved_labels] # Comma-separated list of reserved labels added by the Grafana Alerting engine that should be disabled. # For example: `disabled_labels=grafana_folder` diff --git a/docs/sources/setup-grafana/configure-grafana/feature-toggles/index.md b/docs/sources/setup-grafana/configure-grafana/feature-toggles/index.md index d55ddae6997..6cda1d14fe9 100644 --- a/docs/sources/setup-grafana/configure-grafana/feature-toggles/index.md +++ b/docs/sources/setup-grafana/configure-grafana/feature-toggles/index.md @@ -55,7 +55,6 @@ Some features are enabled by default. You can disable these feature by setting t | `lokiQueryHints` | Enables query hints for Loki | Yes | | `alertingPreviewUpgrade` | Show Unified Alerting preview and upgrade page in legacy alerting | Yes | | `alertingQueryOptimization` | Optimizes eligible queries in order to reduce load on datasources | | -| `jitterAlertRules` | Distributes alert rule evaluations more evenly over time, by rule group | | ## Preview feature toggles diff --git a/packages/grafana-data/src/types/featureToggles.gen.ts b/packages/grafana-data/src/types/featureToggles.gen.ts index 90dfc96b506..b87e766734a 100644 --- a/packages/grafana-data/src/types/featureToggles.gen.ts +++ b/packages/grafana-data/src/types/featureToggles.gen.ts @@ -168,7 +168,6 @@ export interface FeatureToggles { cloudRBACRoles?: boolean; alertingQueryOptimization?: boolean; newFolderPicker?: boolean; - jitterAlertRules?: boolean; jitterAlertRulesWithinGroups?: boolean; onPremToCloudMigrations?: boolean; alertingSaveStatePeriodic?: boolean; diff --git a/pkg/services/featuremgmt/registry.go b/pkg/services/featuremgmt/registry.go index 97b62d848ee..56db4acd0dd 100644 --- a/pkg/services/featuremgmt/registry.go +++ b/pkg/services/featuremgmt/registry.go @@ -1265,18 +1265,6 @@ var ( FrontendOnly: true, Created: time.Date(2024, time.January, 12, 12, 0, 0, 0, time.UTC), }, - { - Name: "jitterAlertRules", - Description: "Distributes alert rule evaluations more evenly over time, by rule group", - FrontendOnly: false, - Stage: FeatureStageGeneralAvailability, - Owner: grafanaAlertingSquad, - AllowSelfServe: false, - HideFromDocs: false, - HideFromAdminPage: false, - RequiresRestart: true, - Created: time.Date(2024, time.January, 17, 12, 0, 0, 0, time.UTC), - }, { Name: "jitterAlertRulesWithinGroups", Description: "Distributes alert rule evaluations more evenly over time, including spreading out rules within the same group", diff --git a/pkg/services/featuremgmt/toggles_gen.csv b/pkg/services/featuremgmt/toggles_gen.csv index 40fdc057987..bcd896e222a 100644 --- a/pkg/services/featuremgmt/toggles_gen.csv +++ b/pkg/services/featuremgmt/toggles_gen.csv @@ -149,7 +149,6 @@ enablePluginsTracingByDefault,experimental,@grafana/plugins-platform-backend,202 cloudRBACRoles,experimental,@grafana/identity-access-team,2024-01-10,false,true,false alertingQueryOptimization,GA,@grafana/alerting-squad,2024-01-10,false,false,false newFolderPicker,experimental,@grafana/grafana-frontend-platform,2024-01-12,false,false,true -jitterAlertRules,GA,@grafana/alerting-squad,2024-01-17,false,true,false jitterAlertRulesWithinGroups,preview,@grafana/alerting-squad,2024-01-17,false,true,false onPremToCloudMigrations,experimental,@grafana/grafana-operator-experience-squad,2024-01-22,false,false,false alertingSaveStatePeriodic,privatePreview,@grafana/alerting-squad,2024-01-22,false,false,false diff --git a/pkg/services/featuremgmt/toggles_gen.go b/pkg/services/featuremgmt/toggles_gen.go index 5be1a1a1cc7..4707f9d8afb 100644 --- a/pkg/services/featuremgmt/toggles_gen.go +++ b/pkg/services/featuremgmt/toggles_gen.go @@ -607,10 +607,6 @@ const ( // Enables the nested folder picker without having nested folders enabled FlagNewFolderPicker = "newFolderPicker" - // FlagJitterAlertRules - // Distributes alert rule evaluations more evenly over time, by rule group - FlagJitterAlertRules = "jitterAlertRules" - // FlagJitterAlertRulesWithinGroups // Distributes alert rule evaluations more evenly over time, including spreading out rules within the same group FlagJitterAlertRulesWithinGroups = "jitterAlertRulesWithinGroups" diff --git a/pkg/services/ngalert/ngalert.go b/pkg/services/ngalert/ngalert.go index 1b9b1596235..d75f5d91960 100644 --- a/pkg/services/ngalert/ngalert.go +++ b/pkg/services/ngalert/ngalert.go @@ -274,7 +274,7 @@ func (ng *AlertNG) init() error { BaseInterval: ng.Cfg.UnifiedAlerting.BaseInterval, MinRuleInterval: ng.Cfg.UnifiedAlerting.MinInterval, DisableGrafanaFolder: ng.Cfg.UnifiedAlerting.ReservedLabels.IsReservedLabelDisabled(models.FolderTitleLabel), - JitterEvaluations: schedule.JitterStrategyFrom(ng.FeatureToggles), + JitterEvaluations: schedule.JitterStrategyFrom(ng.Cfg.UnifiedAlerting, ng.FeatureToggles), AppURL: appUrl, EvaluatorFactory: evalFactory, RuleStore: ng.store, diff --git a/pkg/services/ngalert/schedule/jitter.go b/pkg/services/ngalert/schedule/jitter.go index 0db59e567ee..61adadb1286 100644 --- a/pkg/services/ngalert/schedule/jitter.go +++ b/pkg/services/ngalert/schedule/jitter.go @@ -7,6 +7,7 @@ import ( "github.com/grafana/grafana-plugin-sdk-go/data" "github.com/grafana/grafana/pkg/services/featuremgmt" ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models" + "github.com/grafana/grafana/pkg/setting" ) // JitterStrategy represents a modifier to alert rule timing that affects how evaluations are distributed. @@ -19,14 +20,14 @@ const ( ) // JitterStrategyFrom returns the JitterStrategy indicated by the current Grafana feature toggles. -func JitterStrategyFrom(toggles featuremgmt.FeatureToggles) JitterStrategy { - strategy := JitterNever +func JitterStrategyFrom(cfg setting.UnifiedAlertingSettings, toggles featuremgmt.FeatureToggles) JitterStrategy { + strategy := JitterByGroup + if cfg.DisableJitter { + return JitterNever + } if toggles == nil { return strategy } - if toggles.IsEnabledGlobally(featuremgmt.FlagJitterAlertRules) { - strategy = JitterByGroup - } if toggles.IsEnabledGlobally(featuremgmt.FlagJitterAlertRulesWithinGroups) { strategy = JitterByRule } diff --git a/pkg/setting/setting_unified_alerting.go b/pkg/setting/setting_unified_alerting.go index ca27fb228ca..b42f0869fee 100644 --- a/pkg/setting/setting_unified_alerting.go +++ b/pkg/setting/setting_unified_alerting.go @@ -83,6 +83,7 @@ type UnifiedAlertingSettings struct { MaxAttempts int64 MinInterval time.Duration EvaluationTimeout time.Duration + DisableJitter bool ExecuteAlerts bool DefaultConfiguration string Enabled *bool // determines whether unified alerting is enabled. If it is nil then user did not define it and therefore its value will be determined during migration. Services should not use it directly. @@ -300,6 +301,10 @@ func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error { uaCfg.BaseInterval = SchedulerBaseInterval + // TODO: This was promoted from a feature toggle and is now the default behavior. + // We can consider removing the knob entirely in a release after 10.4. + uaCfg.DisableJitter = ua.Key("disable_jitter").MustBool(false) + // The base interval of the scheduler for evaluating alerts. // 1. It is used by the internal scheduler's timer to tick at this interval. // 2. to spread evaluations of rules that need to be evaluated at the current tick T. In other words, the evaluation of rules at the tick T will be evenly spread in the interval from T to T+scheduler_tick_interval.