Alerting: Enable group-level rule evaluation jittering by default, remove feature toggle (#82212)

* remove jitter feature flag

* Add an opt-out so users can manually disable jitter

* Pass in cfg

* Add TODO to remove knob in future
This commit is contained in:
Alexander Weaver 2024-02-09 15:53:58 -06:00 committed by GitHub
parent b5d14d03d7
commit 5bbe9c6e61
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 20 additions and 25 deletions

View File

@ -1203,6 +1203,10 @@ max_state_save_concurrency = 1
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m. # The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
state_periodic_save_interval = 5m state_periodic_save_interval = 5m
# Disables the smoothing (jitter) of alert evaluations across their evaluation window.
# When set to true, rules will evaluate in sync.
disable_jitter = false
[unified_alerting.screenshots] [unified_alerting.screenshots]
# Enable screenshots in notifications. You must have either installed the Grafana image rendering # Enable screenshots in notifications. You must have either installed the Grafana image rendering
# plugin, or set up Grafana to use a remote rendering service. # plugin, or set up Grafana to use a remote rendering service.

View File

@ -1130,6 +1130,10 @@
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m. # The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
;state_periodic_save_interval = 5m ;state_periodic_save_interval = 5m
# Disables the smoothing (jitter) of alert evaluations across their evaluation window.
# When set to true, rules will evaluate in sync.
;disable_jitter = false
[unified_alerting.reserved_labels] [unified_alerting.reserved_labels]
# Comma-separated list of reserved labels added by the Grafana Alerting engine that should be disabled. # Comma-separated list of reserved labels added by the Grafana Alerting engine that should be disabled.
# For example: `disabled_labels=grafana_folder` # For example: `disabled_labels=grafana_folder`

View File

@ -55,7 +55,6 @@ Some features are enabled by default. You can disable these feature by setting t
| `lokiQueryHints` | Enables query hints for Loki | Yes | | `lokiQueryHints` | Enables query hints for Loki | Yes |
| `alertingPreviewUpgrade` | Show Unified Alerting preview and upgrade page in legacy alerting | Yes | | `alertingPreviewUpgrade` | Show Unified Alerting preview and upgrade page in legacy alerting | Yes |
| `alertingQueryOptimization` | Optimizes eligible queries in order to reduce load on datasources | | | `alertingQueryOptimization` | Optimizes eligible queries in order to reduce load on datasources | |
| `jitterAlertRules` | Distributes alert rule evaluations more evenly over time, by rule group | |
## Preview feature toggles ## Preview feature toggles

View File

@ -168,7 +168,6 @@ export interface FeatureToggles {
cloudRBACRoles?: boolean; cloudRBACRoles?: boolean;
alertingQueryOptimization?: boolean; alertingQueryOptimization?: boolean;
newFolderPicker?: boolean; newFolderPicker?: boolean;
jitterAlertRules?: boolean;
jitterAlertRulesWithinGroups?: boolean; jitterAlertRulesWithinGroups?: boolean;
onPremToCloudMigrations?: boolean; onPremToCloudMigrations?: boolean;
alertingSaveStatePeriodic?: boolean; alertingSaveStatePeriodic?: boolean;

View File

@ -1265,18 +1265,6 @@ var (
FrontendOnly: true, FrontendOnly: true,
Created: time.Date(2024, time.January, 12, 12, 0, 0, 0, time.UTC), Created: time.Date(2024, time.January, 12, 12, 0, 0, 0, time.UTC),
}, },
{
Name: "jitterAlertRules",
Description: "Distributes alert rule evaluations more evenly over time, by rule group",
FrontendOnly: false,
Stage: FeatureStageGeneralAvailability,
Owner: grafanaAlertingSquad,
AllowSelfServe: false,
HideFromDocs: false,
HideFromAdminPage: false,
RequiresRestart: true,
Created: time.Date(2024, time.January, 17, 12, 0, 0, 0, time.UTC),
},
{ {
Name: "jitterAlertRulesWithinGroups", Name: "jitterAlertRulesWithinGroups",
Description: "Distributes alert rule evaluations more evenly over time, including spreading out rules within the same group", Description: "Distributes alert rule evaluations more evenly over time, including spreading out rules within the same group",

View File

@ -149,7 +149,6 @@ enablePluginsTracingByDefault,experimental,@grafana/plugins-platform-backend,202
cloudRBACRoles,experimental,@grafana/identity-access-team,2024-01-10,false,true,false cloudRBACRoles,experimental,@grafana/identity-access-team,2024-01-10,false,true,false
alertingQueryOptimization,GA,@grafana/alerting-squad,2024-01-10,false,false,false alertingQueryOptimization,GA,@grafana/alerting-squad,2024-01-10,false,false,false
newFolderPicker,experimental,@grafana/grafana-frontend-platform,2024-01-12,false,false,true newFolderPicker,experimental,@grafana/grafana-frontend-platform,2024-01-12,false,false,true
jitterAlertRules,GA,@grafana/alerting-squad,2024-01-17,false,true,false
jitterAlertRulesWithinGroups,preview,@grafana/alerting-squad,2024-01-17,false,true,false jitterAlertRulesWithinGroups,preview,@grafana/alerting-squad,2024-01-17,false,true,false
onPremToCloudMigrations,experimental,@grafana/grafana-operator-experience-squad,2024-01-22,false,false,false onPremToCloudMigrations,experimental,@grafana/grafana-operator-experience-squad,2024-01-22,false,false,false
alertingSaveStatePeriodic,privatePreview,@grafana/alerting-squad,2024-01-22,false,false,false alertingSaveStatePeriodic,privatePreview,@grafana/alerting-squad,2024-01-22,false,false,false

1 Name Stage Owner Created requiresDevMode RequiresRestart FrontendOnly
149 cloudRBACRoles experimental @grafana/identity-access-team 2024-01-10 false true false
150 alertingQueryOptimization GA @grafana/alerting-squad 2024-01-10 false false false
151 newFolderPicker experimental @grafana/grafana-frontend-platform 2024-01-12 false false true
jitterAlertRules GA @grafana/alerting-squad 2024-01-17 false true false
152 jitterAlertRulesWithinGroups preview @grafana/alerting-squad 2024-01-17 false true false
153 onPremToCloudMigrations experimental @grafana/grafana-operator-experience-squad 2024-01-22 false false false
154 alertingSaveStatePeriodic privatePreview @grafana/alerting-squad 2024-01-22 false false false

View File

@ -607,10 +607,6 @@ const (
// Enables the nested folder picker without having nested folders enabled // Enables the nested folder picker without having nested folders enabled
FlagNewFolderPicker = "newFolderPicker" FlagNewFolderPicker = "newFolderPicker"
// FlagJitterAlertRules
// Distributes alert rule evaluations more evenly over time, by rule group
FlagJitterAlertRules = "jitterAlertRules"
// FlagJitterAlertRulesWithinGroups // FlagJitterAlertRulesWithinGroups
// Distributes alert rule evaluations more evenly over time, including spreading out rules within the same group // Distributes alert rule evaluations more evenly over time, including spreading out rules within the same group
FlagJitterAlertRulesWithinGroups = "jitterAlertRulesWithinGroups" FlagJitterAlertRulesWithinGroups = "jitterAlertRulesWithinGroups"

View File

@ -274,7 +274,7 @@ func (ng *AlertNG) init() error {
BaseInterval: ng.Cfg.UnifiedAlerting.BaseInterval, BaseInterval: ng.Cfg.UnifiedAlerting.BaseInterval,
MinRuleInterval: ng.Cfg.UnifiedAlerting.MinInterval, MinRuleInterval: ng.Cfg.UnifiedAlerting.MinInterval,
DisableGrafanaFolder: ng.Cfg.UnifiedAlerting.ReservedLabels.IsReservedLabelDisabled(models.FolderTitleLabel), DisableGrafanaFolder: ng.Cfg.UnifiedAlerting.ReservedLabels.IsReservedLabelDisabled(models.FolderTitleLabel),
JitterEvaluations: schedule.JitterStrategyFrom(ng.FeatureToggles), JitterEvaluations: schedule.JitterStrategyFrom(ng.Cfg.UnifiedAlerting, ng.FeatureToggles),
AppURL: appUrl, AppURL: appUrl,
EvaluatorFactory: evalFactory, EvaluatorFactory: evalFactory,
RuleStore: ng.store, RuleStore: ng.store,

View File

@ -7,6 +7,7 @@ import (
"github.com/grafana/grafana-plugin-sdk-go/data" "github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/grafana/grafana/pkg/services/featuremgmt" "github.com/grafana/grafana/pkg/services/featuremgmt"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models" ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/setting"
) )
// JitterStrategy represents a modifier to alert rule timing that affects how evaluations are distributed. // JitterStrategy represents a modifier to alert rule timing that affects how evaluations are distributed.
@ -19,14 +20,14 @@ const (
) )
// JitterStrategyFrom returns the JitterStrategy indicated by the current Grafana feature toggles. // JitterStrategyFrom returns the JitterStrategy indicated by the current Grafana configuration and feature toggles.
func JitterStrategyFrom(toggles featuremgmt.FeatureToggles) JitterStrategy { func JitterStrategyFrom(cfg setting.UnifiedAlertingSettings, toggles featuremgmt.FeatureToggles) JitterStrategy {
strategy := JitterNever strategy := JitterByGroup
if cfg.DisableJitter {
return JitterNever
}
if toggles == nil { if toggles == nil {
return strategy return strategy
} }
if toggles.IsEnabledGlobally(featuremgmt.FlagJitterAlertRules) {
strategy = JitterByGroup
}
if toggles.IsEnabledGlobally(featuremgmt.FlagJitterAlertRulesWithinGroups) { if toggles.IsEnabledGlobally(featuremgmt.FlagJitterAlertRulesWithinGroups) {
strategy = JitterByRule strategy = JitterByRule
} }

View File

@ -83,6 +83,7 @@ type UnifiedAlertingSettings struct {
MaxAttempts int64 MaxAttempts int64
MinInterval time.Duration MinInterval time.Duration
EvaluationTimeout time.Duration EvaluationTimeout time.Duration
DisableJitter bool
ExecuteAlerts bool ExecuteAlerts bool
DefaultConfiguration string DefaultConfiguration string
Enabled *bool // determines whether unified alerting is enabled. If it is nil then user did not define it and therefore its value will be determined during migration. Services should not use it directly. Enabled *bool // determines whether unified alerting is enabled. If it is nil then user did not define it and therefore its value will be determined during migration. Services should not use it directly.
@ -300,6 +301,10 @@ func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
uaCfg.BaseInterval = SchedulerBaseInterval uaCfg.BaseInterval = SchedulerBaseInterval
// TODO: This was promoted from a feature toggle and is now the default behavior.
// We can consider removing the knob entirely in a release after 10.4.
uaCfg.DisableJitter = ua.Key("disable_jitter").MustBool(false)
// The base interval of the scheduler for evaluating alerts. // The base interval of the scheduler for evaluating alerts.
// 1. It is used by the internal scheduler's timer to tick at this interval. // 1. It is used by the internal scheduler's timer to tick at this interval.
// 2. to spread evaluations of rules that need to be evaluated at the current tick T. In other words, the evaluation of rules at the tick T will be evenly spread in the interval from T to T+scheduler_tick_interval. // 2. to spread evaluations of rules that need to be evaluated at the current tick T. In other words, the evaluation of rules at the tick T will be evenly spread in the interval from T to T+scheduler_tick_interval.