Alerting: Enable group-level rule evaluation jittering by default, remove feature toggle (#82212)

* remove jitter feature flag

* Add an opt-out setting so users can manually disable jitter

* Pass in cfg

* Add TODO to remove knob in future
This commit is contained in:
Alexander Weaver 2024-02-09 15:53:58 -06:00 committed by GitHub
parent b5d14d03d7
commit 5bbe9c6e61
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 20 additions and 25 deletions

View File

@ -1203,6 +1203,10 @@ max_state_save_concurrency = 1
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
state_periodic_save_interval = 5m
# Disables the smoothing of alert evaluations across their evaluation window.
# Rules will evaluate in sync.
disable_jitter = false
[unified_alerting.screenshots]
# Enable screenshots in notifications. You must have either installed the Grafana image rendering
# plugin, or set up Grafana to use a remote rendering service.

View File

@ -1130,6 +1130,10 @@
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
;state_periodic_save_interval = 5m
# Disables the smoothing of alert evaluations across their evaluation window.
# Rules will evaluate in sync.
;disable_jitter = false
[unified_alerting.reserved_labels]
# Comma-separated list of reserved labels added by the Grafana Alerting engine that should be disabled.
# For example: `disabled_labels=grafana_folder`

View File

@ -55,7 +55,6 @@ Some features are enabled by default. You can disable these feature by setting t
| `lokiQueryHints` | Enables query hints for Loki | Yes |
| `alertingPreviewUpgrade` | Show Unified Alerting preview and upgrade page in legacy alerting | Yes |
| `alertingQueryOptimization` | Optimizes eligible queries in order to reduce load on datasources | |
| `jitterAlertRules` | Distributes alert rule evaluations more evenly over time, by rule group | |
## Preview feature toggles

View File

@ -168,7 +168,6 @@ export interface FeatureToggles {
cloudRBACRoles?: boolean;
alertingQueryOptimization?: boolean;
newFolderPicker?: boolean;
jitterAlertRules?: boolean;
jitterAlertRulesWithinGroups?: boolean;
onPremToCloudMigrations?: boolean;
alertingSaveStatePeriodic?: boolean;

View File

@ -1265,18 +1265,6 @@ var (
FrontendOnly: true,
Created: time.Date(2024, time.January, 12, 12, 0, 0, 0, time.UTC),
},
{
Name: "jitterAlertRules",
Description: "Distributes alert rule evaluations more evenly over time, by rule group",
FrontendOnly: false,
Stage: FeatureStageGeneralAvailability,
Owner: grafanaAlertingSquad,
AllowSelfServe: false,
HideFromDocs: false,
HideFromAdminPage: false,
RequiresRestart: true,
Created: time.Date(2024, time.January, 17, 12, 0, 0, 0, time.UTC),
},
{
Name: "jitterAlertRulesWithinGroups",
Description: "Distributes alert rule evaluations more evenly over time, including spreading out rules within the same group",

View File

@ -149,7 +149,6 @@ enablePluginsTracingByDefault,experimental,@grafana/plugins-platform-backend,202
cloudRBACRoles,experimental,@grafana/identity-access-team,2024-01-10,false,true,false
alertingQueryOptimization,GA,@grafana/alerting-squad,2024-01-10,false,false,false
newFolderPicker,experimental,@grafana/grafana-frontend-platform,2024-01-12,false,false,true
jitterAlertRules,GA,@grafana/alerting-squad,2024-01-17,false,true,false
jitterAlertRulesWithinGroups,preview,@grafana/alerting-squad,2024-01-17,false,true,false
onPremToCloudMigrations,experimental,@grafana/grafana-operator-experience-squad,2024-01-22,false,false,false
alertingSaveStatePeriodic,privatePreview,@grafana/alerting-squad,2024-01-22,false,false,false

1 Name Stage Owner Created requiresDevMode RequiresRestart FrontendOnly
149 cloudRBACRoles experimental @grafana/identity-access-team 2024-01-10 false true false
150 alertingQueryOptimization GA @grafana/alerting-squad 2024-01-10 false false false
151 newFolderPicker experimental @grafana/grafana-frontend-platform 2024-01-12 false false true
jitterAlertRules GA @grafana/alerting-squad 2024-01-17 false true false
152 jitterAlertRulesWithinGroups preview @grafana/alerting-squad 2024-01-17 false true false
153 onPremToCloudMigrations experimental @grafana/grafana-operator-experience-squad 2024-01-22 false false false
154 alertingSaveStatePeriodic privatePreview @grafana/alerting-squad 2024-01-22 false false false

View File

@ -607,10 +607,6 @@ const (
// Enables the nested folder picker without having nested folders enabled
FlagNewFolderPicker = "newFolderPicker"
// FlagJitterAlertRules
// Distributes alert rule evaluations more evenly over time, by rule group
FlagJitterAlertRules = "jitterAlertRules"
// FlagJitterAlertRulesWithinGroups
// Distributes alert rule evaluations more evenly over time, including spreading out rules within the same group
FlagJitterAlertRulesWithinGroups = "jitterAlertRulesWithinGroups"

View File

@ -274,7 +274,7 @@ func (ng *AlertNG) init() error {
BaseInterval: ng.Cfg.UnifiedAlerting.BaseInterval,
MinRuleInterval: ng.Cfg.UnifiedAlerting.MinInterval,
DisableGrafanaFolder: ng.Cfg.UnifiedAlerting.ReservedLabels.IsReservedLabelDisabled(models.FolderTitleLabel),
JitterEvaluations: schedule.JitterStrategyFrom(ng.FeatureToggles),
JitterEvaluations: schedule.JitterStrategyFrom(ng.Cfg.UnifiedAlerting, ng.FeatureToggles),
AppURL: appUrl,
EvaluatorFactory: evalFactory,
RuleStore: ng.store,

View File

@ -7,6 +7,7 @@ import (
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/grafana/grafana/pkg/services/featuremgmt"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/setting"
)
// JitterStrategy represents a modifier to alert rule timing that affects how evaluations are distributed.
@ -19,14 +20,14 @@ const (
)
// JitterStrategyFrom returns the JitterStrategy indicated by the current Grafana configuration and feature toggles.
func JitterStrategyFrom(toggles featuremgmt.FeatureToggles) JitterStrategy {
strategy := JitterNever
func JitterStrategyFrom(cfg setting.UnifiedAlertingSettings, toggles featuremgmt.FeatureToggles) JitterStrategy {
strategy := JitterByGroup
if cfg.DisableJitter {
return JitterNever
}
if toggles == nil {
return strategy
}
if toggles.IsEnabledGlobally(featuremgmt.FlagJitterAlertRules) {
strategy = JitterByGroup
}
if toggles.IsEnabledGlobally(featuremgmt.FlagJitterAlertRulesWithinGroups) {
strategy = JitterByRule
}

View File

@ -83,6 +83,7 @@ type UnifiedAlertingSettings struct {
MaxAttempts int64
MinInterval time.Duration
EvaluationTimeout time.Duration
DisableJitter bool
ExecuteAlerts bool
DefaultConfiguration string
Enabled *bool // determines whether unified alerting is enabled. If it is nil then user did not define it and therefore its value will be determined during migration. Services should not use it directly.
@ -300,6 +301,10 @@ func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
uaCfg.BaseInterval = SchedulerBaseInterval
// TODO: This was promoted from a feature toggle and is now the default behavior.
// We can consider removing the knob entirely in a release after 10.4.
uaCfg.DisableJitter = ua.Key("disable_jitter").MustBool(false)
// The base interval of the scheduler for evaluating alerts.
// 1. It is used by the internal scheduler's timer to tick at this interval.
// 2. It is used to spread evaluations of rules that need to be evaluated at the current tick T. In other words, the evaluation of rules at the tick T will be evenly spread in the interval from T to T+scheduler_tick_interval.