mirror of
https://github.com/grafana/grafana.git
synced 2024-11-21 16:38:03 -06:00
Unified Alerting: Set max_attempts
to 1 by default (#79095)
* Unified Alerting: Set `max_attempts` to 1 by default. The retry logic for unified alerting has been broken as far back as v9.4.x. Rather than fixing it in one go and causing a headache for our users, whose rules would start putting extra load on their datasources, I think a better approach is to simply set 1 as the default and then let our users change it. I see two cons with this approach: - Configuration cannot be ported over automatically from legacy to unified alerting; users will have to manually set `max_attempts` to 3 when migrating. - Users expecting any sort of retrying (as with legacy alerting) will not get it out of the box and will have to manually edit the configuration. Signed-off-by: gotjosh <josue.abreu@gmail.com> --------- Signed-off-by: gotjosh <josue.abreu@gmail.com>
This commit is contained in:
parent
7cdddb2790
commit
0c9356a3c7
@ -1147,8 +1147,8 @@ execute_alerts = true
|
||||
# The timeout string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
|
||||
evaluation_timeout = 30s
|
||||
|
||||
# Number of times we'll attempt to evaluate an alert rule before giving up on that evaluation. This option has a legacy version in the `[alerting]` section that takes precedence.
|
||||
max_attempts = 3
|
||||
# Number of times we'll attempt to evaluate an alert rule before giving up on that evaluation. The default value is 1.
|
||||
max_attempts = 1
|
||||
|
||||
# Minimum interval to enforce between rule evaluations. Rules will be adjusted if they are less than this value or if they are not a multiple of the scheduler interval (10s). Higher values can help with resource management as we'll schedule fewer evaluations over time. This option has a legacy version in the `[alerting]` section that takes precedence.
|
||||
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
|
||||
|
@ -1094,8 +1094,8 @@
|
||||
# The timeout string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
|
||||
;evaluation_timeout = 30s
|
||||
|
||||
# Number of times we'll attempt to evaluate an alert rule before giving up on that evaluation. This option has a legacy version in the `[alerting]` section that takes precedence.
|
||||
;max_attempts = 3
|
||||
# Number of times we'll attempt to evaluate an alert rule before giving up on that evaluation. The default value is 1.
|
||||
;max_attempts = 1
|
||||
|
||||
# Minimum interval to enforce between rule evaluations. Rules will be adjusted if they are less than this value or if they are not a multiple of the scheduler interval (10s). Higher values can help with resource management as we'll schedule fewer evaluations over time. This option has a legacy version in the `[alerting]` section that takes precedence.
|
||||
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
|
||||
|
@ -1595,7 +1595,7 @@ The timeout string is a possibly signed sequence of decimal numbers, followed by
|
||||
|
||||
### max_attempts
|
||||
|
||||
Sets a maximum number of times we'll attempt to evaluate an alert rule before giving up on that evaluation. The default value is `3`. This option has a [legacy version in the alerting section]({{< relref "#max_attempts-1" >}}) that takes precedence.
|
||||
Sets a maximum number of times we'll attempt to evaluate an alert rule before giving up on that evaluation. The default value is `1`.
|
||||
|
||||
### min_interval
|
||||
|
||||
|
@ -47,7 +47,7 @@ const (
|
||||
evaluatorDefaultEvaluationTimeout = 30 * time.Second
|
||||
schedulerDefaultAdminConfigPollInterval = time.Minute
|
||||
schedulereDefaultExecuteAlerts = true
|
||||
schedulerDefaultMaxAttempts = 3
|
||||
schedulerDefaultMaxAttempts = 1
|
||||
schedulerDefaultLegacyMinInterval = 1
|
||||
screenshotsDefaultCapture = false
|
||||
screenshotsDefaultCaptureTimeout = 10 * time.Second
|
||||
@ -294,15 +294,7 @@ func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
|
||||
}
|
||||
uaCfg.EvaluationTimeout = uaEvaluationTimeout
|
||||
|
||||
uaMaxAttempts := ua.Key("max_attempts").MustInt64(schedulerDefaultMaxAttempts)
|
||||
if uaMaxAttempts == schedulerDefaultMaxAttempts { // unified option or equals the default
|
||||
legacyMaxAttempts := alerting.Key("max_attempts").MustInt64(schedulerDefaultMaxAttempts)
|
||||
if legacyMaxAttempts != schedulerDefaultMaxAttempts {
|
||||
cfg.Logger.Warn("falling back to legacy setting of 'max_attempts'; please use the configuration option in the `unified_alerting` section if Grafana 8 alerts are enabled.")
|
||||
}
|
||||
uaMaxAttempts = legacyMaxAttempts
|
||||
}
|
||||
uaCfg.MaxAttempts = uaMaxAttempts
|
||||
uaCfg.MaxAttempts = ua.Key("max_attempts").MustInt64(schedulerDefaultMaxAttempts)
|
||||
|
||||
uaCfg.BaseInterval = SchedulerBaseInterval
|
||||
|
||||
|
@ -110,20 +110,19 @@ func TestUnifiedAlertingSettings(t *testing.T) {
|
||||
desc: "when the unified options equal the defaults, it should apply the legacy ones",
|
||||
unifiedAlertingOptions: map[string]string{
|
||||
"admin_config_poll_interval": "120s",
|
||||
"max_attempts": strconv.FormatInt(schedulerDefaultMaxAttempts, 10),
|
||||
"min_interval": SchedulerBaseInterval.String(),
|
||||
"execute_alerts": strconv.FormatBool(schedulereDefaultExecuteAlerts),
|
||||
"evaluation_timeout": evaluatorDefaultEvaluationTimeout.String(),
|
||||
},
|
||||
alertingOptions: map[string]string{
|
||||
"max_attempts": "12",
|
||||
"max_attempts": "1",
|
||||
"min_interval_seconds": "120",
|
||||
"execute_alerts": "true",
|
||||
"evaluation_timeout_seconds": "160",
|
||||
},
|
||||
verifyCfg: func(t *testing.T, cfg Cfg) {
|
||||
require.Equal(t, 120*time.Second, cfg.UnifiedAlerting.AdminConfigPollInterval)
|
||||
require.Equal(t, int64(12), cfg.UnifiedAlerting.MaxAttempts)
|
||||
require.Equal(t, int64(1), cfg.UnifiedAlerting.MaxAttempts)
|
||||
require.Equal(t, 120*time.Second, cfg.UnifiedAlerting.MinInterval)
|
||||
require.Equal(t, true, cfg.UnifiedAlerting.ExecuteAlerts)
|
||||
require.Equal(t, 160*time.Second, cfg.UnifiedAlerting.EvaluationTimeout)
|
||||
@ -164,14 +163,14 @@ func TestUnifiedAlertingSettings(t *testing.T) {
|
||||
"evaluation_timeout": "invalid",
|
||||
},
|
||||
alertingOptions: map[string]string{
|
||||
"max_attempts": "12",
|
||||
"max_attempts": "1",
|
||||
"min_interval_seconds": "120",
|
||||
"execute_alerts": "false",
|
||||
"evaluation_timeout_seconds": "160",
|
||||
},
|
||||
verifyCfg: func(t *testing.T, cfg Cfg) {
|
||||
require.Equal(t, alertmanagerDefaultConfigPollInterval, cfg.UnifiedAlerting.AdminConfigPollInterval)
|
||||
require.Equal(t, int64(12), cfg.UnifiedAlerting.MaxAttempts)
|
||||
require.Equal(t, int64(1), cfg.UnifiedAlerting.MaxAttempts)
|
||||
require.Equal(t, 120*time.Second, cfg.UnifiedAlerting.MinInterval)
|
||||
require.Equal(t, false, cfg.UnifiedAlerting.ExecuteAlerts)
|
||||
require.Equal(t, 160*time.Second, cfg.UnifiedAlerting.EvaluationTimeout)
|
||||
|
Loading…
Reference in New Issue
Block a user