Unified Alerting: Set max_attempts to 1 by default (#79095)

* Unified Alerting: Set `max_attempts` to 1 by default

The retry logic for unified alerting has been broken as far as v9.4.x, rather than fixing it in one go and causing a headache to our users with rules putting extra load on their datasources - I think a better approach is to simply set 1 as a default and then let our users change it.

I see two cons with this approach:

- Configuration for legacy to unified alerting cannot be ported over automatically, users will have to manually set `max_attempts` to 3 when migrating.
- Users expecting to get any sort of retrying (as with legacy alerting) will not have it out of the box and will have to manually edit the configuration.

Signed-off-by: gotjosh <josue.abreu@gmail.com>
---------

Signed-off-by: gotjosh <josue.abreu@gmail.com>
This commit is contained in:
gotjosh
2023-12-05 17:42:34 +00:00
committed by GitHub
parent 7cdddb2790
commit 0c9356a3c7
5 changed files with 11 additions and 20 deletions

View File

@@ -47,7 +47,7 @@ const (
evaluatorDefaultEvaluationTimeout = 30 * time.Second
schedulerDefaultAdminConfigPollInterval = time.Minute
schedulereDefaultExecuteAlerts = true
schedulerDefaultMaxAttempts = 3
schedulerDefaultMaxAttempts = 1
schedulerDefaultLegacyMinInterval = 1
screenshotsDefaultCapture = false
screenshotsDefaultCaptureTimeout = 10 * time.Second
@@ -294,15 +294,7 @@ func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
}
uaCfg.EvaluationTimeout = uaEvaluationTimeout
uaMaxAttempts := ua.Key("max_attempts").MustInt64(schedulerDefaultMaxAttempts)
if uaMaxAttempts == schedulerDefaultMaxAttempts { // unified option or equals the default
legacyMaxAttempts := alerting.Key("max_attempts").MustInt64(schedulerDefaultMaxAttempts)
if legacyMaxAttempts != schedulerDefaultMaxAttempts {
cfg.Logger.Warn("falling back to legacy setting of 'max_attempts'; please use the configuration option in the `unified_alerting` section if Grafana 8 alerts are enabled.")
}
uaMaxAttempts = legacyMaxAttempts
}
uaCfg.MaxAttempts = uaMaxAttempts
uaCfg.MaxAttempts = ua.Key("max_attempts").MustInt64(schedulerDefaultMaxAttempts)
uaCfg.BaseInterval = SchedulerBaseInterval

View File

@@ -110,20 +110,19 @@ func TestUnifiedAlertingSettings(t *testing.T) {
desc: "when the unified options equal the defaults, it should apply the legacy ones",
unifiedAlertingOptions: map[string]string{
"admin_config_poll_interval": "120s",
"max_attempts": strconv.FormatInt(schedulerDefaultMaxAttempts, 10),
"min_interval": SchedulerBaseInterval.String(),
"execute_alerts": strconv.FormatBool(schedulereDefaultExecuteAlerts),
"evaluation_timeout": evaluatorDefaultEvaluationTimeout.String(),
},
alertingOptions: map[string]string{
"max_attempts": "12",
"max_attempts": "1",
"min_interval_seconds": "120",
"execute_alerts": "true",
"evaluation_timeout_seconds": "160",
},
verifyCfg: func(t *testing.T, cfg Cfg) {
require.Equal(t, 120*time.Second, cfg.UnifiedAlerting.AdminConfigPollInterval)
require.Equal(t, int64(12), cfg.UnifiedAlerting.MaxAttempts)
require.Equal(t, int64(1), cfg.UnifiedAlerting.MaxAttempts)
require.Equal(t, 120*time.Second, cfg.UnifiedAlerting.MinInterval)
require.Equal(t, true, cfg.UnifiedAlerting.ExecuteAlerts)
require.Equal(t, 160*time.Second, cfg.UnifiedAlerting.EvaluationTimeout)
@@ -164,14 +163,14 @@ func TestUnifiedAlertingSettings(t *testing.T) {
"evaluation_timeout": "invalid",
},
alertingOptions: map[string]string{
"max_attempts": "12",
"max_attempts": "1",
"min_interval_seconds": "120",
"execute_alerts": "false",
"evaluation_timeout_seconds": "160",
},
verifyCfg: func(t *testing.T, cfg Cfg) {
require.Equal(t, alertmanagerDefaultConfigPollInterval, cfg.UnifiedAlerting.AdminConfigPollInterval)
require.Equal(t, int64(12), cfg.UnifiedAlerting.MaxAttempts)
require.Equal(t, int64(1), cfg.UnifiedAlerting.MaxAttempts)
require.Equal(t, 120*time.Second, cfg.UnifiedAlerting.MinInterval)
require.Equal(t, false, cfg.UnifiedAlerting.ExecuteAlerts)
require.Equal(t, 160*time.Second, cfg.UnifiedAlerting.EvaluationTimeout)