Alerting: tune rule evaluation via configuration (#35623)

* Alerting: Configure max evaluation retries

* Alerting: Enforce minimum rule evaluation interval

* Alerting: Disable rule evaluation from configuration

* Update docs

* Alerting: Configure rule evaluation timeout

* Move options on unified_alerting config section

* Apply suggestions from code review

Co-authored-by: gotjosh <josue@grafana.com>
This commit is contained in:
Sofia Papagiannaki
2021-09-28 13:00:16 +03:00
committed by GitHub
parent cc94c55e48
commit f6f3a54742
14 changed files with 393 additions and 43 deletions

View File

@@ -1,10 +1,12 @@
package setting
import (
"strconv"
"testing"
"time"
"github.com/stretchr/testify/require"
"gopkg.in/ini.v1"
)
func TestCfg_ReadUnifiedAlertingSettings(t *testing.T) {
@@ -37,3 +39,125 @@ func TestCfg_ReadUnifiedAlertingSettings(t *testing.T) {
require.ElementsMatch(t, []string{"hostname1:9090", "hostname2:9090", "hostname3:9090"}, cfg.UnifiedAlerting.HAPeers)
}
}
func TestUnifiedAlertingSettings(t *testing.T) {
testCases := []struct {
desc string
unifiedAlertingOptions map[string]string
alertingOptions map[string]string
verifyCfg func(*testing.T, Cfg)
}{
{
desc: "when the unified options do not equal the defaults, it should not apply the legacy ones",
unifiedAlertingOptions: map[string]string{
"admin_config_poll_interval": "120s",
"max_attempts": "6",
"min_interval": "60s",
"execute_alerts": "false",
"evaluation_timeout": "90s",
},
alertingOptions: map[string]string{
"max_attempts": strconv.FormatInt(schedulerDefaultMaxAttempts, 10),
"min_interval_seconds": strconv.FormatInt(schedulerDefaultLegacyMinInterval, 10),
"execute_alerts": strconv.FormatBool(schedulereDefaultExecuteAlerts),
"evaluation_timeout_seconds": strconv.FormatInt(int64(evaluatorDefaultEvaluationTimeout.Seconds()), 10),
},
verifyCfg: func(t *testing.T, cfg Cfg) {
require.Equal(t, 120*time.Second, cfg.UnifiedAlerting.AdminConfigPollInterval)
require.Equal(t, int64(6), cfg.UnifiedAlerting.MaxAttempts)
require.Equal(t, 60*time.Second, cfg.UnifiedAlerting.MinInterval)
require.Equal(t, false, cfg.UnifiedAlerting.ExecuteAlerts)
require.Equal(t, 90*time.Second, cfg.UnifiedAlerting.EvaluationTimeout)
},
},
{
desc: "when the unified options equal the defaults, it should apply the legacy ones",
unifiedAlertingOptions: map[string]string{
"admin_config_poll_interval": "120s",
"max_attempts": strconv.FormatInt(schedulerDefaultMaxAttempts, 10),
"min_interval": schedulerDefaultMinInterval.String(),
"execute_alerts": strconv.FormatBool(schedulereDefaultExecuteAlerts),
"evaluation_timeout": evaluatorDefaultEvaluationTimeout.String(),
},
alertingOptions: map[string]string{
"max_attempts": "12",
"min_interval_seconds": "120",
"execute_alerts": "true",
"evaluation_timeout_seconds": "160",
},
verifyCfg: func(t *testing.T, cfg Cfg) {
require.Equal(t, 120*time.Second, cfg.UnifiedAlerting.AdminConfigPollInterval)
require.Equal(t, int64(12), cfg.UnifiedAlerting.MaxAttempts)
require.Equal(t, 120*time.Second, cfg.UnifiedAlerting.MinInterval)
require.Equal(t, true, cfg.UnifiedAlerting.ExecuteAlerts)
require.Equal(t, 160*time.Second, cfg.UnifiedAlerting.EvaluationTimeout)
},
},
{
desc: "when both unified and legacy options are invalid, apply the defaults",
unifiedAlertingOptions: map[string]string{
"max_attempts": "invalid",
"min_interval": "invalid",
"execute_alerts": "invalid",
"evaluation_timeouts": "invalid",
},
alertingOptions: map[string]string{
"max_attempts": "invalid",
"min_interval_seconds": "invalid",
"execute_alerts": "invalid",
"evaluation_timeout_seconds": "invalid",
},
verifyCfg: func(t *testing.T, cfg Cfg) {
require.Equal(t, alertmanagerDefaultConfigPollInterval, cfg.UnifiedAlerting.AdminConfigPollInterval)
require.Equal(t, int64(schedulerDefaultMaxAttempts), cfg.UnifiedAlerting.MaxAttempts)
require.Equal(t, schedulerDefaultMinInterval, cfg.UnifiedAlerting.MinInterval)
require.Equal(t, schedulereDefaultExecuteAlerts, cfg.UnifiedAlerting.ExecuteAlerts)
require.Equal(t, evaluatorDefaultEvaluationTimeout, cfg.UnifiedAlerting.EvaluationTimeout)
},
},
{
desc: "when unified alerting options are invalid, apply legacy options",
unifiedAlertingOptions: map[string]string{
"max_attempts": "invalid",
"min_interval": "invalid",
"execute_alerts": "invalid",
"evaluation_timeout": "invalid",
},
alertingOptions: map[string]string{
"max_attempts": "12",
"min_interval_seconds": "120",
"execute_alerts": "false",
"evaluation_timeout_seconds": "160",
},
verifyCfg: func(t *testing.T, cfg Cfg) {
require.Equal(t, alertmanagerDefaultConfigPollInterval, cfg.UnifiedAlerting.AdminConfigPollInterval)
require.Equal(t, int64(12), cfg.UnifiedAlerting.MaxAttempts)
require.Equal(t, 120*time.Second, cfg.UnifiedAlerting.MinInterval)
require.Equal(t, false, cfg.UnifiedAlerting.ExecuteAlerts)
require.Equal(t, 160*time.Second, cfg.UnifiedAlerting.EvaluationTimeout)
},
},
}
for _, tc := range testCases {
t.Run(tc.desc, func(t *testing.T) {
f := ini.Empty()
cfg := NewCfg()
unifiedAlertingSec, err := f.NewSection("unified_alerting")
require.NoError(t, err)
for k, v := range tc.unifiedAlertingOptions {
_, err = unifiedAlertingSec.NewKey(k, v)
require.NoError(t, err)
}
alertingSec, err := f.NewSection("alerting")
require.NoError(t, err)
for k, v := range tc.alertingOptions {
_, err = alertingSec.NewKey(k, v)
require.NoError(t, err)
}
err = cfg.ReadUnifiedAlertingSettings(f)
require.NoError(t, err)
tc.verifyCfg(t, *cfg)
})
}
}