Alerting: Move BaseInterval and MinInterval to UnifiedAlerting config (#45107)

* use base interval if legacy value is less than the base interval
This commit is contained in:
Yuriy Tseretyan 2022-02-11 16:13:49 -05:00 committed by GitHub
parent 4f815e3d8e
commit 095ea44e97
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 145 additions and 137 deletions

View File

@ -3,7 +3,6 @@ package ngalert
import (
"context"
"net/url"
"time"
"github.com/benbjohnson/clock"
"golang.org/x/sync/errgroup"
@ -28,17 +27,6 @@ import (
"github.com/grafana/grafana/pkg/setting"
)
const (
// scheduler interval
// changing this value is discouraged
// because this could cause existing alert definition
// with intervals that are not exactly divided by this number
// not to be evaluated
defaultBaseIntervalSeconds = 10
// default alert definition interval
defaultIntervalSeconds int64 = 6 * defaultBaseIntervalSeconds
)
func ProvideService(cfg *setting.Cfg, dataSourceCache datasources.CacheService, routeRegister routing.RouteRegister,
sqlStore *sqlstore.SQLStore, kvStore kvstore.KVStore, expressionService *expr.Service, dataProxy *datasourceproxy.DataSourceProxyService,
quotaService *quota.QuotaService, secretsService secrets.Service, notificationService notifications.Service, m *metrics.NGAlert) (*AlertNG, error) {
@ -92,15 +80,9 @@ type AlertNG struct {
func (ng *AlertNG) init() error {
var err error
baseInterval := ng.Cfg.AlertingBaseInterval
if baseInterval <= 0 {
baseInterval = defaultBaseIntervalSeconds
}
baseInterval *= time.Second
store := &store.DBstore{
BaseInterval: baseInterval,
DefaultInterval: ng.getRuleDefaultInterval(),
BaseInterval: ng.Cfg.UnifiedAlerting.BaseInterval,
DefaultInterval: ng.Cfg.UnifiedAlerting.DefaultAlertForDuration,
SQLStore: ng.SQLStore,
Logger: ng.Log,
}
@ -119,7 +101,7 @@ func (ng *AlertNG) init() error {
schedCfg := schedule.SchedulerCfg{
C: clock.New(),
BaseInterval: baseInterval,
BaseInterval: ng.Cfg.UnifiedAlerting.BaseInterval,
Logger: ng.Log,
MaxAttempts: ng.Cfg.UnifiedAlerting.MaxAttempts,
Evaluator: eval.NewEvaluator(ng.Cfg, ng.Log, ng.DataSourceCache, ng.SecretsService),
@ -131,7 +113,7 @@ func (ng *AlertNG) init() error {
Metrics: ng.Metrics.GetSchedulerMetrics(),
AdminConfigPollInterval: ng.Cfg.UnifiedAlerting.AdminConfigPollInterval,
DisabledOrgs: ng.Cfg.UnifiedAlerting.DisabledOrgs,
MinRuleInterval: ng.getRuleMinInterval(),
MinRuleInterval: ng.Cfg.UnifiedAlerting.MinInterval,
}
appUrl, err := url.Parse(ng.Cfg.AppURL)
@ -191,30 +173,3 @@ func (ng *AlertNG) IsDisabled() bool {
}
return !ng.Cfg.UnifiedAlerting.IsEnabled()
}
// getRuleDefaultIntervalSeconds returns the default rule interval if the interval is not set.
// If this constant (1 minute) is lower than the configured minimum evaluation interval then
// this configuration is returned.
func (ng *AlertNG) getRuleDefaultInterval() time.Duration {
ruleMinInterval := ng.getRuleMinInterval()
if defaultIntervalSeconds < int64(ruleMinInterval.Seconds()) {
return ruleMinInterval
}
return time.Duration(defaultIntervalSeconds) * time.Second
}
// getRuleMinIntervalSeconds returns the configured minimum rule interval.
// If this value is less or equal to zero or not divided exactly by the scheduler interval
// the scheduler interval (10 seconds) is returned.
func (ng *AlertNG) getRuleMinInterval() time.Duration {
if ng.Cfg.UnifiedAlerting.MinInterval <= 0 {
return defaultBaseIntervalSeconds // if it's not configured; apply default
}
if ng.Cfg.UnifiedAlerting.MinInterval%defaultBaseIntervalSeconds != 0 {
ng.Log.Error("Configured minimum evaluation interval is not divided exactly by the scheduler interval and it will fallback to default", "alertingMinInterval", ng.Cfg.UnifiedAlerting.MinInterval, "baseIntervalSeconds", defaultBaseIntervalSeconds, "defaultIntervalSeconds", defaultIntervalSeconds)
return defaultBaseIntervalSeconds // if it's invalid; apply default
}
return ng.Cfg.UnifiedAlerting.MinInterval
}

View File

@ -1,73 +0,0 @@
package ngalert
import (
"testing"
"time"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/setting"
"github.com/stretchr/testify/require"
)
func TestGetRuleDefaultIntervalSeconds(t *testing.T) {
testCases := []struct {
desc string
alertingMinIntervalCfg time.Duration
// the expected default rule interval (applied if a rule interval is missing)
expDefaultInterval time.Duration
// the expected minimum rule interval (enforced if a rule interval is lower than this value; it is used also for computing the default rule interval)
expMinInterval time.Duration
}{
{
desc: "negative min rule interval",
alertingMinIntervalCfg: -1,
expDefaultInterval: time.Duration(defaultIntervalSeconds) * time.Second, // 60s
expMinInterval: defaultBaseIntervalSeconds, // 10s
},
{
desc: "zero min rule interval",
alertingMinIntervalCfg: 0,
expDefaultInterval: time.Duration(defaultIntervalSeconds) * time.Second, // 60s
expMinInterval: defaultBaseIntervalSeconds, // 10s
},
{
desc: "min rule interval not divided exactly by the scheduler interval",
alertingMinIntervalCfg: 1,
expDefaultInterval: time.Duration(defaultIntervalSeconds) * time.Second, // 60s
expMinInterval: defaultBaseIntervalSeconds, // 10s
},
{
desc: "min rule interval equals base scheduler interval",
alertingMinIntervalCfg: defaultBaseIntervalSeconds, // 10s
expDefaultInterval: time.Duration(defaultIntervalSeconds) * time.Second, // 60s
expMinInterval: defaultBaseIntervalSeconds, // 10s
},
{
desc: "valid min rule interval less than default rule interval",
alertingMinIntervalCfg: time.Duration(defaultIntervalSeconds-defaultBaseIntervalSeconds) * time.Second, // 50s
expDefaultInterval: time.Duration(defaultIntervalSeconds) * time.Second, // 60s
expMinInterval: time.Duration(defaultIntervalSeconds-defaultBaseIntervalSeconds) * time.Second, // 50s
},
{
desc: "valid min rule interval greater than default rule interval",
alertingMinIntervalCfg: time.Duration(defaultIntervalSeconds+defaultBaseIntervalSeconds) * time.Second, // 70s
expDefaultInterval: time.Duration(defaultIntervalSeconds+defaultBaseIntervalSeconds) * time.Second, // 70s
expMinInterval: time.Duration(defaultIntervalSeconds+defaultBaseIntervalSeconds) * time.Second, // 70s
},
}
for _, tc := range testCases {
t.Run(tc.desc, func(t *testing.T) {
alertNG := AlertNG{
Log: log.New("test"),
Cfg: &setting.Cfg{
UnifiedAlerting: setting.UnifiedAlertingSettings{
MinInterval: tc.alertingMinIntervalCfg,
},
},
}
require.Equal(t, tc.expDefaultInterval, alertNG.getRuleDefaultInterval())
require.Equal(t, tc.expMinInterval, alertNG.getRuleMinInterval())
})
}
}

View File

@ -30,7 +30,9 @@ func SetupTestEnv(t *testing.T, baseInterval time.Duration) (*ngalert.AlertNG, *
t.Helper()
cfg := setting.NewCfg()
cfg.AlertingBaseInterval = baseInterval
cfg.UnifiedAlerting = setting.UnifiedAlertingSettings{
BaseInterval: setting.SchedulerBaseInterval,
}
// AlertNG database migrations run and the relative database tables are created only when it's enabled
cfg.UnifiedAlerting.Enabled = new(bool)
*cfg.UnifiedAlerting.Enabled = true

View File

@ -418,12 +418,6 @@ type Cfg struct {
// Grafana.com URL
GrafanaComURL string
// Alerting
// AlertingBaseInterval controls the alerting base interval in seconds.
// Only for internal use and not user configuration.
AlertingBaseInterval time.Duration
// Geomap base layer config
GeomapDefaultBaseLayerConfig map[string]interface{}
GeomapEnableCustomBaseLayers bool

View File

@ -2,6 +2,7 @@ package setting
import (
"errors"
"fmt"
"strconv"
"strings"
"time"
@ -47,7 +48,12 @@ const (
schedulereDefaultExecuteAlerts = true
schedulerDefaultMaxAttempts = 3
schedulerDefaultLegacyMinInterval = 1
schedulerDefaultMinInterval = 10 * time.Second
// SchedulerBaseInterval base interval of the scheduler. Controls how often the scheduler fetches database for new changes as well as schedules evaluation of a rule
// changing this value is discouraged because this could cause existing alert definition
// with intervals that are not exactly divided by this number not to be evaluated
SchedulerBaseInterval = 10 * time.Second
// DefaultAlertForDuration indicates a default interval of for how long a rule should be evaluated to change state from Pending to Alerting
DefaultAlertForDuration = 60 * time.Second
)
type UnifiedAlertingSettings struct {
@ -66,6 +72,11 @@ type UnifiedAlertingSettings struct {
DefaultConfiguration string
Enabled *bool // determines whether unified alerting is enabled. If it is nil then user did not define it and therefore its value will be determined during migration. Services should not use it directly.
DisabledOrgs map[int64]struct{}
// BaseInterval interval of time the scheduler updates the rules and evaluates rules.
// Only for internal use and not user configuration.
BaseInterval time.Duration
// DefaultAlertForDuration default time for how long an alert rule should be evaluated before change state.
DefaultAlertForDuration time.Duration
}
// IsEnabled returns true if UnifiedAlertingSettings.Enabled is either nil or true.
@ -205,17 +216,34 @@ func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
}
uaCfg.MaxAttempts = uaMaxAttempts
uaMinInterval, err := gtime.ParseDuration(valueAsString(ua, "min_interval", schedulerDefaultMinInterval.String()))
if err != nil || uaMinInterval == schedulerDefaultMinInterval { // unified option is invalid duration or equals the default
uaCfg.BaseInterval = SchedulerBaseInterval
uaMinInterval, err := gtime.ParseDuration(valueAsString(ua, "min_interval", uaCfg.BaseInterval.String()))
if err != nil || uaMinInterval == uaCfg.BaseInterval { // unified option is invalid duration or equals the default
// if the legacy option is invalid, fallback to 10 (unified alerting min interval default)
legacyMinInterval := time.Duration(alerting.Key("min_interval_seconds").MustInt64(int64(schedulerDefaultMinInterval.Seconds()))) * time.Second
if legacyMinInterval != schedulerDefaultLegacyMinInterval {
legacyMinInterval := time.Duration(alerting.Key("min_interval_seconds").MustInt64(int64(uaCfg.BaseInterval.Seconds()))) * time.Second
if legacyMinInterval > uaCfg.BaseInterval {
cfg.Logger.Warn("falling back to legacy setting of 'min_interval_seconds'; please use the configuration option in the `unified_alerting` section if Grafana 8 alerts are enabled.")
uaMinInterval = legacyMinInterval
} else {
// if legacy interval is smaller than the base interval, adjust it to the base interval
uaMinInterval = uaCfg.BaseInterval
}
uaMinInterval = legacyMinInterval
}
if uaMinInterval < uaCfg.BaseInterval {
return fmt.Errorf("value of setting 'min_interval' should be greater than the base interval (%v)", uaCfg.BaseInterval)
}
if uaMinInterval%uaCfg.BaseInterval != 0 {
return fmt.Errorf("value of setting 'min_interval' should be times of base interval (%v)", uaCfg.BaseInterval)
}
uaCfg.MinInterval = uaMinInterval
uaCfg.DefaultAlertForDuration = DefaultAlertForDuration
if uaMinInterval > uaCfg.DefaultAlertForDuration {
uaCfg.DefaultAlertForDuration = uaMinInterval
}
cfg.UnifiedAlerting = uaCfg
return nil
}

View File

@ -1,6 +1,7 @@
package setting
import (
"math/rand"
"strconv"
"testing"
"time"
@ -68,6 +69,8 @@ func TestUnifiedAlertingSettings(t *testing.T) {
require.Equal(t, 60*time.Second, cfg.UnifiedAlerting.MinInterval)
require.Equal(t, false, cfg.UnifiedAlerting.ExecuteAlerts)
require.Equal(t, 90*time.Second, cfg.UnifiedAlerting.EvaluationTimeout)
require.Equal(t, SchedulerBaseInterval, cfg.UnifiedAlerting.BaseInterval)
require.Equal(t, DefaultAlertForDuration, cfg.UnifiedAlerting.DefaultAlertForDuration)
},
},
{
@ -75,7 +78,7 @@ func TestUnifiedAlertingSettings(t *testing.T) {
unifiedAlertingOptions: map[string]string{
"admin_config_poll_interval": "120s",
"max_attempts": strconv.FormatInt(schedulerDefaultMaxAttempts, 10),
"min_interval": schedulerDefaultMinInterval.String(),
"min_interval": SchedulerBaseInterval.String(),
"execute_alerts": strconv.FormatBool(schedulereDefaultExecuteAlerts),
"evaluation_timeout": evaluatorDefaultEvaluationTimeout.String(),
},
@ -91,6 +94,8 @@ func TestUnifiedAlertingSettings(t *testing.T) {
require.Equal(t, 120*time.Second, cfg.UnifiedAlerting.MinInterval)
require.Equal(t, true, cfg.UnifiedAlerting.ExecuteAlerts)
require.Equal(t, 160*time.Second, cfg.UnifiedAlerting.EvaluationTimeout)
require.Equal(t, SchedulerBaseInterval, cfg.UnifiedAlerting.BaseInterval)
require.Equal(t, 120*time.Second, cfg.UnifiedAlerting.DefaultAlertForDuration)
},
},
{
@ -110,9 +115,11 @@ func TestUnifiedAlertingSettings(t *testing.T) {
verifyCfg: func(t *testing.T, cfg Cfg) {
require.Equal(t, alertmanagerDefaultConfigPollInterval, cfg.UnifiedAlerting.AdminConfigPollInterval)
require.Equal(t, int64(schedulerDefaultMaxAttempts), cfg.UnifiedAlerting.MaxAttempts)
require.Equal(t, schedulerDefaultMinInterval, cfg.UnifiedAlerting.MinInterval)
require.Equal(t, SchedulerBaseInterval, cfg.UnifiedAlerting.MinInterval)
require.Equal(t, schedulereDefaultExecuteAlerts, cfg.UnifiedAlerting.ExecuteAlerts)
require.Equal(t, evaluatorDefaultEvaluationTimeout, cfg.UnifiedAlerting.EvaluationTimeout)
require.Equal(t, SchedulerBaseInterval, cfg.UnifiedAlerting.BaseInterval)
require.Equal(t, DefaultAlertForDuration, cfg.UnifiedAlerting.DefaultAlertForDuration)
},
},
{
@ -135,6 +142,8 @@ func TestUnifiedAlertingSettings(t *testing.T) {
require.Equal(t, 120*time.Second, cfg.UnifiedAlerting.MinInterval)
require.Equal(t, false, cfg.UnifiedAlerting.ExecuteAlerts)
require.Equal(t, 160*time.Second, cfg.UnifiedAlerting.EvaluationTimeout)
require.Equal(t, SchedulerBaseInterval, cfg.UnifiedAlerting.BaseInterval)
require.Equal(t, 120*time.Second, cfg.UnifiedAlerting.DefaultAlertForDuration)
},
},
}
@ -162,3 +171,96 @@ func TestUnifiedAlertingSettings(t *testing.T) {
})
}
}
func TestMinInterval(t *testing.T) {
randPredicate := func(predicate func(dur time.Duration) bool) *time.Duration {
for {
v := time.Duration(rand.Intn(99)+1) * time.Second
if predicate(v) {
return &v
}
}
}
testCases := []struct {
desc string
minInterval *time.Duration
legacyMinInterval *time.Duration
verifyCfg func(*testing.T, *Cfg, error)
}{
{
desc: "should fail if min interval is less than base interval",
minInterval: randPredicate(func(dur time.Duration) bool { return dur < SchedulerBaseInterval }),
verifyCfg: func(t *testing.T, cfg *Cfg, err error) {
require.Error(t, err)
require.Contains(t, err.Error(), "min_interval")
},
},
{
desc: "should fail if min interval is not multiple of base interval",
minInterval: randPredicate(func(dur time.Duration) bool { return dur > SchedulerBaseInterval && dur%SchedulerBaseInterval != 0 }),
verifyCfg: func(t *testing.T, cfg *Cfg, err error) {
require.Error(t, err)
require.Contains(t, err.Error(), "min_interval")
},
},
{
desc: "should not fail if min interval is multiple of base interval",
minInterval: randPredicate(func(dur time.Duration) bool { return dur > SchedulerBaseInterval && dur%SchedulerBaseInterval == 0 }),
verifyCfg: func(t *testing.T, cfg *Cfg, err error) {
require.NoError(t, err)
},
},
{
desc: "should fail if fallback to legacy min interval and it is not multiple of base interval",
legacyMinInterval: randPredicate(func(dur time.Duration) bool { return dur > SchedulerBaseInterval && dur%SchedulerBaseInterval != 0 }),
verifyCfg: func(t *testing.T, cfg *Cfg, err error) {
require.Error(t, err)
require.Contains(t, err.Error(), "min_interval")
},
},
{
desc: "should not fail if fallback to legacy min interval it is multiple of base interval",
legacyMinInterval: randPredicate(func(dur time.Duration) bool { return dur >= SchedulerBaseInterval && dur%SchedulerBaseInterval == 0 }),
verifyCfg: func(t *testing.T, cfg *Cfg, err error) {
require.NoError(t, err)
},
},
{
desc: "should adjust DefaultAlertForDuration to min interval if it is greater",
minInterval: randPredicate(func(dur time.Duration) bool { return dur%SchedulerBaseInterval == 0 && dur > DefaultAlertForDuration }),
verifyCfg: func(t *testing.T, cfg *Cfg, err error) {
require.Equal(t, cfg.UnifiedAlerting.MinInterval, cfg.UnifiedAlerting.DefaultAlertForDuration)
},
},
{
desc: "should fallback to the default if legacy interval is less than base",
legacyMinInterval: randPredicate(func(dur time.Duration) bool { return dur < SchedulerBaseInterval }),
verifyCfg: func(t *testing.T, cfg *Cfg, err error) {
require.Equal(t, SchedulerBaseInterval, cfg.UnifiedAlerting.MinInterval)
},
},
}
for _, testCase := range testCases {
t.Run(testCase.desc, func(t *testing.T) {
f := ini.Empty()
if testCase.minInterval != nil {
section, err := f.NewSection("unified_alerting")
require.NoError(t, err)
_, err = section.NewKey("min_interval", testCase.minInterval.String())
require.NoError(t, err)
}
if testCase.legacyMinInterval != nil {
alertingSec, err := f.NewSection("alerting")
require.NoError(t, err)
_, err = alertingSec.NewKey("min_interval_seconds", strconv.Itoa(int(testCase.legacyMinInterval.Seconds())))
require.NoError(t, err)
}
cfg := NewCfg()
cfg.IsFeatureToggleEnabled = func(key string) bool { return false }
err := cfg.ReadUnifiedAlertingSettings(f)
testCase.verifyCfg(t, cfg, err)
})
}
}