Alerting: Make retention period configurable for the notification log (#85605)

* Alerting: Make retention period configurable for the notification log

* update sample.ini

* fix outdated comment (on disk -> kvstore)

* skip checking cyclomatic complexity for ReadUnifiedAlertingSettings
This commit is contained in:
Santiago
2024-04-05 12:25:43 +02:00
committed by GitHub
parent b19c246a39
commit c7573bb0f7
4 changed files with 25 additions and 10 deletions

View File

@@ -1240,6 +1240,9 @@ state_periodic_save_interval = 5m
# Rules will evaluate in sync.
disable_jitter = false
# Retention period for Alertmanager notification log entries.
notification_log_retention = 5d
[unified_alerting.screenshots]
# Enable screenshots in notifications. You must have either installed the Grafana image rendering
# plugin, or set up Grafana to use a remote rendering service.

View File

@@ -1156,6 +1156,9 @@
# Rules will evaluate in sync.
;disable_jitter = false
# Retention period for Alertmanager notification log entries.
;notification_log_retention = 5d
[unified_alerting.reserved_labels]
# Comma-separated list of reserved labels added by the Grafana Alerting engine that should be disabled.
# For example: `disabled_labels=grafana_folder`

View File

@@ -26,13 +26,12 @@ import (
)
const (
// maintenanceNotificationAndSilences how often should we flush and garbage collect notifications
notificationLogMaintenanceInterval = 15 * time.Minute
)
// How often we flush and garbage collect notifications and silences.
maintenanceInterval = 15 * time.Minute
// How long should we keep silences and notification entries on-disk after they've served their purpose.
var retentionNotificationsAndSilences = 5 * 24 * time.Hour
var silenceMaintenanceInterval = 15 * time.Minute
// How long we keep silences in the kvstore after they've expired.
silenceRetention = 5 * 24 * time.Hour
)
type AlertingStore interface {
store.AlertingStore
@@ -104,8 +103,8 @@ func NewAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store A
silencesOptions := maintenanceOptions{
initialState: silences,
retention: retentionNotificationsAndSilences,
maintenanceFrequency: silenceMaintenanceInterval,
retention: silenceRetention,
maintenanceFrequency: maintenanceInterval,
maintenanceFunc: func(state alertingNotify.State) (int64, error) {
// Detached context here is to make sure that when the service is shut down the persist operation is executed.
return stateStore.SaveSilences(context.Background(), state)
@@ -114,8 +113,8 @@ func NewAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store A
nflogOptions := maintenanceOptions{
initialState: nflog,
retention: retentionNotificationsAndSilences,
maintenanceFrequency: notificationLogMaintenanceInterval,
retention: cfg.UnifiedAlerting.NotificationLogRetention,
maintenanceFrequency: maintenanceInterval,
maintenanceFunc: func(state alertingNotify.State) (int64, error) {
// Detached context here is to make sure that when the service is shut down the persist operation is executed.
return stateStore.SaveNotificationLog(context.Background(), state)

View File

@@ -101,6 +101,9 @@ type UnifiedAlertingSettings struct {
MaxStateSaveConcurrency int
StatePeriodicSaveInterval time.Duration
RulesPerRuleGroupLimit int64
// Retention period for Alertmanager notification log entries.
NotificationLogRetention time.Duration
}
// RemoteAlertmanagerSettings contains the configuration needed
@@ -173,6 +176,8 @@ func (cfg *Cfg) readUnifiedAlertingEnabledSetting(section *ini.Section) (*bool,
// ReadUnifiedAlertingSettings reads both the `unified_alerting` and `alerting` sections of the configuration while preferring configuration from the `alerting` section.
// It first reads the `unified_alerting` section, then looks for non-defaults on the `alerting` section and prefers those.
//
// nolint: gocyclo
func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
var err error
uaCfg := UnifiedAlertingSettings{}
@@ -378,6 +383,11 @@ func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
return err
}
uaCfg.NotificationLogRetention, err = gtime.ParseDuration(valueAsString(ua, "notification_log_retention", (5 * 24 * time.Hour).String()))
if err != nil {
return err
}
cfg.UnifiedAlerting = uaCfg
return nil
}