2021-09-16 09:33:51 -05:00
package setting
import (
2021-09-29 09:16:40 -05:00
"errors"
2022-02-11 15:13:49 -06:00
"fmt"
2021-09-29 09:16:40 -05:00
"strconv"
2021-09-16 09:33:51 -05:00
"strings"
"time"
2021-09-21 06:08:52 -05:00
"github.com/grafana/grafana-plugin-sdk-go/backend/gtime"
2021-09-16 09:33:51 -05:00
"github.com/prometheus/alertmanager/cluster"
"gopkg.in/ini.v1"
2023-01-30 02:26:42 -06:00
"github.com/grafana/grafana/pkg/util"
2021-09-16 09:33:51 -05:00
)
const (
2021-09-28 05:00:16 -05:00
alertmanagerDefaultClusterAddr = "0.0.0.0:9094"
alertmanagerDefaultPeerTimeout = 15 * time . Second
alertmanagerDefaultGossipInterval = cluster . DefaultGossipInterval
alertmanagerDefaultPushPullInterval = cluster . DefaultPushPullInterval
2022-11-22 01:09:15 -06:00
alertmanagerDefaultConfigPollInterval = time . Minute
2021-09-23 12:52:20 -05:00
// To start, the alertmanager needs at least one route defined.
// TODO: we should move this to Grafana settings and define this as the default.
2021-09-28 05:00:16 -05:00
alertmanagerDefaultConfiguration = ` {
2021-09-23 12:52:20 -05:00
"alertmanager_config" : {
"route" : {
2022-07-11 11:24:43 -05:00
"receiver" : "grafana-default-email" ,
"group_by" : [ "grafana_folder" , "alertname" ]
2021-09-23 12:52:20 -05:00
} ,
"receivers" : [ {
"name" : "grafana-default-email" ,
"grafana_managed_receiver_configs" : [ {
"uid" : "" ,
"name" : "email receiver" ,
"type" : "email" ,
"isDefault" : true ,
"settings" : {
"addresses" : "<example@email.com>"
}
} ]
} ]
}
}
`
2021-09-28 05:00:16 -05:00
evaluatorDefaultEvaluationTimeout = 30 * time . Second
2022-11-22 01:09:15 -06:00
schedulerDefaultAdminConfigPollInterval = time . Minute
2021-09-28 05:00:16 -05:00
schedulereDefaultExecuteAlerts = true
schedulerDefaultMaxAttempts = 3
schedulerDefaultLegacyMinInterval = 1
2022-06-07 22:04:51 -05:00
screenshotsDefaultCapture = false
2023-01-05 10:07:46 -06:00
screenshotsDefaultCaptureTimeout = 10 * time . Second
screenshotsMaxCaptureTimeout = 30 * time . Second
2022-05-22 09:33:49 -05:00
screenshotsDefaultMaxConcurrent = 5
screenshotsDefaultUploadImageStorage = false
2022-02-11 15:13:49 -06:00
// SchedulerBaseInterval base interval of the scheduler. Controls how often the scheduler fetches database for new changes as well as schedules evaluation of a rule
// changing this value is discouraged because this could cause existing alert definition
// with intervals that are not exactly divided by this number not to be evaluated
SchedulerBaseInterval = 10 * time . Second
2022-02-18 09:05:06 -06:00
// DefaultRuleEvaluationInterval indicates a default interval of for how long a rule should be evaluated to change state from Pending to Alerting
DefaultRuleEvaluationInterval = SchedulerBaseInterval * 6 // == 60 seconds
2023-01-05 12:21:07 -06:00
stateHistoryDefaultEnabled = true
2021-09-16 09:33:51 -05:00
)
2021-09-20 02:12:21 -05:00
type UnifiedAlertingSettings struct {
AdminConfigPollInterval time . Duration
AlertmanagerConfigPollInterval time . Duration
HAListenAddr string
HAAdvertiseAddr string
HAPeers [ ] string
HAPeerTimeout time . Duration
HAGossipInterval time . Duration
HAPushPullInterval time . Duration
2021-09-28 05:00:16 -05:00
MaxAttempts int64
MinInterval time . Duration
EvaluationTimeout time . Duration
ExecuteAlerts bool
2021-09-23 12:52:20 -05:00
DefaultConfiguration string
2021-11-24 13:56:07 -06:00
Enabled * bool // determines whether unified alerting is enabled. If it is nil then user did not define it and therefore its value will be determined during migration. Services should not use it directly.
2021-09-29 09:16:40 -05:00
DisabledOrgs map [ int64 ] struct { }
2022-02-11 15:13:49 -06:00
// BaseInterval interval of time the scheduler updates the rules and evaluates rules.
// Only for internal use and not user configuration.
BaseInterval time . Duration
2022-02-18 09:05:06 -06:00
// DefaultRuleEvaluationInterval default interval between evaluations of a rule.
DefaultRuleEvaluationInterval time . Duration
2022-05-22 09:33:49 -05:00
Screenshots UnifiedAlertingScreenshotSettings
2022-07-11 11:41:40 -05:00
ReservedLabels UnifiedAlertingReservedLabelSettings
2023-01-05 12:21:07 -06:00
StateHistory UnifiedAlertingStateHistorySettings
2022-05-22 09:33:49 -05:00
}
// UnifiedAlertingScreenshotSettings holds the values read from the
// `unified_alerting.screenshots` configuration section: `capture`,
// `capture_timeout`, `max_concurrent_screenshots` and
// `upload_external_image_storage`, in field order.
type UnifiedAlertingScreenshotSettings struct {
	Capture                    bool
	CaptureTimeout             time.Duration
	MaxConcurrentScreenshots   int64
	UploadExternalImageStorage bool
}
2022-07-11 11:41:40 -05:00
// UnifiedAlertingReservedLabelSettings holds the reserved-label options of
// unified alerting.
type UnifiedAlertingReservedLabelSettings struct {
	// DisabledLabels is a set keyed by label name; membership is what
	// IsReservedLabelDisabled reports.
	DisabledLabels map[string]struct{}
}
2023-01-05 12:21:07 -06:00
// UnifiedAlertingStateHistorySettings holds the values read from the
// `unified_alerting.state_history` configuration section, plus the key/value
// pairs of its `external_labels` sub-section.
type UnifiedAlertingStateHistorySettings struct {
	Enabled       bool
	Backend       string
	LokiRemoteURL string
	LokiReadURL   string
	LokiWriteURL  string
	LokiTenantID  string
	// LokiBasicAuthUsername and LokiBasicAuthPassword are used for basic auth
	// if one of them is set.
	LokiBasicAuthPassword string
	LokiBasicAuthUsername string
	ExternalLabels        map[string]string
}
2021-11-24 13:56:07 -06:00
// IsEnabled reports whether unified alerting should be treated as enabled:
// an unset (nil) Enabled counts as enabled, otherwise the pointed-to value is
// returned. Callers use this instead of dereferencing Enabled themselves.
func (u *UnifiedAlertingSettings) IsEnabled() bool {
	if u.Enabled == nil {
		return true
	}
	return *u.Enabled
}
2022-07-11 11:41:40 -05:00
// IsReservedLabelDisabled reports whether label is a key of
// UnifiedAlertingReservedLabelSettings.DisabledLabels.
func (u *UnifiedAlertingReservedLabelSettings) IsReservedLabelDisabled(label string) bool {
	if _, disabled := u.DisabledLabels[label]; disabled {
		return true
	}
	return false
}
2022-05-30 10:47:15 -05:00
// readUnifiedAlertingEnabledSettings reads the settings for unified alerting.
// It returns a non-nil bool and a nil error when unified alerting is enabled either
// because it has been enabled in the settings or by default. It returns nil and
// a non-nil error both unified alerting and legacy alerting are enabled at the same time.
2021-11-24 13:56:07 -06:00
func ( cfg * Cfg ) readUnifiedAlertingEnabledSetting ( section * ini . Section ) ( * bool , error ) {
2022-05-30 10:47:15 -05:00
// At present an invalid value is considered the same as no value. This means that a
// spelling mistake in the string "false" could enable unified alerting rather
// than disable it. This issue can be found here
2022-06-10 02:59:58 -05:00
hasEnabled := section . Key ( "enabled" ) . Value ( ) != ""
if ! hasEnabled {
2021-11-24 13:56:07 -06:00
// TODO: Remove in Grafana v9
2022-01-26 11:44:20 -06:00
if cfg . IsFeatureToggleEnabled ( "ngalert" ) {
2021-11-24 13:56:07 -06:00
cfg . Logger . Warn ( "ngalert feature flag is deprecated: use unified alerting enabled setting instead" )
2022-05-30 10:47:15 -05:00
// feature flag overrides the legacy alerting setting
2021-11-24 13:56:07 -06:00
legacyAlerting := false
AlertingEnabled = & legacyAlerting
2022-06-10 02:59:58 -05:00
unifiedAlerting := true
2022-05-30 10:47:15 -05:00
return & unifiedAlerting , nil
2021-11-24 13:56:07 -06:00
}
2022-05-30 10:47:15 -05:00
// if legacy alerting has not been configured then enable unified alerting
if AlertingEnabled == nil {
2022-06-10 02:59:58 -05:00
unifiedAlerting := true
2022-05-30 10:47:15 -05:00
return & unifiedAlerting , nil
2021-11-24 13:56:07 -06:00
}
2022-05-30 10:47:15 -05:00
// enable unified alerting and disable legacy alerting
legacyAlerting := false
AlertingEnabled = & legacyAlerting
2022-06-10 02:59:58 -05:00
unifiedAlerting := true
2022-05-30 10:47:15 -05:00
return & unifiedAlerting , nil
2021-11-24 13:56:07 -06:00
}
2022-06-10 02:59:58 -05:00
unifiedAlerting , err := section . Key ( "enabled" ) . Bool ( )
if err != nil {
// the value for unified alerting is invalid so disable all alerting
legacyAlerting := false
AlertingEnabled = & legacyAlerting
return nil , fmt . Errorf ( "invalid value %s, should be either true or false" , section . Key ( "enabled" ) )
}
2022-05-30 10:47:15 -05:00
// If both legacy and unified alerting are enabled then return an error
if AlertingEnabled != nil && * AlertingEnabled && unifiedAlerting {
return nil , errors . New ( "legacy and unified alerting cannot both be enabled at the same time, please disable one of them and restart Grafana" )
2021-11-24 13:56:07 -06:00
}
2022-05-30 10:47:15 -05:00
2021-11-24 13:56:07 -06:00
if AlertingEnabled == nil {
2022-05-30 10:47:15 -05:00
legacyAlerting := ! unifiedAlerting
AlertingEnabled = & legacyAlerting
2021-11-24 13:56:07 -06:00
}
2022-05-30 10:47:15 -05:00
return & unifiedAlerting , nil
2021-11-24 13:56:07 -06:00
}
2021-09-28 05:00:16 -05:00
// ReadUnifiedAlertingSettings reads both the `unified_alerting` and `alerting` sections of the configuration while preferring configuration the `alerting` section.
// It first reads the `unified_alerting` section, then looks for non-defaults on the `alerting` section and prefers those.
2021-09-16 09:33:51 -05:00
func ( cfg * Cfg ) ReadUnifiedAlertingSettings ( iniFile * ini . File ) error {
2021-11-24 13:56:07 -06:00
var err error
2021-09-20 02:12:21 -05:00
uaCfg := UnifiedAlertingSettings { }
2021-09-16 09:33:51 -05:00
ua := iniFile . Section ( "unified_alerting" )
2021-11-24 13:56:07 -06:00
uaCfg . Enabled , err = cfg . readUnifiedAlertingEnabledSetting ( ua )
if err != nil {
2022-06-10 02:59:58 -05:00
return fmt . Errorf ( "failed to read unified alerting enabled setting: %w" , err )
2021-09-29 09:16:40 -05:00
}
uaCfg . DisabledOrgs = make ( map [ int64 ] struct { } )
orgsStr := valueAsString ( ua , "disabled_orgs" , "" )
for _ , org := range util . SplitString ( orgsStr ) {
orgID , err := strconv . ParseInt ( org , 10 , 64 )
if err != nil {
return err
}
uaCfg . DisabledOrgs [ orgID ] = struct { } { }
}
2021-09-28 05:00:16 -05:00
uaCfg . AdminConfigPollInterval , err = gtime . ParseDuration ( valueAsString ( ua , "admin_config_poll_interval" , ( schedulerDefaultAdminConfigPollInterval ) . String ( ) ) )
2021-09-16 09:33:51 -05:00
if err != nil {
return err
}
2021-09-28 05:00:16 -05:00
uaCfg . AlertmanagerConfigPollInterval , err = gtime . ParseDuration ( valueAsString ( ua , "alertmanager_config_poll_interval" , ( alertmanagerDefaultConfigPollInterval ) . String ( ) ) )
2021-09-16 09:33:51 -05:00
if err != nil {
return err
}
2021-09-28 05:00:16 -05:00
uaCfg . HAPeerTimeout , err = gtime . ParseDuration ( valueAsString ( ua , "ha_peer_timeout" , ( alertmanagerDefaultPeerTimeout ) . String ( ) ) )
2021-09-16 09:33:51 -05:00
if err != nil {
return err
}
2021-09-28 05:00:16 -05:00
uaCfg . HAGossipInterval , err = gtime . ParseDuration ( valueAsString ( ua , "ha_gossip_interval" , ( alertmanagerDefaultGossipInterval ) . String ( ) ) )
2021-09-16 09:33:51 -05:00
if err != nil {
return err
}
2021-09-28 05:00:16 -05:00
uaCfg . HAPushPullInterval , err = gtime . ParseDuration ( valueAsString ( ua , "ha_push_pull_interval" , ( alertmanagerDefaultPushPullInterval ) . String ( ) ) )
2021-09-16 09:33:51 -05:00
if err != nil {
return err
}
2021-09-28 05:00:16 -05:00
uaCfg . HAListenAddr = ua . Key ( "ha_listen_address" ) . MustString ( alertmanagerDefaultClusterAddr )
2021-09-20 02:12:21 -05:00
uaCfg . HAAdvertiseAddr = ua . Key ( "ha_advertise_address" ) . MustString ( "" )
2021-09-16 09:33:51 -05:00
peers := ua . Key ( "ha_peers" ) . MustString ( "" )
2021-09-20 02:12:21 -05:00
uaCfg . HAPeers = make ( [ ] string , 0 )
2021-09-16 09:33:51 -05:00
if peers != "" {
for _ , peer := range strings . Split ( peers , "," ) {
peer = strings . TrimSpace ( peer )
2021-09-20 02:12:21 -05:00
uaCfg . HAPeers = append ( uaCfg . HAPeers , peer )
2021-09-16 09:33:51 -05:00
}
}
2021-09-28 05:00:16 -05:00
2021-09-23 12:52:20 -05:00
// TODO load from ini file
2021-09-28 05:00:16 -05:00
uaCfg . DefaultConfiguration = alertmanagerDefaultConfiguration
alerting := iniFile . Section ( "alerting" )
uaExecuteAlerts := ua . Key ( "execute_alerts" ) . MustBool ( schedulereDefaultExecuteAlerts )
if uaExecuteAlerts { // unified option equals the default (true)
legacyExecuteAlerts := alerting . Key ( "execute_alerts" ) . MustBool ( schedulereDefaultExecuteAlerts )
if ! legacyExecuteAlerts {
cfg . Logger . Warn ( "falling back to legacy setting of 'execute_alerts'; please use the configuration option in the `unified_alerting` section if Grafana 8 alerts are enabled." )
}
uaExecuteAlerts = legacyExecuteAlerts
}
uaCfg . ExecuteAlerts = uaExecuteAlerts
// if the unified alerting options equal the defaults, apply the respective legacy one
uaEvaluationTimeout , err := gtime . ParseDuration ( valueAsString ( ua , "evaluation_timeout" , evaluatorDefaultEvaluationTimeout . String ( ) ) )
if err != nil || uaEvaluationTimeout == evaluatorDefaultEvaluationTimeout { // unified option is invalid duration or equals the default
legaceEvaluationTimeout := time . Duration ( alerting . Key ( "evaluation_timeout_seconds" ) . MustInt64 ( int64 ( evaluatorDefaultEvaluationTimeout . Seconds ( ) ) ) ) * time . Second
if legaceEvaluationTimeout != evaluatorDefaultEvaluationTimeout {
cfg . Logger . Warn ( "falling back to legacy setting of 'evaluation_timeout_seconds'; please use the configuration option in the `unified_alerting` section if Grafana 8 alerts are enabled." )
}
uaEvaluationTimeout = legaceEvaluationTimeout
}
uaCfg . EvaluationTimeout = uaEvaluationTimeout
uaMaxAttempts := ua . Key ( "max_attempts" ) . MustInt64 ( schedulerDefaultMaxAttempts )
if uaMaxAttempts == schedulerDefaultMaxAttempts { // unified option or equals the default
legacyMaxAttempts := alerting . Key ( "max_attempts" ) . MustInt64 ( schedulerDefaultMaxAttempts )
if legacyMaxAttempts != schedulerDefaultMaxAttempts {
cfg . Logger . Warn ( "falling back to legacy setting of 'max_attempts'; please use the configuration option in the `unified_alerting` section if Grafana 8 alerts are enabled." )
}
uaMaxAttempts = legacyMaxAttempts
}
uaCfg . MaxAttempts = uaMaxAttempts
2022-02-11 15:13:49 -06:00
uaCfg . BaseInterval = SchedulerBaseInterval
uaMinInterval , err := gtime . ParseDuration ( valueAsString ( ua , "min_interval" , uaCfg . BaseInterval . String ( ) ) )
if err != nil || uaMinInterval == uaCfg . BaseInterval { // unified option is invalid duration or equals the default
2021-09-28 05:00:16 -05:00
// if the legacy option is invalid, fallback to 10 (unified alerting min interval default)
2022-02-11 15:13:49 -06:00
legacyMinInterval := time . Duration ( alerting . Key ( "min_interval_seconds" ) . MustInt64 ( int64 ( uaCfg . BaseInterval . Seconds ( ) ) ) ) * time . Second
if legacyMinInterval > uaCfg . BaseInterval {
2021-09-28 05:00:16 -05:00
cfg . Logger . Warn ( "falling back to legacy setting of 'min_interval_seconds'; please use the configuration option in the `unified_alerting` section if Grafana 8 alerts are enabled." )
2022-02-11 15:13:49 -06:00
uaMinInterval = legacyMinInterval
} else {
// if legacy interval is smaller than the base interval, adjust it to the base interval
uaMinInterval = uaCfg . BaseInterval
2021-09-28 05:00:16 -05:00
}
2022-02-11 15:13:49 -06:00
}
if uaMinInterval < uaCfg . BaseInterval {
return fmt . Errorf ( "value of setting 'min_interval' should be greater than the base interval (%v)" , uaCfg . BaseInterval )
}
if uaMinInterval % uaCfg . BaseInterval != 0 {
return fmt . Errorf ( "value of setting 'min_interval' should be times of base interval (%v)" , uaCfg . BaseInterval )
2021-09-28 05:00:16 -05:00
}
uaCfg . MinInterval = uaMinInterval
2022-02-18 09:05:06 -06:00
uaCfg . DefaultRuleEvaluationInterval = DefaultRuleEvaluationInterval
if uaMinInterval > uaCfg . DefaultRuleEvaluationInterval {
uaCfg . DefaultRuleEvaluationInterval = uaMinInterval
2022-02-11 15:13:49 -06:00
}
2022-05-22 09:33:49 -05:00
screenshots := iniFile . Section ( "unified_alerting.screenshots" )
uaCfgScreenshots := uaCfg . Screenshots
2022-06-07 22:04:51 -05:00
uaCfgScreenshots . Capture = screenshots . Key ( "capture" ) . MustBool ( screenshotsDefaultCapture )
2023-01-05 10:07:46 -06:00
captureTimeout := screenshots . Key ( "capture_timeout" ) . MustDuration ( screenshotsDefaultCaptureTimeout )
if captureTimeout > screenshotsMaxCaptureTimeout {
return fmt . Errorf ( "value of setting 'capture_timeout' cannot exceed %s" , screenshotsMaxCaptureTimeout )
}
uaCfgScreenshots . CaptureTimeout = captureTimeout
2022-05-22 09:33:49 -05:00
uaCfgScreenshots . MaxConcurrentScreenshots = screenshots . Key ( "max_concurrent_screenshots" ) . MustInt64 ( screenshotsDefaultMaxConcurrent )
uaCfgScreenshots . UploadExternalImageStorage = screenshots . Key ( "upload_external_image_storage" ) . MustBool ( screenshotsDefaultUploadImageStorage )
uaCfg . Screenshots = uaCfgScreenshots
2022-07-11 11:41:40 -05:00
reservedLabels := iniFile . Section ( "unified_alerting.reserved_labels" )
uaCfgReservedLabels := UnifiedAlertingReservedLabelSettings {
DisabledLabels : make ( map [ string ] struct { } ) ,
}
for _ , label := range util . SplitString ( reservedLabels . Key ( "disabled_labels" ) . MustString ( "" ) ) {
uaCfgReservedLabels . DisabledLabels [ label ] = struct { } { }
}
uaCfg . ReservedLabels = uaCfgReservedLabels
2023-01-05 12:21:07 -06:00
stateHistory := iniFile . Section ( "unified_alerting.state_history" )
2023-01-30 14:24:45 -06:00
stateHistoryLabels := iniFile . Section ( "unified_alerting.state_history.external_labels" )
2023-01-05 12:21:07 -06:00
uaCfgStateHistory := UnifiedAlertingStateHistorySettings {
2023-01-18 13:24:40 -06:00
Enabled : stateHistory . Key ( "enabled" ) . MustBool ( stateHistoryDefaultEnabled ) ,
Backend : stateHistory . Key ( "backend" ) . MustString ( "annotations" ) ,
LokiRemoteURL : stateHistory . Key ( "loki_remote_url" ) . MustString ( "" ) ,
2023-01-30 16:30:05 -06:00
LokiReadURL : stateHistory . Key ( "loki_remote_read_url" ) . MustString ( "" ) ,
LokiWriteURL : stateHistory . Key ( "loki_remote_write_url" ) . MustString ( "" ) ,
2023-01-18 13:24:40 -06:00
LokiTenantID : stateHistory . Key ( "loki_tenant_id" ) . MustString ( "" ) ,
LokiBasicAuthUsername : stateHistory . Key ( "loki_basic_auth_username" ) . MustString ( "" ) ,
LokiBasicAuthPassword : stateHistory . Key ( "loki_basic_auth_password" ) . MustString ( "" ) ,
2023-01-30 14:24:45 -06:00
ExternalLabels : stateHistoryLabels . KeysHash ( ) ,
2023-01-05 12:21:07 -06:00
}
uaCfg . StateHistory = uaCfgStateHistory
2021-09-20 02:12:21 -05:00
cfg . UnifiedAlerting = uaCfg
2021-09-16 09:33:51 -05:00
return nil
}
2021-09-28 05:00:16 -05:00
// GetAlertmanagerDefaultConfiguration returns the value of the
// alertmanagerDefaultConfiguration constant unchanged.
func GetAlertmanagerDefaultConfiguration() string {
	defaultConfig := alertmanagerDefaultConfiguration
	return defaultConfig
}