From 80f54778f3ee3e61744a8a0790309c6fd3a0bb31 Mon Sep 17 00:00:00 2001 From: Fayzal Ghantiwala <114010985+fayzal-g@users.noreply.github.com> Date: Wed, 5 Jun 2024 17:02:25 +0100 Subject: [PATCH] Alerting: Add option to use Redis in cluster mode for Alerting HA (#88696) * Add config option to use Redis in cluster mode * Use UniversalOptions --- conf/defaults.ini | 7 ++- conf/sample.ini | 7 ++- pkg/services/ngalert/notifier/redis_peer.go | 50 ++++++++++++------- .../ngalert/notifier/redis_peer_test.go | 29 +++++++++++ pkg/setting/setting_unified_alerting.go | 2 + 5 files changed, 74 insertions(+), 21 deletions(-) diff --git a/conf/defaults.ini b/conf/defaults.ini index 639c21b97e5..2799cfef92f 100644 --- a/conf/defaults.ini +++ b/conf/defaults.ini @@ -1180,7 +1180,12 @@ admin_config_poll_interval = 60s # The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m. alertmanager_config_poll_interval = 60s -# The redis server address that should be connected to. +# Set to true when using redis in cluster mode. +ha_redis_cluster_mode_enabled = false + +# The redis server address(es) that should be connected to. +# Can either be a single address, or if using redis in cluster mode, +# the cluster configuration address or a comma-separated list of addresses. ha_redis_address = # The username that should be used to authenticate with the redis server. diff --git a/conf/sample.ini b/conf/sample.ini index 86127d55eed..2e2d3f4d764 100644 --- a/conf/sample.ini +++ b/conf/sample.ini @@ -1106,7 +1106,12 @@ # The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m. ;alertmanager_config_poll_interval = 60s -# The redis server address that should be connected to. +# Set to true when using redis in cluster mode. +;ha_redis_cluster_mode_enabled = false + +# The redis server address(es) that should be connected to. +# Can either be a single address, or if using redis in cluster mode, +# the cluster configuration address or a comma-separated list of addresses. ;ha_redis_address = # The username that should be used to authenticate with the redis server. diff --git a/pkg/services/ngalert/notifier/redis_peer.go b/pkg/services/ngalert/notifier/redis_peer.go index b1ce249fe96..5a5ad3f8cfc 100644 --- a/pkg/services/ngalert/notifier/redis_peer.go +++ b/pkg/services/ngalert/notifier/redis_peer.go @@ -2,9 +2,11 @@ package notifier import ( "context" + "crypto/tls" "slices" "sort" "strconv" + "strings" "sync" "time" @@ -21,13 +23,14 @@ import ( ) type redisConfig struct { - addr string - username string - password string - db int - name string - prefix string - maxConns int + addr string + username string + password string + db int + name string + prefix string + maxConns int + clusterMode bool tlsEnabled bool tls dstls.ClientConfig @@ -55,7 +58,7 @@ const ( type redisPeer struct { name string - redis *redis.Client + redis redis.UniversalClient prefix string logger log.Logger states map[string]alertingCluster.State @@ -95,25 +98,34 @@ func newRedisPeer(cfg redisConfig, logger log.Logger, reg prometheus.Registerer, poolSize = cfg.maxConns } - opts := &redis.Options{ - Addr: cfg.addr, - Username: cfg.username, - Password: cfg.password, - DB: cfg.db, - PoolSize: poolSize, - } + addrs := strings.Split(cfg.addr, ",") + var tlsClientConfig *tls.Config + var err error if cfg.tlsEnabled { - tlsClientConfig, err := cfg.tls.GetTLSConfig() + tlsClientConfig, err = cfg.tls.GetTLSConfig() if err != nil { logger.Error("Failed to get TLS config", "err", err) return nil, err - } else { - opts.TLSConfig = tlsClientConfig } } - rdb := redis.NewClient(opts) + opts := &redis.UniversalOptions{ + Addrs: addrs, + Username: cfg.username, + Password: cfg.password, + DB: cfg.db, + PoolSize: poolSize, + TLSConfig: tlsClientConfig, + } + + var rdb redis.UniversalClient + if cfg.clusterMode { + rdb = redis.NewClusterClient(opts.Cluster()) + } else { + rdb = redis.NewClient(opts.Simple()) + } + cmd := rdb.Ping(context.Background()) if cmd.Err() != nil { logger.Error("Failed to ping redis - redis-based alertmanager clustering may not be available", "err", cmd.Err()) diff --git a/pkg/services/ngalert/notifier/redis_peer_test.go b/pkg/services/ngalert/notifier/redis_peer_test.go index 121c720a21d..9b88198cca2 100644 --- a/pkg/services/ngalert/notifier/redis_peer_test.go +++ b/pkg/services/ngalert/notifier/redis_peer_test.go @@ -16,6 +16,35 @@ import ( "github.com/stretchr/testify/require" ) +func TestNewRedisPeerClusterMode(t *testing.T) { + // Write client and server certificates/keys to tempDir, both issued by the same CA + certPaths := createX509TestDir(t) + + // Set up tls.Config and start miniredis with server-side TLS + x509Cert, err := tls.LoadX509KeyPair(certPaths.serverCert, certPaths.serverKey) + require.NoError(t, err) + + mr, err := miniredis.RunTLS(&tls.Config{ + Certificates: []tls.Certificate{x509Cert}, + ClientAuth: tls.NoClientCert, + }) + require.NoError(t, err) + defer mr.Close() + + redisPeer, err := newRedisPeer(redisConfig{ + clusterMode: true, + addr: mr.Addr(), + tlsEnabled: true, + tls: dstls.ClientConfig{ + CAPath: certPaths.ca, + ServerName: "localhost", + }}, log.NewNopLogger(), prometheus.NewRegistry(), time.Second*60) + require.NoError(t, err) + + ping := redisPeer.redis.Ping(context.Background()) + require.NoError(t, ping.Err()) +} + func TestNewRedisPeerWithTLS(t *testing.T) { // Write client and server certificates/keys to tempDir, both issued by the same CA certPaths := createX509TestDir(t) diff --git a/pkg/setting/setting_unified_alerting.go b/pkg/setting/setting_unified_alerting.go index bfdc46dc4dc..d6f674979e3 100644 --- a/pkg/setting/setting_unified_alerting.go +++ b/pkg/setting/setting_unified_alerting.go @@ -73,6 +73,7 @@ type UnifiedAlertingSettings struct { HAGossipInterval time.Duration HAPushPullInterval time.Duration HALabel string + HARedisClusterModeEnabled bool HARedisAddr string HARedisPeerName string HARedisPrefix string @@ -222,6 +223,7 @@ func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error { uaCfg.HAListenAddr = ua.Key("ha_listen_address").MustString(alertmanagerDefaultClusterAddr) uaCfg.HAAdvertiseAddr = ua.Key("ha_advertise_address").MustString("") uaCfg.HALabel = ua.Key("ha_label").MustString("") + uaCfg.HARedisClusterModeEnabled = ua.Key("ha_redis_cluster_mode_enabled").MustBool(false) uaCfg.HARedisAddr = ua.Key("ha_redis_address").MustString("") uaCfg.HARedisPeerName = ua.Key("ha_redis_peer_name").MustString("") uaCfg.HARedisPrefix = ua.Key("ha_redis_prefix").MustString("")