Alerting: Add option to use Redis in cluster mode for Alerting HA (#88696)

* Add config option to use Redis in cluster mode

* Use UniversalOptions
This commit is contained in:
Fayzal Ghantiwala 2024-06-05 17:02:25 +01:00 committed by GitHub
parent 16cc75b02c
commit 80f54778f3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 74 additions and 21 deletions

View File

@ -1180,7 +1180,12 @@ admin_config_poll_interval = 60s
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
alertmanager_config_poll_interval = 60s
# The redis server address that should be connected to.
# Set to true when using redis in cluster mode.
ha_redis_cluster_mode_enabled = false
# The redis server address(es) to connect to.
# This is either a single address or, when using redis in cluster mode,
# the cluster configuration address or a comma-separated list of addresses.
ha_redis_address =
# The username that should be used to authenticate with the redis server.

View File

@ -1106,7 +1106,12 @@
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
;alertmanager_config_poll_interval = 60s
# The redis server address that should be connected to.
# Set to true when using redis in cluster mode.
;ha_redis_cluster_mode_enabled = false
# The redis server address(es) to connect to.
# This is either a single address or, when using redis in cluster mode,
# the cluster configuration address or a comma-separated list of addresses.
;ha_redis_address =
# The username that should be used to authenticate with the redis server.

View File

@ -2,9 +2,11 @@ package notifier
import (
"context"
"crypto/tls"
"slices"
"sort"
"strconv"
"strings"
"sync"
"time"
@ -21,13 +23,14 @@ import (
)
type redisConfig struct {
addr string
username string
password string
db int
name string
prefix string
maxConns int
addr string
username string
password string
db int
name string
prefix string
maxConns int
clusterMode bool
tlsEnabled bool
tls dstls.ClientConfig
@ -55,7 +58,7 @@ const (
type redisPeer struct {
name string
redis *redis.Client
redis redis.UniversalClient
prefix string
logger log.Logger
states map[string]alertingCluster.State
@ -95,25 +98,34 @@ func newRedisPeer(cfg redisConfig, logger log.Logger, reg prometheus.Registerer,
poolSize = cfg.maxConns
}
opts := &redis.Options{
Addr: cfg.addr,
Username: cfg.username,
Password: cfg.password,
DB: cfg.db,
PoolSize: poolSize,
}
addrs := strings.Split(cfg.addr, ",")
var tlsClientConfig *tls.Config
var err error
if cfg.tlsEnabled {
tlsClientConfig, err := cfg.tls.GetTLSConfig()
tlsClientConfig, err = cfg.tls.GetTLSConfig()
if err != nil {
logger.Error("Failed to get TLS config", "err", err)
return nil, err
} else {
opts.TLSConfig = tlsClientConfig
}
}
rdb := redis.NewClient(opts)
opts := &redis.UniversalOptions{
Addrs: addrs,
Username: cfg.username,
Password: cfg.password,
DB: cfg.db,
PoolSize: poolSize,
TLSConfig: tlsClientConfig,
}
var rdb redis.UniversalClient
if cfg.clusterMode {
rdb = redis.NewClusterClient(opts.Cluster())
} else {
rdb = redis.NewClient(opts.Simple())
}
cmd := rdb.Ping(context.Background())
if cmd.Err() != nil {
logger.Error("Failed to ping redis - redis-based alertmanager clustering may not be available", "err", cmd.Err())

View File

@ -16,6 +16,35 @@ import (
"github.com/stretchr/testify/require"
)
// TestNewRedisPeerClusterMode verifies that newRedisPeer succeeds when
// clusterMode is enabled and that the resulting client can ping a
// TLS-enabled miniredis server.
func TestNewRedisPeerClusterMode(t *testing.T) {
	// Client and server certificates/keys are written to a temp dir, both issued by the same CA.
	paths := createX509TestDir(t)

	// Start miniredis with server-side TLS only; no client certificate is requested.
	serverCert, err := tls.LoadX509KeyPair(paths.serverCert, paths.serverKey)
	require.NoError(t, err)

	serverTLS := &tls.Config{
		Certificates: []tls.Certificate{serverCert},
		ClientAuth:   tls.NoClientCert,
	}
	server, err := miniredis.RunTLS(serverTLS)
	require.NoError(t, err)
	defer server.Close()

	// Configure the peer for cluster mode against the single miniredis address,
	// trusting the test CA for the TLS handshake.
	cfg := redisConfig{
		clusterMode: true,
		addr:        server.Addr(),
		tlsEnabled:  true,
		tls: dstls.ClientConfig{
			CAPath:     paths.ca,
			ServerName: "localhost",
		},
	}
	peer, err := newRedisPeer(cfg, log.NewNopLogger(), prometheus.NewRegistry(), 60*time.Second)
	require.NoError(t, err)

	// The peer's client must be able to reach the server.
	require.NoError(t, peer.redis.Ping(context.Background()).Err())
}
func TestNewRedisPeerWithTLS(t *testing.T) {
// Write client and server certificates/keys to tempDir, both issued by the same CA
certPaths := createX509TestDir(t)

View File

@ -73,6 +73,7 @@ type UnifiedAlertingSettings struct {
HAGossipInterval time.Duration
HAPushPullInterval time.Duration
HALabel string
HARedisClusterModeEnabled bool
HARedisAddr string
HARedisPeerName string
HARedisPrefix string
@ -222,6 +223,7 @@ func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
uaCfg.HAListenAddr = ua.Key("ha_listen_address").MustString(alertmanagerDefaultClusterAddr)
uaCfg.HAAdvertiseAddr = ua.Key("ha_advertise_address").MustString("")
uaCfg.HALabel = ua.Key("ha_label").MustString("")
uaCfg.HARedisClusterModeEnabled = ua.Key("ha_redis_cluster_mode_enabled").MustBool(false)
uaCfg.HARedisAddr = ua.Key("ha_redis_address").MustString("")
uaCfg.HARedisPeerName = ua.Key("ha_redis_peer_name").MustString("")
uaCfg.HARedisPrefix = ua.Key("ha_redis_prefix").MustString("")