Alerting: Decrypt secrets before sending configuration to the remote Alertmanager (#83640)

* (WIP) Alerting: Decrypt secrets before sending configuration to the remote Alertmanager

* refactor, fix tests

* test decrypting secrets

* tidy up

* test SendConfiguration, quote keys, refactor tests

* make linter happy

* decrypt configuration before comparing

* copy configuration struct before decrypting

* reduce diff in TestCompareAndSendConfiguration

* clean up remote/alertmanager.go

* make linter happy

* avoid serializing into JSON to copy struct

* codeowners
This commit is contained in:
Santiago 2024-03-19 12:12:03 +01:00 committed by GitHub
parent b5d74ac200
commit c9bb18101c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 233 additions and 34 deletions

2
go.mod
View File

@ -341,7 +341,7 @@ require (
github.com/mitchellh/copystructure v1.2.0 // indirect
github.com/mitchellh/mapstructure v1.5.0 //@grafana/grafana-authnz-team
github.com/mitchellh/reflectwalk v1.0.2 // indirect
github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 // indirect
github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 // @grafana/alerting-squad-backend
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.0.3-0.20220512140940-7b36cea86235 // indirect

View File

@ -401,6 +401,8 @@ github.com/grafana/e2e v0.1.1-0.20221018202458-cffd2bb71c7b h1:Ha+kSIoTutf4ytlVw
github.com/grafana/e2e v0.1.1-0.20221018202458-cffd2bb71c7b/go.mod h1:3UsooRp7yW5/NJQBlXcTsAHOoykEhNUYXkQ3r6ehEEY=
github.com/grafana/gomemcache v0.0.0-20231023152154-6947259a0586 h1:/of8Z8taCPftShATouOrBVy6GaTTjgQd/VfNiZp/VXQ=
github.com/grafana/gomemcache v0.0.0-20231023152154-6947259a0586/go.mod h1:PGk3RjYHpxMM8HFPhKKo+vve3DdlPUELZLSDEFehPuU=
github.com/grafana/grafana-aws-sdk v0.25.0 h1:XNi3iA/C/KPArmVbQfbwKQROaIotd38nCRjNE6P1UP0=
github.com/grafana/grafana-aws-sdk v0.25.0/go.mod h1:3zghFF6edrxn0d6k6X9HpGZXDH+VfA+MwD2Pc/9X0ec=
github.com/grafana/grafana-plugin-sdk-go v0.212.0/go.mod h1:qsI4ktDf0lig74u8SLPJf9zRdVxWV/W4Wi+Ox6gifgs=
github.com/grafana/grafana/pkg/promlib v0.0.2/go.mod h1:3El4NlsfALz8QQCbEGHGFvJUG+538QLMuALRhZ3pcoo=
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7 h1:pdN6V1QBWetyv/0+wjACpqVH+eVULgEjkurDLq3goeM=

View File

@ -2,12 +2,14 @@ package definitions
import (
"context"
"encoding/base64"
"encoding/json"
"fmt"
"time"
"github.com/go-openapi/strfmt"
"github.com/grafana/alerting/definition"
"github.com/mohae/deepcopy"
amv2 "github.com/prometheus/alertmanager/api/v2/models"
"github.com/prometheus/alertmanager/config"
"github.com/prometheus/common/model"
@ -615,6 +617,34 @@ func (c *PostableUserConfig) validate() error {
return nil
}
// Decrypt returns a copy of the configuration struct with decrypted secure settings in receivers.
func (c *PostableUserConfig) Decrypt(decryptFn func(payload []byte) ([]byte, error)) (PostableUserConfig, error) {
newCfg, ok := deepcopy.Copy(c).(*PostableUserConfig)
if !ok {
return PostableUserConfig{}, fmt.Errorf("failed to copy config")
}
// Iterate through receivers and decrypt secure settings.
for _, rcv := range newCfg.AlertmanagerConfig.Receivers {
for _, gmr := range rcv.PostableGrafanaReceivers.GrafanaManagedReceivers {
for k, v := range gmr.SecureSettings {
decoded, err := base64.StdEncoding.DecodeString(v)
if err != nil {
return PostableUserConfig{}, fmt.Errorf("failed to decode value for key '%s': %w", k, err)
}
decrypted, err := decryptFn(decoded)
if err != nil {
return PostableUserConfig{}, fmt.Errorf("failed to decrypt value for key '%s': %w", k, err)
}
gmr.SecureSettings[k] = string(decrypted)
}
}
}
return *newCfg, nil
}
// GetGrafanaReceiverMap returns a map that associates UUIDs to grafana receivers
func (c *PostableUserConfig) GetGrafanaReceiverMap() map[string]*PostableGrafanaReceiver {
UIDs := make(map[string]*PostableGrafanaReceiver)

View File

@ -190,7 +190,7 @@ func (ng *AlertNG) init() error {
}
// Create remote Alertmanager.
remoteAM, err := createRemoteAlertmanager(orgID, ng.Cfg.UnifiedAlerting.RemoteAlertmanager, ng.KVStore, m)
remoteAM, err := createRemoteAlertmanager(orgID, ng.Cfg.UnifiedAlerting.RemoteAlertmanager, ng.KVStore, ng.SecretsService.Decrypt, m)
if err != nil {
moaLogger.Error("Failed to create remote Alertmanager, falling back to using only the internal one", "err", err)
return internalAM, nil
@ -535,7 +535,7 @@ func ApplyStateHistoryFeatureToggles(cfg *setting.UnifiedAlertingStateHistorySet
}
}
func createRemoteAlertmanager(orgID int64, amCfg setting.RemoteAlertmanagerSettings, kvstore kvstore.KVStore, m *metrics.RemoteAlertmanager) (*remote.Alertmanager, error) {
func createRemoteAlertmanager(orgID int64, amCfg setting.RemoteAlertmanagerSettings, kvstore kvstore.KVStore, decryptFn remote.DecryptFn, m *metrics.RemoteAlertmanager) (*remote.Alertmanager, error) {
externalAMCfg := remote.AlertmanagerConfig{
OrgID: orgID,
URL: amCfg.URL,
@ -544,5 +544,5 @@ func createRemoteAlertmanager(orgID int64, amCfg setting.RemoteAlertmanagerSetti
}
// We won't be handling files on disk, we can pass an empty string as workingDirPath.
stateStore := notifier.NewFileStore(orgID, kvstore, "")
return remote.NewAlertmanager(externalAMCfg, stateStore, m)
return remote.NewAlertmanager(externalAMCfg, stateStore, decryptFn, m)
}

View File

@ -49,6 +49,7 @@ func TestMultiorgAlertmanager_RemoteSecondaryMode(t *testing.T) {
})
// Create the factory function for the MOA using the forked Alertmanager in remote secondary mode.
secretsService := secretsManager.SetupTestService(t, fakes.NewFakeSecretsStore())
override := notifier.WithAlertmanagerOverride(func(factoryFn notifier.OrgAlertmanagerFactory) notifier.OrgAlertmanagerFactory {
return func(ctx context.Context, orgID int64) (notifier.Alertmanager, error) {
// Create internal Alertmanager.
@ -65,7 +66,7 @@ func TestMultiorgAlertmanager_RemoteSecondaryMode(t *testing.T) {
// We won't be handling files on disk, we can pass an empty string as workingDirPath.
stateStore := notifier.NewFileStore(orgID, kvStore, "")
m := metrics.NewRemoteAlertmanagerMetrics(prometheus.NewRegistry())
remoteAM, err := remote.NewAlertmanager(externalAMCfg, stateStore, m)
remoteAM, err := remote.NewAlertmanager(externalAMCfg, stateStore, secretsService.Decrypt, m)
require.NoError(t, err)
// Use both Alertmanager implementations in the forked Alertmanager.
@ -87,7 +88,6 @@ func TestMultiorgAlertmanager_RemoteSecondaryMode(t *testing.T) {
DefaultConfiguration: setting.GetAlertmanagerDefaultConfiguration(),
}, // do not poll in tests.
}
secretsService := secretsManager.SetupTestService(t, fakes.NewFakeSecretsStore())
moa, err := notifier.NewMultiOrgAlertmanager(
cfg,
configStore,

View File

@ -3,6 +3,7 @@ package remote
import (
"context"
"crypto/md5"
"encoding/json"
"fmt"
"net/http"
"net/url"
@ -25,7 +26,11 @@ type stateStore interface {
GetFullState(ctx context.Context, keys ...string) (string, error)
}
// DecryptFn is a function that takes in an encrypted value and returns it decrypted.
type DecryptFn func(ctx context.Context, payload []byte) ([]byte, error)
type Alertmanager struct {
decrypt DecryptFn
log log.Logger
metrics *metrics.RemoteAlertmanager
orgID int64
@ -61,7 +66,7 @@ func (cfg *AlertmanagerConfig) Validate() error {
return nil
}
func NewAlertmanager(cfg AlertmanagerConfig, store stateStore, metrics *metrics.RemoteAlertmanager) (*Alertmanager, error) {
func NewAlertmanager(cfg AlertmanagerConfig, store stateStore, decryptFn DecryptFn, metrics *metrics.RemoteAlertmanager) (*Alertmanager, error) {
if err := cfg.Validate(); err != nil {
return nil, err
}
@ -111,6 +116,7 @@ func NewAlertmanager(cfg AlertmanagerConfig, store stateStore, metrics *metrics.
return &Alertmanager{
amClient: amc,
decrypt: decryptFn,
log: logger,
metrics: metrics,
mimirClient: mc,
@ -176,21 +182,42 @@ func (am *Alertmanager) checkReadiness(ctx context.Context) error {
// CompareAndSendConfiguration checks whether a given configuration is being used by the remote Alertmanager.
// If not, it sends the configuration to the remote Alertmanager.
func (am *Alertmanager) CompareAndSendConfiguration(ctx context.Context, config *models.AlertConfiguration) error {
if am.shouldSendConfig(ctx, config) {
am.metrics.ConfigSyncsTotal.Inc()
if err := am.mimirClient.CreateGrafanaAlertmanagerConfig(
ctx,
config.AlertmanagerConfiguration,
config.ConfigurationHash,
config.ID,
config.CreatedAt,
config.Default,
); err != nil {
am.metrics.ConfigSyncErrorsTotal.Inc()
return err
}
am.metrics.LastConfigSync.SetToCurrentTime()
c, err := notifier.Load([]byte(config.AlertmanagerConfiguration))
if err != nil {
return err
}
// Decrypt the configuration before comparing.
fn := func(payload []byte) ([]byte, error) {
return am.decrypt(ctx, payload)
}
decrypted, err := c.Decrypt(fn)
if err != nil {
return err
}
rawDecrypted, err := json.Marshal(decrypted)
if err != nil {
return err
}
// Send the configuration only if we need to.
if !am.shouldSendConfig(ctx, rawDecrypted) {
return nil
}
am.metrics.ConfigSyncsTotal.Inc()
if err := am.mimirClient.CreateGrafanaAlertmanagerConfig(
ctx,
string(rawDecrypted),
config.ConfigurationHash,
config.ID,
config.CreatedAt,
config.Default,
); err != nil {
am.metrics.ConfigSyncErrorsTotal.Inc()
return err
}
am.metrics.LastConfigSync.SetToCurrentTime()
return nil
}
@ -375,7 +402,7 @@ func (am *Alertmanager) CleanUp() {}
// shouldSendConfig compares the remote Alertmanager configuration with our local one.
// It returns true if the configurations are different.
func (am *Alertmanager) shouldSendConfig(ctx context.Context, config *models.AlertConfiguration) bool {
func (am *Alertmanager) shouldSendConfig(ctx context.Context, rawConfig []byte) bool {
rc, err := am.mimirClient.GetGrafanaAlertmanagerConfig(ctx)
if err != nil {
// Log the error and return true so we try to upload our config anyway.
@ -383,7 +410,7 @@ func (am *Alertmanager) shouldSendConfig(ctx context.Context, config *models.Ale
return true
}
return md5.Sum([]byte(rc.GrafanaAlertmanagerConfig)) != md5.Sum([]byte(config.AlertmanagerConfiguration))
return md5.Sum([]byte(rc.GrafanaAlertmanagerConfig)) != md5.Sum(rawConfig)
}
// shouldSendState compares the remote Alertmanager state with our local one.

View File

@ -4,20 +4,31 @@ import (
"context"
"crypto/md5"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"io"
"math/rand"
"net/http"
"net/http/httptest"
"os"
"strings"
"testing"
"time"
"github.com/go-openapi/strfmt"
"github.com/grafana/grafana/pkg/infra/db"
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/notifier"
"github.com/grafana/grafana/pkg/services/ngalert/tests/fakes"
"github.com/grafana/grafana/pkg/services/ngalert/remote/client"
ngfakes "github.com/grafana/grafana/pkg/services/ngalert/tests/fakes"
"github.com/grafana/grafana/pkg/services/secrets"
"github.com/grafana/grafana/pkg/services/secrets/database"
"github.com/grafana/grafana/pkg/services/secrets/fakes"
secretsManager "github.com/grafana/grafana/pkg/services/secrets/manager"
"github.com/grafana/grafana/pkg/tests/testsuite"
"github.com/grafana/grafana/pkg/util"
amv2 "github.com/prometheus/alertmanager/api/v2/models"
"github.com/prometheus/alertmanager/cluster/clusterpb"
@ -25,8 +36,13 @@ import (
"github.com/stretchr/testify/require"
)
// Valid Grafana Alertmanager configuration.
// Valid Grafana Alertmanager configurations.
const testGrafanaConfig = `{"template_files":{},"alertmanager_config":{"route":{"receiver":"grafana-default-email","group_by":["grafana_folder","alertname"]},"templates":null,"receivers":[{"name":"grafana-default-email","grafana_managed_receiver_configs":[{"uid":"","name":"some other name","type":"email","disableResolveMessage":false,"settings":{"addresses":"\u003cexample@email.com\u003e"},"secureSettings":null}]}]}}`
const testGrafanaConfigWithSecret = `{"template_files":{},"alertmanager_config":{"route":{"receiver":"grafana-default-email","group_by":["grafana_folder","alertname"]},"templates":null,"receivers":[{"name":"grafana-default-email","grafana_managed_receiver_configs":[{"uid":"dde6ntuob69dtf","name":"WH","type":"webhook","disableResolveMessage":false,"settings":{"url":"http://localhost:8080","username":"test"},"secureSettings":{"password":"test"}}]}]}}`
func TestMain(m *testing.M) {
testsuite.Run(m)
}
func TestNewAlertmanager(t *testing.T) {
tests := []struct {
@ -62,6 +78,7 @@ func TestNewAlertmanager(t *testing.T) {
},
}
secretsService := secretsManager.SetupTestService(t, fakes.NewFakeSecretsStore())
for _, test := range tests {
t.Run(test.name, func(tt *testing.T) {
cfg := AlertmanagerConfig{
@ -71,7 +88,7 @@ func TestNewAlertmanager(t *testing.T) {
BasicAuthPassword: test.password,
}
m := metrics.NewRemoteAlertmanagerMetrics(prometheus.NewRegistry())
am, err := NewAlertmanager(cfg, nil, m)
am, err := NewAlertmanager(cfg, nil, secretsService.Decrypt, m)
if test.expErr != "" {
require.EqualError(tt, err, test.expErr)
return
@ -90,10 +107,32 @@ func TestApplyConfig(t *testing.T) {
errorHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusInternalServerError)
})
var configSent string
okHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method == http.MethodPost && strings.Contains(r.URL.Path, "/config") {
var c client.UserGrafanaConfig
require.NoError(t, json.NewDecoder(r.Body).Decode(&c))
configSent = c.GrafanaAlertmanagerConfig
}
w.WriteHeader(http.StatusOK)
})
// Encrypt receivers to save secrets in the database.
var c apimodels.PostableUserConfig
require.NoError(t, json.Unmarshal([]byte(testGrafanaConfigWithSecret), &c))
secretsService := secretsManager.SetupTestService(t, database.ProvideSecretsStore(db.InitTestDB(t)))
err := notifier.EncryptReceiverConfigs(c.AlertmanagerConfig.Receivers, func(ctx context.Context, payload []byte) ([]byte, error) {
return secretsService.Encrypt(ctx, payload, secrets.WithoutScope())
})
require.NoError(t, err)
// The encrypted configuration should be different than the one we will send.
encryptedConfig, err := json.Marshal(c)
require.NoError(t, err)
require.NotEqual(t, testGrafanaConfig, encryptedConfig)
// ApplyConfig performs a readiness check at startup.
// A non-200 response should result in an error.
server := httptest.NewServer(errorHandler)
@ -104,16 +143,19 @@ func TestApplyConfig(t *testing.T) {
}
ctx := context.Background()
store := fakes.NewFakeKVStore(t)
store := ngfakes.NewFakeKVStore(t)
fstore := notifier.NewFileStore(1, store, "")
require.NoError(t, store.Set(ctx, cfg.OrgID, "alertmanager", notifier.SilencesFilename, "test"))
require.NoError(t, store.Set(ctx, cfg.OrgID, "alertmanager", notifier.NotificationLogFilename, "test"))
// An error response from the remote Alertmanager should result in the readiness check failing.
m := metrics.NewRemoteAlertmanagerMetrics(prometheus.NewRegistry())
am, err := NewAlertmanager(cfg, fstore, m)
am, err := NewAlertmanager(cfg, fstore, secretsService.Decrypt, m)
require.NoError(t, err)
config := &ngmodels.AlertConfiguration{}
config := &ngmodels.AlertConfiguration{
AlertmanagerConfiguration: string(encryptedConfig),
}
require.Error(t, am.ApplyConfig(ctx, config))
require.False(t, am.Ready())
@ -122,12 +164,104 @@ func TestApplyConfig(t *testing.T) {
require.NoError(t, am.ApplyConfig(ctx, config))
require.True(t, am.Ready())
// Secrets in the sent configuration should be unencrypted.
require.JSONEq(t, testGrafanaConfigWithSecret, configSent)
// If we already got a 200 status code response, we shouldn't make the HTTP request again.
server.Config.Handler = errorHandler
require.NoError(t, am.ApplyConfig(ctx, config))
require.True(t, am.Ready())
}
func TestCompareAndSendConfiguration(t *testing.T) {
testValue := []byte("test")
testErr := errors.New("test error")
decryptFn := func(_ context.Context, payload []byte) ([]byte, error) {
if string(payload) == string(testValue) {
return testValue, nil
}
return nil, testErr
}
var got string
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Add("content-type", "application/json")
b, err := io.ReadAll(r.Body)
require.NoError(t, err)
require.NoError(t, r.Body.Close())
got = string(b)
_, err = w.Write([]byte(`{"status": "success"}`))
require.NoError(t, err)
}))
fstore := notifier.NewFileStore(1, ngfakes.NewFakeKVStore(t), "")
m := metrics.NewRemoteAlertmanagerMetrics(prometheus.NewRegistry())
cfg := AlertmanagerConfig{
OrgID: 1,
TenantID: "test",
URL: server.URL,
}
am, err := NewAlertmanager(cfg,
fstore,
decryptFn,
m,
)
require.NoError(t, err)
tests := []struct {
name string
config string
expCfg *client.UserGrafanaConfig
expErr string
}{
{
"invalid config",
"{}",
nil,
"unable to parse Alertmanager configuration: no route provided in config",
},
{
"invalid base-64 in key",
strings.Replace(testGrafanaConfigWithSecret, `"password":"test"`, `"password":"!"`, 1),
nil,
"failed to decode value for key 'password': illegal base64 data at input byte 0",
},
{
"decrypt error",
testGrafanaConfigWithSecret,
nil,
fmt.Sprintf("failed to decrypt value for key 'password': %s", testErr.Error()),
},
{
"no error",
strings.Replace(testGrafanaConfigWithSecret, `"password":"test"`, fmt.Sprintf("%q:%q", "password", base64.StdEncoding.EncodeToString(testValue)), 1),
&client.UserGrafanaConfig{
GrafanaAlertmanagerConfig: testGrafanaConfigWithSecret,
},
"",
},
}
for _, test := range tests {
t.Run(test.name, func(tt *testing.T) {
cfg := ngmodels.AlertConfiguration{
AlertmanagerConfiguration: test.config,
}
err = am.CompareAndSendConfiguration(context.Background(), &cfg)
if test.expErr == "" {
require.NoError(tt, err)
rawCfg, err := json.Marshal(test.expCfg)
require.NoError(tt, err)
require.JSONEq(tt, string(rawCfg), got)
return
}
require.Equal(tt, test.expErr, err.Error())
})
}
}
func TestIntegrationRemoteAlertmanagerApplyConfigOnlyUploadsOnce(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test")
@ -162,7 +296,7 @@ func TestIntegrationRemoteAlertmanagerApplyConfigOnlyUploadsOnce(t *testing.T) {
silences := []byte("test-silences")
nflog := []byte("test-notifications")
store := fakes.NewFakeKVStore(t)
store := ngfakes.NewFakeKVStore(t)
fstore := notifier.NewFileStore(cfg.OrgID, store, "")
ctx := context.Background()
@ -179,8 +313,9 @@ func TestIntegrationRemoteAlertmanagerApplyConfigOnlyUploadsOnce(t *testing.T) {
require.NoError(t, err)
encodedFullState := base64.StdEncoding.EncodeToString(fullState)
secretsService := secretsManager.SetupTestService(t, fakes.NewFakeSecretsStore())
m := metrics.NewRemoteAlertmanagerMetrics(prometheus.NewRegistry())
am, err := NewAlertmanager(cfg, fstore, m)
am, err := NewAlertmanager(cfg, fstore, secretsService.Decrypt, m)
require.NoError(t, err)
// We should have no configuration or state at first.
@ -264,8 +399,10 @@ func TestIntegrationRemoteAlertmanagerSilences(t *testing.T) {
TenantID: tenantID,
BasicAuthPassword: password,
}
secretsService := secretsManager.SetupTestService(t, fakes.NewFakeSecretsStore())
m := metrics.NewRemoteAlertmanagerMetrics(prometheus.NewRegistry())
am, err := NewAlertmanager(cfg, nil, m)
am, err := NewAlertmanager(cfg, nil, secretsService.Decrypt, m)
require.NoError(t, err)
// We should have no silences at first.
@ -345,8 +482,10 @@ func TestIntegrationRemoteAlertmanagerAlerts(t *testing.T) {
TenantID: tenantID,
BasicAuthPassword: password,
}
secretsService := secretsManager.SetupTestService(t, fakes.NewFakeSecretsStore())
m := metrics.NewRemoteAlertmanagerMetrics(prometheus.NewRegistry())
am, err := NewAlertmanager(cfg, nil, m)
am, err := NewAlertmanager(cfg, nil, secretsService.Decrypt, m)
require.NoError(t, err)
// Wait until the Alertmanager is ready to send alerts.
@ -412,8 +551,9 @@ func TestIntegrationRemoteAlertmanagerReceivers(t *testing.T) {
BasicAuthPassword: password,
}
secretsService := secretsManager.SetupTestService(t, fakes.NewFakeSecretsStore())
m := metrics.NewRemoteAlertmanagerMetrics(prometheus.NewRegistry())
am, err := NewAlertmanager(cfg, nil, m)
am, err := NewAlertmanager(cfg, nil, secretsService.Decrypt, m)
require.NoError(t, err)
// We should start with the default config.