grafana/pkg/services/secrets/manager/manager.go
Marcus Efraimsson 6768c6c059
Chore: Remove public vars in setting package (#81018)
Removes the public variable setting.SecretKey plus some other ones. 
Introduces some new functions for creating setting.Cfg.
2024-01-23 12:36:22 +01:00

574 lines
16 KiB
Go

package manager
import (
"bytes"
"context"
"crypto/rand"
"encoding/base64"
"errors"
"fmt"
"strconv"
"sync"
"time"
"github.com/prometheus/client_golang/prometheus"
"golang.org/x/sync/errgroup"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/infra/usagestats"
"github.com/grafana/grafana/pkg/services/encryption"
"github.com/grafana/grafana/pkg/services/featuremgmt"
"github.com/grafana/grafana/pkg/services/kmsproviders"
"github.com/grafana/grafana/pkg/services/secrets"
"github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/util"
)
const (
keyIdDelimiter = '#'
)
var (
// now is used for testing purposes,
// as a way to fake time.Now function.
now = time.Now
)
type SecretsService struct {
store secrets.Store
enc encryption.Internal
cfg *setting.Cfg
features featuremgmt.FeatureToggles
usageStats usagestats.Service
mtx sync.Mutex
dataKeyCache *dataKeyCache
pOnce sync.Once
providers map[secrets.ProviderID]secrets.Provider
kmsProvidersService kmsproviders.Service
currentProviderID secrets.ProviderID
log log.Logger
}
func ProvideSecretsService(
store secrets.Store,
kmsProvidersService kmsproviders.Service,
enc encryption.Internal,
cfg *setting.Cfg,
features featuremgmt.FeatureToggles,
usageStats usagestats.Service,
) (*SecretsService, error) {
ttl := cfg.SectionWithEnvOverrides("security.encryption").Key("data_keys_cache_ttl").MustDuration(15 * time.Minute)
currentProviderID := kmsproviders.NormalizeProviderID(secrets.ProviderID(
cfg.SectionWithEnvOverrides("security").Key("encryption_provider").MustString(kmsproviders.Default),
))
s := &SecretsService{
store: store,
enc: enc,
cfg: cfg,
usageStats: usageStats,
kmsProvidersService: kmsProvidersService,
dataKeyCache: newDataKeyCache(ttl),
currentProviderID: currentProviderID,
features: features,
log: log.New("secrets"),
}
enabled := !features.IsEnabledGlobally(featuremgmt.FlagDisableEnvelopeEncryption)
if enabled {
err := s.InitProviders()
if err != nil {
return nil, err
}
}
if _, ok := s.providers[currentProviderID]; enabled && !ok {
return nil, fmt.Errorf("missing configuration for current encryption provider %s", currentProviderID)
}
if !enabled && currentProviderID != kmsproviders.Default {
s.log.Warn("Changing encryption provider requires enabling envelope encryption feature")
}
s.log.Info("Envelope encryption state", "enabled", enabled, "current provider", currentProviderID)
s.registerUsageMetrics()
return s, nil
}
func (s *SecretsService) InitProviders() (err error) {
s.pOnce.Do(func() {
s.providers, err = s.kmsProvidersService.Provide()
})
return
}
func (s *SecretsService) registerUsageMetrics() {
s.usageStats.RegisterMetricsFunc(func(ctx context.Context) (map[string]any, error) {
usageMetrics := make(map[string]any)
// Enabled / disabled
usageMetrics["stats.encryption.envelope_encryption_enabled.count"] = 0
if !s.features.IsEnabled(ctx, featuremgmt.FlagDisableEnvelopeEncryption) {
usageMetrics["stats.encryption.envelope_encryption_enabled.count"] = 1
}
// Current provider
kind, err := s.currentProviderID.Kind()
if err != nil {
return nil, err
}
usageMetrics[fmt.Sprintf("stats.encryption.current_provider.%s.count", kind)] = 1
// Count by kind
countByKind := make(map[string]int)
for id := range s.providers {
kind, err := id.Kind()
if err != nil {
return nil, err
}
countByKind[kind]++
}
for kind, count := range countByKind {
usageMetrics[fmt.Sprintf(`stats.encryption.providers.%s.count`, kind)] = count
}
return usageMetrics, nil
})
}
func (s *SecretsService) providersInitialized() bool {
return len(s.providers) > 0
}
func (s *SecretsService) encryptedWithEnvelopeEncryption(payload []byte) bool {
return len(payload) > 0 && payload[0] == keyIdDelimiter
}
var b64 = base64.RawStdEncoding
func (s *SecretsService) Encrypt(ctx context.Context, payload []byte, opt secrets.EncryptionOptions) ([]byte, error) {
// Use legacy encryption service if featuremgmt.FlagDisableEnvelopeEncryption toggle is on
if s.features.IsEnabled(ctx, featuremgmt.FlagDisableEnvelopeEncryption) {
return s.enc.Encrypt(ctx, payload, s.cfg.SecretKey)
}
var err error
defer func() {
opsCounter.With(prometheus.Labels{
"success": strconv.FormatBool(err == nil),
"operation": OpEncrypt,
}).Inc()
}()
// If encryption featuremgmt.FlagEnvelopeEncryption toggle is on, use envelope encryption
scope := opt()
label := secrets.KeyLabel(scope, s.currentProviderID)
var id string
var dataKey []byte
id, dataKey, err = s.currentDataKey(ctx, label, scope)
if err != nil {
s.log.Error("Failed to get current data key", "error", err, "label", label)
return nil, err
}
var encrypted []byte
encrypted, err = s.enc.Encrypt(ctx, payload, string(dataKey))
if err != nil {
s.log.Error("Failed to encrypt secret", "error", err)
return nil, err
}
prefix := make([]byte, b64.EncodedLen(len(id))+2)
b64.Encode(prefix[1:], []byte(id))
prefix[0] = keyIdDelimiter
prefix[len(prefix)-1] = keyIdDelimiter
blob := make([]byte, len(prefix)+len(encrypted))
copy(blob, prefix)
copy(blob[len(prefix):], encrypted)
return blob, nil
}
// currentDataKey looks up for current data key in cache or database by name, and decrypts it.
// If there's no current data key in cache nor in database it generates a new random data key,
// and stores it into both the in-memory cache and database (encrypted by the encryption provider).
func (s *SecretsService) currentDataKey(ctx context.Context, label string, scope string) (string, []byte, error) {
// We want only one request fetching current data key at time to
// avoid the creation of multiple ones in case there's no one existing.
s.mtx.Lock()
defer s.mtx.Unlock()
// We try to fetch the data key, either from cache or database
id, dataKey, err := s.dataKeyByLabel(ctx, label)
if err != nil {
return "", nil, err
}
// If no existing data key was found, create a new one
if dataKey == nil {
id, dataKey, err = s.newDataKey(ctx, label, scope)
if err != nil {
return "", nil, err
}
}
return id, dataKey, nil
}
// dataKeyByLabel looks up for data key in cache by label.
// Otherwise, it fetches it from database, decrypts it and caches it decrypted.
func (s *SecretsService) dataKeyByLabel(ctx context.Context, label string) (string, []byte, error) {
// 0. Get data key from in-memory cache.
if entry, exists := s.dataKeyCache.getByLabel(label); exists && entry.active {
return entry.id, entry.dataKey, nil
}
// 1. Get data key from database.
dataKey, err := s.store.GetCurrentDataKey(ctx, label)
if err != nil {
if errors.Is(err, secrets.ErrDataKeyNotFound) {
return "", nil, nil
}
return "", nil, err
}
// 2.1 Find the encryption provider.
provider, exists := s.providers[kmsproviders.NormalizeProviderID(dataKey.Provider)]
if !exists {
return "", nil, fmt.Errorf("could not find encryption provider '%s'", dataKey.Provider)
}
// 2.2 Decrypt the data key fetched from the database.
decrypted, err := provider.Decrypt(ctx, dataKey.EncryptedData)
if err != nil {
return "", nil, err
}
// 3. Store the decrypted data key into the in-memory cache.
s.cacheDataKey(dataKey, decrypted)
return dataKey.Id, decrypted, nil
}
// newDataKey creates a new random data key, encrypts it and stores it into the database and cache.
func (s *SecretsService) newDataKey(ctx context.Context, label string, scope string) (string, []byte, error) {
// 1. Create new data key.
dataKey, err := newRandomDataKey()
if err != nil {
return "", nil, err
}
// 2.1 Find the encryption provider.
provider, exists := s.providers[s.currentProviderID]
if !exists {
return "", nil, fmt.Errorf("could not find encryption provider '%s'", s.currentProviderID)
}
// 2.2 Encrypt the data key.
encrypted, err := provider.Encrypt(ctx, dataKey)
if err != nil {
return "", nil, err
}
// 3. Store its encrypted value into the DB.
id := util.GenerateShortUID()
dbDataKey := secrets.DataKey{
Active: true,
Id: id,
Provider: s.currentProviderID,
EncryptedData: encrypted,
Label: label,
Scope: scope,
}
err = s.store.CreateDataKey(ctx, &dbDataKey)
if err != nil {
return "", nil, err
}
return id, dataKey, nil
}
func newRandomDataKey() ([]byte, error) {
rawDataKey := make([]byte, 16)
_, err := rand.Read(rawDataKey)
if err != nil {
return nil, err
}
return rawDataKey, nil
}
func (s *SecretsService) Decrypt(ctx context.Context, payload []byte) ([]byte, error) {
var err error
defer func() {
opsCounter.With(prometheus.Labels{
"success": strconv.FormatBool(err == nil),
"operation": OpDecrypt,
}).Inc()
if err != nil {
s.log.Error("Failed to decrypt secret", "error", err)
}
}()
if len(payload) == 0 {
err = fmt.Errorf("unable to decrypt empty payload")
return nil, err
}
// If encrypted with envelope encryption, the feature is disabled and
// no provider is initialized, then we throw an error.
if s.encryptedWithEnvelopeEncryption(payload) &&
s.features.IsEnabled(ctx, featuremgmt.FlagDisableEnvelopeEncryption) &&
!s.providersInitialized() {
err = fmt.Errorf("failed to decrypt a secret encrypted with envelope encryption: envelope encryption is disabled")
return nil, err
}
var dataKey []byte
if !s.encryptedWithEnvelopeEncryption(payload) {
secretKey := s.cfg.SectionWithEnvOverrides("security").Key("secret_key").Value()
dataKey = []byte(secretKey)
} else {
payload = payload[1:]
endOfKey := bytes.Index(payload, []byte{keyIdDelimiter})
if endOfKey == -1 {
err = fmt.Errorf("could not find valid key id in encrypted payload")
return nil, err
}
b64Key := payload[:endOfKey]
payload = payload[endOfKey+1:]
keyId := make([]byte, b64.DecodedLen(len(b64Key)))
_, err = b64.Decode(keyId, b64Key)
if err != nil {
return nil, err
}
dataKey, err = s.dataKeyById(ctx, string(keyId))
if err != nil {
s.log.Error("Failed to lookup data key by id", "id", string(keyId), "error", err)
return nil, err
}
}
var decrypted []byte
decrypted, err = s.enc.Decrypt(ctx, payload, string(dataKey))
return decrypted, err
}
func (s *SecretsService) EncryptJsonData(ctx context.Context, kv map[string]string, opt secrets.EncryptionOptions) (map[string][]byte, error) {
encrypted := make(map[string][]byte)
for key, value := range kv {
encryptedData, err := s.Encrypt(ctx, []byte(value), opt)
if err != nil {
return nil, err
}
encrypted[key] = encryptedData
}
return encrypted, nil
}
func (s *SecretsService) DecryptJsonData(ctx context.Context, sjd map[string][]byte) (map[string]string, error) {
decrypted := make(map[string]string)
for key, data := range sjd {
decryptedData, err := s.Decrypt(ctx, data)
if err != nil {
return nil, err
}
decrypted[key] = string(decryptedData)
}
return decrypted, nil
}
func (s *SecretsService) GetDecryptedValue(ctx context.Context, sjd map[string][]byte, key, fallback string) string {
if value, ok := sjd[key]; ok {
decryptedData, err := s.Decrypt(ctx, value)
if err != nil {
return fallback
}
return string(decryptedData)
}
return fallback
}
// dataKeyById looks up for data key in cache.
// Otherwise, it fetches it from database and returns it decrypted.
func (s *SecretsService) dataKeyById(ctx context.Context, id string) ([]byte, error) {
// 0. Get decrypted data key from in-memory cache.
if entry, exists := s.dataKeyCache.getById(id); exists {
return entry.dataKey, nil
}
// 1. Get encrypted data key from database.
dataKey, err := s.store.GetDataKey(ctx, id)
if err != nil {
return nil, err
}
// 2.1. Find the encryption provider.
provider, exists := s.providers[kmsproviders.NormalizeProviderID(dataKey.Provider)]
if !exists {
return nil, fmt.Errorf("could not find encryption provider '%s'", dataKey.Provider)
}
// 2.2. Encrypt the data key.
decrypted, err := provider.Decrypt(ctx, dataKey.EncryptedData)
if err != nil {
return nil, err
}
// 3. Store the decrypted data key into the in-memory cache.
s.cacheDataKey(dataKey, decrypted)
return decrypted, nil
}
func (s *SecretsService) GetProviders() map[secrets.ProviderID]secrets.Provider {
return s.providers
}
func (s *SecretsService) RotateDataKeys(ctx context.Context) error {
s.log.Info("Data keys rotation triggered, acquiring lock...")
s.mtx.Lock()
defer s.mtx.Unlock()
s.log.Info("Data keys rotation started")
err := s.store.DisableDataKeys(ctx)
if err != nil {
s.log.Error("Data keys rotation failed", "error", err)
return err
}
s.dataKeyCache.flush()
s.log.Info("Data keys rotation finished successfully")
return nil
}
func (s *SecretsService) ReEncryptDataKeys(ctx context.Context) error {
s.log.Info("Data keys re-encryption triggered")
if s.features.IsEnabled(ctx, featuremgmt.FlagDisableEnvelopeEncryption) {
s.log.Info("Envelope encryption is not enabled but trying to init providers anyway...")
if err := s.InitProviders(); err != nil {
s.log.Error("Envelope encryption providers initialization failed", "error", err)
return err
}
}
if err := s.store.ReEncryptDataKeys(ctx, s.providers, s.currentProviderID); err != nil {
s.log.Error("Data keys re-encryption failed", "error", err)
return err
}
s.dataKeyCache.flush()
s.log.Info("Data keys re-encryption finished successfully")
return nil
}
func (s *SecretsService) Run(ctx context.Context) error {
gc := time.NewTicker(
s.cfg.SectionWithEnvOverrides("security.encryption").Key("data_keys_cache_cleanup_interval").
MustDuration(time.Minute),
)
grp, gCtx := errgroup.WithContext(ctx)
for _, p := range s.providers {
if svc, ok := p.(secrets.BackgroundProvider); ok {
grp.Go(func() error {
return svc.Run(gCtx)
})
}
}
for {
select {
case <-gc.C:
s.log.Debug("Removing expired data keys from cache...")
s.dataKeyCache.removeExpired()
s.log.Debug("Removing expired data keys from cache finished successfully")
case <-gCtx.Done():
s.log.Debug("Grafana is shutting down; stopping...")
gc.Stop()
if err := grp.Wait(); err != nil && !errors.Is(err, context.Canceled) {
return err
}
return nil
}
}
}
// Caching a data key is tricky, because at SecretsService level we cannot guarantee
// that a newly created data key has actually been persisted, depending on the different
// use cases that rely on SecretsService encryption and different database engines that
// we have support for, because the data key creation may have happened within a DB TX,
// that may fail afterwards.
//
// Therefore, if we cache a data key that hasn't been persisted with success (and won't),
// and later that one is used for a encryption operation (aside from the DB TX that created
// it), we may end up with data encrypted by a non-persisted data key, which could end up
// in (unrecoverable) data corruption.
//
// So, we cache the data key by id and/or by label, depending on the data key's lifetime,
// assuming that a data key older than a "caution period" should have been persisted.
//
// Look at the comments inline for further details.
// You can also take a look at the issue below for more context:
// https://github.com/grafana/grafana-enterprise/issues/4252
func (s *SecretsService) cacheDataKey(dataKey *secrets.DataKey, decrypted []byte) {
// First, we cache the data key by id, because cache "by id" is
// only used by decrypt operations, so no risk of corrupting data.
entry := &dataKeyCacheEntry{
id: dataKey.Id,
label: dataKey.Label,
dataKey: decrypted,
active: dataKey.Active,
}
s.dataKeyCache.addById(entry)
// Then, we cache the data key by label, ONLY if data key's lifetime
// is longer than a certain "caution period", because cache "by label"
// is used (only) by encrypt operations, and we want to ensure that
// no data key is cached for encryption ops before being persisted.
const cautionPeriod = 10 * time.Minute
// We consider a "caution period" of 10m to be long enough for any database
// transaction that implied a data key creation to have finished successfully.
//
// Therefore, we consider that if we fetch a data key from the database,
// more than 10m later than its creation, it should have been actually
// persisted - i.e. the transaction that created it is no longer running.
nowMinusCautionPeriod := now().Add(-cautionPeriod)
if dataKey.Created.Before(nowMinusCautionPeriod) {
s.dataKeyCache.addByLabel(entry)
}
}