Encryption: Fix DEKs cache (#43129)

* Encryption: Fix DEKs cache

* Clarify tests
This commit is contained in:
Joan López de la Franca Beltran 2021-12-27 18:04:47 +01:00 committed by GitHub
parent c3eb1ffe85
commit 83bc445d3e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 109 additions and 11 deletions

View File

@ -20,6 +20,7 @@ import (
"github.com/grafana/grafana/pkg/services/pluginsettings"
"github.com/grafana/grafana/pkg/services/provisioning"
"github.com/grafana/grafana/pkg/services/rendering"
secretsManager "github.com/grafana/grafana/pkg/services/secrets/manager"
"github.com/grafana/grafana/pkg/services/serviceaccounts"
"github.com/grafana/grafana/pkg/services/updatechecker"
"github.com/grafana/grafana/pkg/tsdb/azuremonitor"
@ -45,7 +46,7 @@ func ProvideBackgroundServiceRegistry(
rendering *rendering.RenderingService, tokenService models.UserTokenBackgroundService,
provisioning *provisioning.ProvisioningServiceImpl, alerting *alerting.AlertEngine, pm *manager.PluginManager,
metrics *metrics.InternalMetricsService, usageStats *uss.UsageStats, updateChecker *updatechecker.Service,
tracing tracing.Tracer, remoteCache *remotecache.RemoteCache,
tracing tracing.Tracer, remoteCache *remotecache.RemoteCache, secretsService *secretsManager.SecretsService,
// Need to make sure these are initialized, is there a better place to put them?
_ *azuremonitor.Service, _ *cloudwatch.CloudWatchService, _ *elasticsearch.Service, _ *graphite.Service,
_ *influxdb.Service, _ *loki.Service, _ *opentsdb.Service, _ *prometheus.Service, _ *tempo.Service,
@ -69,7 +70,8 @@ func ProvideBackgroundServiceRegistry(
metrics,
usageStats,
tracing,
remoteCache)
remoteCache,
secretsService)
}
// BackgroundServiceRegistry provides background services.

View File

@ -103,7 +103,7 @@ func (s *SecretsService) EncryptWithDBSession(ctx context.Context, payload []byt
// If encryption secrets.EnvelopeEncryptionFeatureToggle toggle is on, use envelope encryption
scope := opt()
keyName := fmt.Sprintf("%s/%s@%s", time.Now().Format("2006-01-02"), scope, s.currentProvider)
keyName := s.keyName(scope)
dataKey, err := s.dataKey(ctx, keyName)
if err != nil {
@ -134,6 +134,10 @@ func (s *SecretsService) EncryptWithDBSession(ctx context.Context, payload []byt
return blob, nil
}
func (s *SecretsService) keyName(scope string) string {
return fmt.Sprintf("%s/%s@%s", now().Format("2006-01-02"), scope, s.currentProvider)
}
func (s *SecretsService) Decrypt(ctx context.Context, payload []byte) ([]byte, error) {
// Use legacy encryption service if secrets.EnvelopeEncryptionFeatureToggle toggle is off
if !s.settings.IsFeatureToggleEnabled(secrets.EnvelopeEncryptionFeatureToggle) {
@ -265,7 +269,7 @@ func (s *SecretsService) newDataKey(ctx context.Context, name string, scope stri
// 4. Cache its unencrypted value and return it
s.dataKeyCache[name] = dataKeyCacheItem{
expiry: time.Now().Add(15 * time.Minute),
expiry: now().Add(dekTTL),
dataKey: dataKey,
}
@ -275,11 +279,9 @@ func (s *SecretsService) newDataKey(ctx context.Context, name string, scope stri
// dataKey looks up DEK in cache or database, and decrypts it
func (s *SecretsService) dataKey(ctx context.Context, name string) ([]byte, error) {
if item, exists := s.dataKeyCache[name]; exists {
if item.expiry.Before(time.Now()) && !item.expiry.IsZero() {
delete(s.dataKeyCache, name)
} else {
return item.dataKey, nil
}
item.expiry = now().Add(dekTTL)
s.dataKeyCache[name] = item
return item.dataKey, nil
}
// 1. get encrypted data key from database
@ -301,7 +303,7 @@ func (s *SecretsService) dataKey(ctx context.Context, name string) ([]byte, erro
// 3. cache data key
s.dataKeyCache[name] = dataKeyCacheItem{
expiry: time.Now().Add(15 * time.Minute),
expiry: now().Add(dekTTL),
dataKey: decrypted,
}
@ -311,3 +313,37 @@ func (s *SecretsService) dataKey(ctx context.Context, name string) ([]byte, erro
func (s *SecretsService) GetProviders() map[string]secrets.Provider {
return s.providers
}
// These variables are used to test the code
// responsible for periodically cleaning up
// data encryption keys cache.
var (
now = time.Now
dekTTL = 15 * time.Minute
gcInterval = time.Minute
)
func (s *SecretsService) Run(ctx context.Context) error {
gc := time.NewTicker(gcInterval)
for {
select {
case <-gc.C:
s.log.Debug("removing expired data encryption keys from cache...")
s.removeExpiredItems()
s.log.Debug("done removing expired data encryption keys from cache")
case <-ctx.Done():
s.log.Debug("grafana is shutting down; stopping...")
gc.Stop()
return nil
}
}
}
func (s *SecretsService) removeExpiredItems() {
for id, dek := range s.dataKeyCache {
if dek.expiry.Before(now()) {
delete(s.dataKeyCache, id)
}
}
}

View File

@ -3,6 +3,7 @@ package manager
import (
"context"
"testing"
"time"
"github.com/grafana/grafana/pkg/infra/usagestats"
"github.com/grafana/grafana/pkg/services/encryption/ossencryption"
@ -11,7 +12,6 @@ import (
"github.com/grafana/grafana/pkg/services/secrets/database"
"github.com/grafana/grafana/pkg/services/sqlstore"
"github.com/grafana/grafana/pkg/setting"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gopkg.in/ini.v1"
@ -253,3 +253,63 @@ func (f *fakeKMS) Provide() (map[string]secrets.Provider, error) {
providers["fakeProvider.v1"] = f.fake
return providers, nil
}
func TestSecretsService_Run(t *testing.T) {
ctx := context.Background()
sql := sqlstore.InitTestDB(t)
store := database.ProvideSecretsStore(sql)
svc := SetupTestService(t, store)
t.Run("should stop with no error once the context's finished", func(t *testing.T) {
ctx, cancel := context.WithTimeout(ctx, time.Millisecond)
defer cancel()
err := svc.Run(ctx)
assert.NoError(t, err)
})
t.Run("should trigger cache clean up", func(t *testing.T) {
// Encrypt to ensure there's a data encryption key generated
_, err := svc.Encrypt(ctx, []byte("grafana"), secrets.WithoutScope())
require.NoError(t, err)
// Data encryption key cache should contain one element
require.Len(t, svc.dataKeyCache, 1)
// Execute background process after key's TTL, to force
// clean up process, during a millisecond with gc ticker
// configured on every nanosecond, to ensure the ticker
// is triggered.
gcInterval = time.Nanosecond
t.Cleanup(func() { now = time.Now })
now = func() time.Time { return time.Now().Add(dekTTL) }
ctx, cancel := context.WithTimeout(ctx, time.Millisecond)
defer cancel()
err = svc.Run(ctx)
require.NoError(t, err)
// Then, once the ticker has been triggered,
// the cleanup process should have happened,
// therefore the cache should be empty.
require.Len(t, svc.dataKeyCache, 0)
})
t.Run("should update data key expiry after every use", func(t *testing.T) {
// Encrypt to generate data encryption key
withoutScope := secrets.WithoutScope()
_, err := svc.Encrypt(ctx, []byte("grafana"), withoutScope)
require.NoError(t, err)
// New call to Encrypt one minute later should update cache entry's expiry
t.Cleanup(func() { now = time.Now })
now = func() time.Time { return time.Now().Add(time.Minute) }
_, err = svc.Encrypt(ctx, []byte("grafana"), withoutScope)
require.NoError(t, err)
dataKeyID := svc.keyName(withoutScope())
assert.True(t, svc.dataKeyCache[dataKeyID].expiry.After(time.Now().Add(dekTTL)))
})
}