mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting: Fix screenshots were not cached (#58493)
This commit is contained in:
parent
238a3f820c
commit
c646ff0ce3
76
pkg/services/ngalert/image/cache.go
Normal file
76
pkg/services/ngalert/image/cache.go
Normal file
@ -0,0 +1,76 @@
|
||||
package image
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
gocache "github.com/patrickmn/go-cache"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||
)
|
||||
|
||||
const (
|
||||
namespace = "grafana"
|
||||
subsystem = "alerting"
|
||||
)
|
||||
|
||||
// CacheService caches images.
|
||||
//
|
||||
//go:generate mockgen -destination=cache_mock.go -package=image github.com/grafana/grafana/pkg/services/ngalert/image CacheService
|
||||
type CacheService interface {
|
||||
// Get returns the screenshot for the options or false if a screenshot with these
|
||||
// options does not exist.
|
||||
Get(ctx context.Context, k string) (models.Image, bool)
|
||||
// Set the screenshot for the options. If another screenshot exists with these
|
||||
// options then it will be replaced.
|
||||
Set(ctx context.Context, k string, image models.Image) error
|
||||
}
|
||||
|
||||
// InmemCacheService is an in-mem screenshot cache.
|
||||
type InmemCacheService struct {
|
||||
cache *gocache.Cache
|
||||
cacheHits prometheus.Counter
|
||||
cacheMisses prometheus.Counter
|
||||
}
|
||||
|
||||
func NewInmemCacheService(expiration time.Duration, r prometheus.Registerer) CacheService {
|
||||
return &InmemCacheService{
|
||||
cache: gocache.New(expiration, time.Minute),
|
||||
cacheHits: promauto.With(r).NewCounter(prometheus.CounterOpts{
|
||||
Name: "image_cache_hits_total",
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
}),
|
||||
cacheMisses: promauto.With(r).NewCounter(prometheus.CounterOpts{
|
||||
Name: "image_cache_misses_total",
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
func (s *InmemCacheService) Get(_ context.Context, k string) (models.Image, bool) {
|
||||
if v, ok := s.cache.Get(k); ok {
|
||||
defer s.cacheHits.Inc()
|
||||
return v.(models.Image), true
|
||||
}
|
||||
defer s.cacheMisses.Inc()
|
||||
return models.Image{}, false
|
||||
}
|
||||
|
||||
func (s *InmemCacheService) Set(_ context.Context, k string, screenshot models.Image) error {
|
||||
s.cache.Set(k, screenshot, 0)
|
||||
return nil
|
||||
}
|
||||
|
||||
type NoOpCacheService struct{}
|
||||
|
||||
func (s *NoOpCacheService) Get(_ context.Context, _ string) (models.Image, bool) {
|
||||
return models.Image{}, false
|
||||
}
|
||||
|
||||
func (s *NoOpCacheService) Set(_ context.Context, _ string, _ models.Image) error {
|
||||
return nil
|
||||
}
|
65
pkg/services/ngalert/image/cache_mock.go
Normal file
65
pkg/services/ngalert/image/cache_mock.go
Normal file
@ -0,0 +1,65 @@
|
||||
// Code generated by MockGen. DO NOT EDIT.
|
||||
// Source: github.com/grafana/grafana/pkg/services/ngalert/image (interfaces: CacheService)
|
||||
|
||||
// Package image is a generated GoMock package.
|
||||
package image
|
||||
|
||||
import (
|
||||
context "context"
|
||||
reflect "reflect"
|
||||
|
||||
gomock "github.com/golang/mock/gomock"
|
||||
models "github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||
)
|
||||
|
||||
// MockCacheService is a mock of CacheService interface.
|
||||
type MockCacheService struct {
|
||||
ctrl *gomock.Controller
|
||||
recorder *MockCacheServiceMockRecorder
|
||||
}
|
||||
|
||||
// MockCacheServiceMockRecorder is the mock recorder for MockCacheService.
|
||||
type MockCacheServiceMockRecorder struct {
|
||||
mock *MockCacheService
|
||||
}
|
||||
|
||||
// NewMockCacheService creates a new mock instance.
|
||||
func NewMockCacheService(ctrl *gomock.Controller) *MockCacheService {
|
||||
mock := &MockCacheService{ctrl: ctrl}
|
||||
mock.recorder = &MockCacheServiceMockRecorder{mock}
|
||||
return mock
|
||||
}
|
||||
|
||||
// EXPECT returns an object that allows the caller to indicate expected use.
|
||||
func (m *MockCacheService) EXPECT() *MockCacheServiceMockRecorder {
|
||||
return m.recorder
|
||||
}
|
||||
|
||||
// Get mocks base method.
|
||||
func (m *MockCacheService) Get(arg0 context.Context, arg1 string) (models.Image, bool) {
|
||||
m.ctrl.T.Helper()
|
||||
ret := m.ctrl.Call(m, "Get", arg0, arg1)
|
||||
ret0, _ := ret[0].(models.Image)
|
||||
ret1, _ := ret[1].(bool)
|
||||
return ret0, ret1
|
||||
}
|
||||
|
||||
// Get indicates an expected call of Get.
|
||||
func (mr *MockCacheServiceMockRecorder) Get(arg0, arg1 interface{}) *gomock.Call {
|
||||
mr.mock.ctrl.T.Helper()
|
||||
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Get", reflect.TypeOf((*MockCacheService)(nil).Get), arg0, arg1)
|
||||
}
|
||||
|
||||
// Set mocks base method.
|
||||
func (m *MockCacheService) Set(arg0 context.Context, arg1 string, arg2 models.Image) error {
|
||||
m.ctrl.T.Helper()
|
||||
ret := m.ctrl.Call(m, "Set", arg0, arg1, arg2)
|
||||
ret0, _ := ret[0].(error)
|
||||
return ret0
|
||||
}
|
||||
|
||||
// Set indicates an expected call of Set.
|
||||
func (mr *MockCacheServiceMockRecorder) Set(arg0, arg1, arg2 interface{}) *gomock.Call {
|
||||
mr.mock.ctrl.T.Helper()
|
||||
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Set", reflect.TypeOf((*MockCacheService)(nil).Set), arg0, arg1, arg2)
|
||||
}
|
38
pkg/services/ngalert/image/cache_test.go
Normal file
38
pkg/services/ngalert/image/cache_test.go
Normal file
@ -0,0 +1,38 @@
|
||||
package image
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||
)
|
||||
|
||||
func TestInmemCacheService(t *testing.T) {
|
||||
s := NewInmemCacheService(time.Second, prometheus.DefaultRegisterer)
|
||||
ctx := context.Background()
|
||||
|
||||
// should be a miss
|
||||
actual, ok := s.Get(ctx, "test")
|
||||
assert.False(t, ok)
|
||||
assert.Equal(t, models.Image{}, actual)
|
||||
|
||||
// should be a hit
|
||||
expected := models.Image{Path: "test.png"}
|
||||
require.NoError(t, s.Set(ctx, "test", expected))
|
||||
actual, ok = s.Get(ctx, "test")
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, expected, actual)
|
||||
|
||||
// wait 1s and the cached image should have expired
|
||||
<-time.After(time.Second)
|
||||
|
||||
// should be a miss
|
||||
actual, ok = s.Get(ctx, "test")
|
||||
assert.False(t, ok)
|
||||
assert.Equal(t, models.Image{}, actual)
|
||||
}
|
@ -21,8 +21,8 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
screenshotTimeout = 10 * time.Second
|
||||
screenshotCacheTTL = 60 * time.Second
|
||||
screenshotTimeout = 10 * time.Second
|
||||
)
|
||||
|
||||
var (
|
||||
@ -59,6 +59,7 @@ type ImageService interface {
|
||||
// as an annotation or label to the Alertmanager. This service cannot take
|
||||
// screenshots of alert rules that are not associated with a dashboard panel.
|
||||
type ScreenshotImageService struct {
|
||||
cache CacheService
|
||||
limiter screenshot.RateLimiter
|
||||
logger log.Logger
|
||||
screenshots screenshot.ScreenshotService
|
||||
@ -69,12 +70,14 @@ type ScreenshotImageService struct {
|
||||
|
||||
// NewScreenshotImageService returns a new ScreenshotImageService.
|
||||
func NewScreenshotImageService(
|
||||
cache CacheService,
|
||||
limiter screenshot.RateLimiter,
|
||||
logger log.Logger,
|
||||
screenshots screenshot.ScreenshotService,
|
||||
store store.ImageStore,
|
||||
uploads *UploadingService) ImageService {
|
||||
return &ScreenshotImageService{
|
||||
cache: cache,
|
||||
limiter: limiter,
|
||||
logger: logger,
|
||||
screenshots: screenshots,
|
||||
@ -88,6 +91,7 @@ func NewScreenshotImageService(
|
||||
func NewScreenshotImageServiceFromCfg(cfg *setting.Cfg, db *store.DBstore, ds dashboards.DashboardService,
|
||||
rs rendering.Service, r prometheus.Registerer) (ImageService, error) {
|
||||
var (
|
||||
cache CacheService = &NoOpCacheService{}
|
||||
limiter screenshot.RateLimiter = &screenshot.NoOpRateLimiter{}
|
||||
screenshots screenshot.ScreenshotService = &screenshot.ScreenshotUnavailableService{}
|
||||
uploads *UploadingService = nil
|
||||
@ -95,6 +99,7 @@ func NewScreenshotImageServiceFromCfg(cfg *setting.Cfg, db *store.DBstore, ds da
|
||||
|
||||
// If screenshots are enabled
|
||||
if cfg.UnifiedAlerting.Screenshots.Capture {
|
||||
cache = NewInmemCacheService(screenshotCacheTTL, r)
|
||||
limiter = screenshot.NewTokenRateLimiter(cfg.UnifiedAlerting.Screenshots.MaxConcurrentScreenshots)
|
||||
screenshots = screenshot.NewHeadlessScreenshotService(ds, rs, r)
|
||||
|
||||
@ -108,7 +113,7 @@ func NewScreenshotImageServiceFromCfg(cfg *setting.Cfg, db *store.DBstore, ds da
|
||||
}
|
||||
}
|
||||
|
||||
return NewScreenshotImageService(limiter, cfg.Logger, screenshots, db, uploads), nil
|
||||
return NewScreenshotImageService(cache, limiter, log.New("ngalert.image"), screenshots, db, uploads), nil
|
||||
}
|
||||
|
||||
// NewImage returns a screenshot of the alert rule or an error.
|
||||
@ -119,7 +124,7 @@ func NewScreenshotImageServiceFromCfg(cfg *setting.Cfg, db *store.DBstore, ds da
|
||||
// alert rule has a Dashboard UID and the dashboard exists, but does not have a
|
||||
// Panel ID in its annotations then an ErrNoPanel error is returned.
|
||||
func (s *ScreenshotImageService) NewImage(ctx context.Context, r *models.AlertRule) (*models.Image, error) {
|
||||
if r.DashboardUID == nil {
|
||||
if r.DashboardUID == nil || *r.DashboardUID == "" {
|
||||
return nil, ErrNoDashboard
|
||||
}
|
||||
|
||||
@ -127,6 +132,17 @@ func (s *ScreenshotImageService) NewImage(ctx context.Context, r *models.AlertRu
|
||||
return nil, ErrNoPanel
|
||||
}
|
||||
|
||||
// If there is an image is in the cache return it instead of taking another screenshot
|
||||
if image, ok := s.cache.Get(ctx, r.GetKey().String()); ok {
|
||||
s.logger.Debug("Found cached image", "token", image.Token)
|
||||
return &image, nil
|
||||
}
|
||||
|
||||
// We create both a context with timeout and set a timeout in ScreenshotOptions. The timeout
|
||||
// in the context is used for both database queries and the request to the rendering service,
|
||||
// while the timeout in ScreenshotOptions is passed to the rendering service where it is used as
|
||||
// a client timeout. It is not recommended to pass a context without a deadline and the context
|
||||
// deadline should be at least as long as the timeout in ScreenshotOptions.
|
||||
ctx, cancelFunc := context.WithTimeout(ctx, screenshotTimeout)
|
||||
defer cancelFunc()
|
||||
|
||||
@ -136,8 +152,11 @@ func (s *ScreenshotImageService) NewImage(ctx context.Context, r *models.AlertRu
|
||||
Timeout: screenshotTimeout,
|
||||
}
|
||||
|
||||
// To prevent concurrent screenshots of the same dashboard panel we use singleflight,
|
||||
// deduplicated on a base64 hash of the screenshot options.
|
||||
optsHash := base64.StdEncoding.EncodeToString(opts.Hash())
|
||||
result, err, _ := s.singleflight.Do(optsHash, func() (interface{}, error) {
|
||||
// Once deduplicated concurrent screenshots are then rate-limited
|
||||
screenshot, err := s.limiter.Do(ctx, opts, s.screenshots.Take)
|
||||
if err != nil {
|
||||
if errors.Is(err, dashboards.ErrDashboardNotFound) {
|
||||
@ -145,15 +164,20 @@ func (s *ScreenshotImageService) NewImage(ctx context.Context, r *models.AlertRu
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
image := models.Image{Path: screenshot.Path}
|
||||
|
||||
// Uploading images is optional
|
||||
if s.uploads != nil {
|
||||
if image, err = s.uploads.Upload(ctx, image); err != nil {
|
||||
s.logger.Warn("failed to upload image", "path", image.Path, "error", err)
|
||||
s.logger.Warn("Failed to upload image", "path", image.Path, "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := s.store.SaveImage(ctx, &image); err != nil {
|
||||
return nil, fmt.Errorf("failed to save image: %w", err)
|
||||
}
|
||||
s.logger.Debug("Saved new image", "token", image.Token)
|
||||
return image, nil
|
||||
})
|
||||
if err != nil {
|
||||
@ -161,6 +185,10 @@ func (s *ScreenshotImageService) NewImage(ctx context.Context, r *models.AlertRu
|
||||
}
|
||||
|
||||
image := result.(models.Image)
|
||||
if err = s.cache.Set(ctx, r.GetKey().String(), image); err != nil {
|
||||
s.logger.Warn("Failed to cache image", "token", image.Token, "error", err)
|
||||
}
|
||||
|
||||
return &image, nil
|
||||
}
|
||||
|
||||
|
@ -23,17 +23,21 @@ func TestScreenshotImageService(t *testing.T) {
|
||||
defer ctrl.Finish()
|
||||
|
||||
var (
|
||||
cache = NewMockCacheService(ctrl)
|
||||
images = store.NewFakeImageStore(t)
|
||||
limiter = screenshot.NoOpRateLimiter{}
|
||||
screenshots = screenshot.NewMockScreenshotService(ctrl)
|
||||
uploads = imguploader.NewMockImageUploader(ctrl)
|
||||
)
|
||||
|
||||
s := NewScreenshotImageService(&limiter, log.NewNopLogger(), screenshots, images,
|
||||
s := NewScreenshotImageService(cache, &limiter, log.NewNopLogger(), screenshots, images,
|
||||
NewUploadingService(uploads, prometheus.NewRegistry()))
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
// assert that the cache is checked for an existing image
|
||||
cache.EXPECT().Get(gomock.Any(), "{orgID: 1, UID: foo}").Return(models.Image{}, false)
|
||||
|
||||
// assert that a screenshot is taken
|
||||
screenshots.EXPECT().Take(gomock.Any(), screenshot.ScreenshotOptions{
|
||||
DashboardUID: "foo",
|
||||
@ -43,11 +47,11 @@ func TestScreenshotImageService(t *testing.T) {
|
||||
Path: "foo.png",
|
||||
}, nil)
|
||||
|
||||
// the screenshot is made into an image and uploaded
|
||||
// assert that the screenshot is made into an image and uploaded
|
||||
uploads.EXPECT().Upload(gomock.Any(), "foo.png").
|
||||
Return("https://example.com/foo.png", nil)
|
||||
|
||||
// and then saved into the database
|
||||
// assert that the image is saved into the database
|
||||
expected := models.Image{
|
||||
ID: 1,
|
||||
Token: "foo",
|
||||
@ -55,12 +59,20 @@ func TestScreenshotImageService(t *testing.T) {
|
||||
URL: "https://example.com/foo.png",
|
||||
}
|
||||
|
||||
// assert that the image is saved into the cache
|
||||
cache.EXPECT().Set(gomock.Any(), "{orgID: 1, UID: foo}", expected).Return(nil)
|
||||
|
||||
image, err := s.NewImage(ctx, &models.AlertRule{
|
||||
OrgID: 1,
|
||||
UID: "foo",
|
||||
DashboardUID: pointer.String("foo"),
|
||||
PanelID: pointer.Int64(1)})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, expected, *image)
|
||||
|
||||
// assert that the cache is checked for an existing image
|
||||
cache.EXPECT().Get(gomock.Any(), "{orgID: 1, UID: bar}").Return(models.Image{}, false)
|
||||
|
||||
// assert that a screenshot is taken
|
||||
screenshots.EXPECT().Take(gomock.Any(), screenshot.ScreenshotOptions{
|
||||
DashboardUID: "bar",
|
||||
@ -81,9 +93,27 @@ func TestScreenshotImageService(t *testing.T) {
|
||||
Path: "bar.png",
|
||||
}
|
||||
|
||||
// assert that the image is saved into the cache, but without a URL
|
||||
cache.EXPECT().Set(gomock.Any(), "{orgID: 1, UID: bar}", expected).Return(nil)
|
||||
|
||||
image, err = s.NewImage(ctx, &models.AlertRule{
|
||||
OrgID: 1,
|
||||
UID: "bar",
|
||||
DashboardUID: pointer.String("bar"),
|
||||
PanelID: pointer.Int64(1)})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, expected, *image)
|
||||
|
||||
expected = models.Image{Path: "baz.png", URL: "https://example.com/baz.png"}
|
||||
|
||||
// assert that the cache is checked for an existing image and it is returned
|
||||
cache.EXPECT().Get(gomock.Any(), "{orgID: 1, UID: baz}").Return(expected, true)
|
||||
|
||||
image, err = s.NewImage(ctx, &models.AlertRule{
|
||||
OrgID: 1,
|
||||
UID: "baz",
|
||||
DashboardUID: pointer.String("baz"),
|
||||
PanelID: pointer.Int64(1)})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, expected, *image)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user