mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting: State Manager takes screenshots. (#49338)
The State Manager will now take screenshots when an alert instance switches to an Alerting or Resolved state.
Signed-off-by: Joe Blubaugh joe.blubaugh@grafana.com
This commit is contained in:
@@ -33,7 +33,7 @@ var (
|
|||||||
|
|
||||||
const (
|
const (
|
||||||
screenshotTimeout = 10 * time.Second
|
screenshotTimeout = 10 * time.Second
|
||||||
screenshotCacheTTL = 15 * time.Second
|
screenshotCacheTTL = 60 * time.Second
|
||||||
)
|
)
|
||||||
|
|
||||||
// ScreenshotImageService takes screenshots of the panel for an alert rule and
|
// ScreenshotImageService takes screenshots of the panel for an alert rule and
|
||||||
@@ -111,6 +111,12 @@ func (s *ScreenshotImageService) NewImage(ctx context.Context, r *ngmodels.Alert
|
|||||||
return &v, nil
|
return &v, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type NotAvailableImageService struct{}
|
||||||
|
|
||||||
|
func (s *NotAvailableImageService) NewImage(ctx context.Context, r *ngmodels.AlertRule) (*store.Image, error) {
|
||||||
|
return nil, screenshot.ErrScreenshotsUnavailable
|
||||||
|
}
|
||||||
|
|
||||||
type NoopImageService struct{}
|
type NoopImageService struct{}
|
||||||
|
|
||||||
func (s *NoopImageService) NewImage(ctx context.Context, r *ngmodels.AlertRule) (*store.Image, error) {
|
func (s *NoopImageService) NewImage(ctx context.Context, r *ngmodels.AlertRule) (*store.Image, error) {
|
||||||
|
|||||||
@@ -81,6 +81,10 @@ const (
|
|||||||
// Annotations are actually a set of labels, so technically this is the label name of an annotation.
|
// Annotations are actually a set of labels, so technically this is the label name of an annotation.
|
||||||
DashboardUIDAnnotation = "__dashboardUid__"
|
DashboardUIDAnnotation = "__dashboardUid__"
|
||||||
PanelIDAnnotation = "__panelId__"
|
PanelIDAnnotation = "__panelId__"
|
||||||
|
|
||||||
|
// This isn't a hard-coded secret token, hence the nolint.
|
||||||
|
//nolint:gosec
|
||||||
|
ScreenshotTokenAnnotation = "__alertScreenshotToken__"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@@ -89,7 +93,11 @@ var (
|
|||||||
RuleUIDLabel: {},
|
RuleUIDLabel: {},
|
||||||
NamespaceUIDLabel: {},
|
NamespaceUIDLabel: {},
|
||||||
}
|
}
|
||||||
InternalAnnotationNameSet = map[string]struct{}{}
|
InternalAnnotationNameSet = map[string]struct{}{
|
||||||
|
DashboardUIDAnnotation: {},
|
||||||
|
PanelIDAnnotation: {},
|
||||||
|
ScreenshotTokenAnnotation: {},
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
// AlertRule is the model for alert rules in unified alerting.
|
// AlertRule is the model for alert rules in unified alerting.
|
||||||
|
|||||||
@@ -39,6 +39,10 @@ func stateToPostableAlert(alertState *state.State, appURL *url.URL) *models.Post
|
|||||||
nA["__value_string__"] = alertState.LastEvaluationString
|
nA["__value_string__"] = alertState.LastEvaluationString
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if alertState.Image != nil {
|
||||||
|
nA[ngModels.ScreenshotTokenAnnotation] = alertState.Image.Token
|
||||||
|
}
|
||||||
|
|
||||||
var urlStr string
|
var urlStr string
|
||||||
if uid := nL[ngModels.RuleUIDLabel]; len(uid) > 0 && appURL != nil {
|
if uid := nL[ngModels.RuleUIDLabel]; len(uid) > 0 && appURL != nil {
|
||||||
u := *appURL
|
u := *appURL
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ import (
|
|||||||
"github.com/grafana/grafana/pkg/services/ngalert/eval"
|
"github.com/grafana/grafana/pkg/services/ngalert/eval"
|
||||||
ngModels "github.com/grafana/grafana/pkg/services/ngalert/models"
|
ngModels "github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||||
"github.com/grafana/grafana/pkg/services/ngalert/state"
|
"github.com/grafana/grafana/pkg/services/ngalert/state"
|
||||||
|
"github.com/grafana/grafana/pkg/services/ngalert/store"
|
||||||
"github.com/grafana/grafana/pkg/util"
|
"github.com/grafana/grafana/pkg/util"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -117,6 +118,22 @@ func Test_stateToPostableAlert(t *testing.T) {
|
|||||||
result = stateToPostableAlert(alertState, appURL)
|
result = stateToPostableAlert(alertState, appURL)
|
||||||
require.Equal(t, expected, result.Annotations)
|
require.Equal(t, expected, result.Annotations)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
t.Run("add __alertScreenshotToken__ if there is an image token", func(t *testing.T) {
|
||||||
|
alertState := randomState(tc.state)
|
||||||
|
alertState.Annotations = randomMapOfStrings()
|
||||||
|
alertState.Image = &store.Image{Token: "test_token"}
|
||||||
|
|
||||||
|
result := stateToPostableAlert(alertState, appURL)
|
||||||
|
|
||||||
|
expected := make(models.LabelSet, len(alertState.Annotations)+1)
|
||||||
|
for k, v := range alertState.Annotations {
|
||||||
|
expected[k] = v
|
||||||
|
}
|
||||||
|
expected["__alertScreenshotToken__"] = alertState.Image.Token
|
||||||
|
|
||||||
|
require.Equal(t, expected, result.Annotations)
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
switch tc.state {
|
switch tc.state {
|
||||||
|
|||||||
@@ -170,19 +170,34 @@ func (st *Manager) ProcessEvalResults(ctx context.Context, alertRule *ngModels.A
|
|||||||
return states
|
return states
|
||||||
}
|
}
|
||||||
|
|
||||||
//nolint:unused
|
// Maybe take a screenshot. Do it if:
|
||||||
func (st *Manager) newImage(ctx context.Context, alertRule *ngModels.AlertRule, state *State) error {
|
// 1. The alert state is transitioning into the "Alerting" state from something else.
|
||||||
if state.Image == nil {
|
// 2. The alert state has just transitioned to the resolved state.
|
||||||
image, err := st.imageService.NewImage(ctx, alertRule)
|
// 3. The state is alerting and there is no screenshot annotation on the alert state.
|
||||||
if errors.Is(err, screenshot.ErrScreenshotsUnavailable) {
|
func (st *Manager) maybeTakeScreenshot(
|
||||||
// It's not an error if screenshots are disabled.
|
ctx context.Context,
|
||||||
return nil
|
alertRule *ngModels.AlertRule,
|
||||||
} else if err != nil {
|
state *State,
|
||||||
st.log.Error("failed to create image", "error", err)
|
oldState eval.State,
|
||||||
return err
|
) error {
|
||||||
}
|
shouldScreenshot := state.Resolved ||
|
||||||
state.Image = image
|
state.State == eval.Alerting && oldState != eval.Alerting ||
|
||||||
|
state.State == eval.Alerting && state.Image == nil
|
||||||
|
if !shouldScreenshot {
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
img, err := st.imageService.NewImage(ctx, alertRule)
|
||||||
|
if err != nil &&
|
||||||
|
errors.Is(err, screenshot.ErrScreenshotsUnavailable) ||
|
||||||
|
errors.Is(err, image.ErrNoDashboard) ||
|
||||||
|
errors.Is(err, image.ErrNoPanel) {
|
||||||
|
// It's not an error if screenshots are disabled, or our rule isn't allowed to generate screenshots.
|
||||||
|
return nil
|
||||||
|
} else if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
state.Image = img
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -219,6 +234,14 @@ func (st *Manager) setNextState(ctx context.Context, alertRule *ngModels.AlertRu
|
|||||||
// to Alertmanager.
|
// to Alertmanager.
|
||||||
currentState.Resolved = oldState == eval.Alerting && currentState.State == eval.Normal
|
currentState.Resolved = oldState == eval.Alerting && currentState.State == eval.Normal
|
||||||
|
|
||||||
|
err := st.maybeTakeScreenshot(ctx, alertRule, currentState, oldState)
|
||||||
|
if err != nil {
|
||||||
|
st.log.Warn("Error generating a screenshot for an alert instance.",
|
||||||
|
"alert_rule", alertRule.UID,
|
||||||
|
"dashboard", alertRule.DashboardUID,
|
||||||
|
"panel", alertRule.PanelID)
|
||||||
|
}
|
||||||
|
|
||||||
st.set(currentState)
|
st.set(currentState)
|
||||||
if oldState != currentState.State {
|
if oldState != currentState.State {
|
||||||
go st.annotateState(ctx, alertRule, currentState.Labels, result.EvaluatedAt, currentState.State, oldState)
|
go st.annotateState(ctx, alertRule, currentState.Labels, result.EvaluatedAt, currentState.State, oldState)
|
||||||
|
|||||||
106
pkg/services/ngalert/state/manager_private_test.go
Normal file
106
pkg/services/ngalert/state/manager_private_test.go
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
package state
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"math/rand"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/grafana/grafana/pkg/infra/log"
|
||||||
|
"github.com/grafana/grafana/pkg/services/dashboards"
|
||||||
|
"github.com/grafana/grafana/pkg/services/ngalert/eval"
|
||||||
|
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
|
||||||
|
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||||
|
"github.com/grafana/grafana/pkg/services/ngalert/store"
|
||||||
|
"github.com/grafana/grafana/pkg/services/sqlstore/mockstore"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Not for parallel tests.
|
||||||
|
type CountingImageService struct {
|
||||||
|
Called int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *CountingImageService) NewImage(_ context.Context, _ *ngmodels.AlertRule) (*store.Image, error) {
|
||||||
|
c.Called += 1
|
||||||
|
return &store.Image{
|
||||||
|
Token: fmt.Sprint(rand.Int()),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func Test_maybeNewImage(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
description string
|
||||||
|
shouldScreenshot bool
|
||||||
|
state *State
|
||||||
|
oldState eval.State
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
"Take a screenshot when we change to an alerting state",
|
||||||
|
true,
|
||||||
|
&State{
|
||||||
|
State: eval.Alerting,
|
||||||
|
Image: &store.Image{
|
||||||
|
Token: "erase me",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
eval.Normal,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Take a screenshot if we're already alerting with no image",
|
||||||
|
true,
|
||||||
|
&State{
|
||||||
|
State: eval.Alerting,
|
||||||
|
},
|
||||||
|
eval.Alerting,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Take a screenshot if we're resolved.",
|
||||||
|
true,
|
||||||
|
&State{
|
||||||
|
Resolved: true,
|
||||||
|
State: eval.Normal,
|
||||||
|
Image: &store.Image{
|
||||||
|
Token: "abcd",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
eval.Alerting,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Don't take a screenshot if we already have one.",
|
||||||
|
false,
|
||||||
|
&State{
|
||||||
|
State: eval.Alerting,
|
||||||
|
Image: &store.Image{
|
||||||
|
Token: "already set",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
eval.Alerting,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Don't take a screenshot if we're pending.",
|
||||||
|
false,
|
||||||
|
&State{
|
||||||
|
State: eval.Pending,
|
||||||
|
},
|
||||||
|
eval.Normal,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range tests {
|
||||||
|
t.Run(test.description, func(t *testing.T) {
|
||||||
|
imageService := &CountingImageService{}
|
||||||
|
mgr := NewManager(log.NewNopLogger(), &metrics.State{}, nil,
|
||||||
|
&store.FakeRuleStore{}, &store.FakeInstanceStore{}, mockstore.NewSQLStoreMock(),
|
||||||
|
&dashboards.FakeDashboardService{}, imageService)
|
||||||
|
err := mgr.maybeTakeScreenshot(context.Background(), &ngmodels.AlertRule{}, test.state, test.oldState)
|
||||||
|
require.NoError(t, err)
|
||||||
|
if !test.shouldScreenshot {
|
||||||
|
require.Equal(t, 0, imageService.Called)
|
||||||
|
} else {
|
||||||
|
require.Equal(t, 1, imageService.Called)
|
||||||
|
require.NotNil(t, test.state.Image)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1770,7 +1770,7 @@ func TestProcessEvalResults(t *testing.T) {
|
|||||||
|
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
ss := mockstore.NewSQLStoreMock()
|
ss := mockstore.NewSQLStoreMock()
|
||||||
st := state.NewManager(log.New("test_state_manager"), testMetrics.GetStateMetrics(), nil, nil, &store.FakeInstanceStore{}, ss, &dashboards.FakeDashboardService{}, &image.NoopImageService{})
|
st := state.NewManager(log.New("test_state_manager"), testMetrics.GetStateMetrics(), nil, nil, &store.FakeInstanceStore{}, ss, &dashboards.FakeDashboardService{}, &image.NotAvailableImageService{})
|
||||||
t.Run(tc.desc, func(t *testing.T) {
|
t.Run(tc.desc, func(t *testing.T) {
|
||||||
fakeAnnoRepo := store.NewFakeAnnotationsRepo()
|
fakeAnnoRepo := store.NewFakeAnnotationsRepo()
|
||||||
annotations.SetRepository(fakeAnnoRepo)
|
annotations.SetRepository(fakeAnnoRepo)
|
||||||
|
|||||||
@@ -109,17 +109,26 @@ func TestDeleteExpiredImages(t *testing.T) {
|
|||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Images are available
|
||||||
|
img, err := dbstore.GetImage(ctx, imgs[0].Token)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, img)
|
||||||
|
|
||||||
|
img, err = dbstore.GetImage(ctx, imgs[1].Token)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, img)
|
||||||
|
|
||||||
// Wait until timeout.
|
// Wait until timeout.
|
||||||
for i := 0; i < 120; i++ {
|
for i := 0; i < 120; i++ {
|
||||||
store.TimeNow()
|
store.TimeNow()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Call expired
|
// Call expired
|
||||||
err := dbstore.DeleteExpiredImages(ctx)
|
err = dbstore.DeleteExpiredImages(ctx)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
// All images are gone.
|
// All images are gone.
|
||||||
img, err := dbstore.GetImage(ctx, imgs[0].Token)
|
img, err = dbstore.GetImage(ctx, imgs[0].Token)
|
||||||
require.Nil(t, img)
|
require.Nil(t, img)
|
||||||
require.Error(t, err)
|
require.Error(t, err)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user