mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting: State Manager takes screenshots. (#49338)
The State Manager will now take screenshots when an alert instance switches to an Alerting or Resolved state.

Signed-off-by: Joe Blubaugh <joe.blubaugh@grafana.com>
This commit is contained in:
parent
687e79538b
commit
1d724810de
@ -33,7 +33,7 @@ var (
|
||||
|
||||
const (
|
||||
screenshotTimeout = 10 * time.Second
|
||||
screenshotCacheTTL = 15 * time.Second
|
||||
screenshotCacheTTL = 60 * time.Second
|
||||
)
|
||||
|
||||
// ScreenshotImageService takes screenshots of the panel for an alert rule and
|
||||
@ -111,6 +111,12 @@ func (s *ScreenshotImageService) NewImage(ctx context.Context, r *ngmodels.Alert
|
||||
return &v, nil
|
||||
}
|
||||
|
||||
type NotAvailableImageService struct{}
|
||||
|
||||
func (s *NotAvailableImageService) NewImage(ctx context.Context, r *ngmodels.AlertRule) (*store.Image, error) {
|
||||
return nil, screenshot.ErrScreenshotsUnavailable
|
||||
}
|
||||
|
||||
type NoopImageService struct{}
|
||||
|
||||
func (s *NoopImageService) NewImage(ctx context.Context, r *ngmodels.AlertRule) (*store.Image, error) {
|
||||
|
@ -81,6 +81,10 @@ const (
|
||||
// Annotations are actually a set of labels, so technically this is the label name of an annotation.
|
||||
DashboardUIDAnnotation = "__dashboardUid__"
|
||||
PanelIDAnnotation = "__panelId__"
|
||||
|
||||
// This isn't a hard-coded secret token, hence the nolint.
|
||||
//nolint:gosec
|
||||
ScreenshotTokenAnnotation = "__alertScreenshotToken__"
|
||||
)
|
||||
|
||||
var (
|
||||
@ -89,7 +93,11 @@ var (
|
||||
RuleUIDLabel: {},
|
||||
NamespaceUIDLabel: {},
|
||||
}
|
||||
InternalAnnotationNameSet = map[string]struct{}{}
|
||||
InternalAnnotationNameSet = map[string]struct{}{
|
||||
DashboardUIDAnnotation: {},
|
||||
PanelIDAnnotation: {},
|
||||
ScreenshotTokenAnnotation: {},
|
||||
}
|
||||
)
|
||||
|
||||
// AlertRule is the model for alert rules in unified alerting.
|
||||
|
@ -39,6 +39,10 @@ func stateToPostableAlert(alertState *state.State, appURL *url.URL) *models.Post
|
||||
nA["__value_string__"] = alertState.LastEvaluationString
|
||||
}
|
||||
|
||||
if alertState.Image != nil {
|
||||
nA[ngModels.ScreenshotTokenAnnotation] = alertState.Image.Token
|
||||
}
|
||||
|
||||
var urlStr string
|
||||
if uid := nL[ngModels.RuleUIDLabel]; len(uid) > 0 && appURL != nil {
|
||||
u := *appURL
|
||||
|
@ -16,6 +16,7 @@ import (
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/eval"
|
||||
ngModels "github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/state"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/store"
|
||||
"github.com/grafana/grafana/pkg/util"
|
||||
)
|
||||
|
||||
@ -117,6 +118,22 @@ func Test_stateToPostableAlert(t *testing.T) {
|
||||
result = stateToPostableAlert(alertState, appURL)
|
||||
require.Equal(t, expected, result.Annotations)
|
||||
})
|
||||
|
||||
t.Run("add __alertScreenshotToken__ if there is an image token", func(t *testing.T) {
|
||||
alertState := randomState(tc.state)
|
||||
alertState.Annotations = randomMapOfStrings()
|
||||
alertState.Image = &store.Image{Token: "test_token"}
|
||||
|
||||
result := stateToPostableAlert(alertState, appURL)
|
||||
|
||||
expected := make(models.LabelSet, len(alertState.Annotations)+1)
|
||||
for k, v := range alertState.Annotations {
|
||||
expected[k] = v
|
||||
}
|
||||
expected["__alertScreenshotToken__"] = alertState.Image.Token
|
||||
|
||||
require.Equal(t, expected, result.Annotations)
|
||||
})
|
||||
})
|
||||
|
||||
switch tc.state {
|
||||
|
@ -170,19 +170,34 @@ func (st *Manager) ProcessEvalResults(ctx context.Context, alertRule *ngModels.A
|
||||
return states
|
||||
}
|
||||
|
||||
//nolint:unused
|
||||
func (st *Manager) newImage(ctx context.Context, alertRule *ngModels.AlertRule, state *State) error {
|
||||
if state.Image == nil {
|
||||
image, err := st.imageService.NewImage(ctx, alertRule)
|
||||
if errors.Is(err, screenshot.ErrScreenshotsUnavailable) {
|
||||
// It's not an error if screenshots are disabled.
|
||||
return nil
|
||||
} else if err != nil {
|
||||
st.log.Error("failed to create image", "error", err)
|
||||
return err
|
||||
}
|
||||
state.Image = image
|
||||
// Maybe take a screenshot. Do it if:
|
||||
// 1. The alert state is transitioning into the "Alerting" state from something else.
|
||||
// 2. The alert state has just transitioned to the resolved state.
|
||||
// 3. The state is alerting and there is no screenshot annotation on the alert state.
|
||||
func (st *Manager) maybeTakeScreenshot(
|
||||
ctx context.Context,
|
||||
alertRule *ngModels.AlertRule,
|
||||
state *State,
|
||||
oldState eval.State,
|
||||
) error {
|
||||
shouldScreenshot := state.Resolved ||
|
||||
state.State == eval.Alerting && oldState != eval.Alerting ||
|
||||
state.State == eval.Alerting && state.Image == nil
|
||||
if !shouldScreenshot {
|
||||
return nil
|
||||
}
|
||||
|
||||
img, err := st.imageService.NewImage(ctx, alertRule)
|
||||
if err != nil &&
|
||||
errors.Is(err, screenshot.ErrScreenshotsUnavailable) ||
|
||||
errors.Is(err, image.ErrNoDashboard) ||
|
||||
errors.Is(err, image.ErrNoPanel) {
|
||||
// It's not an error if screenshots are disabled, or our rule isn't allowed to generate screenshots.
|
||||
return nil
|
||||
} else if err != nil {
|
||||
return err
|
||||
}
|
||||
state.Image = img
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -219,6 +234,14 @@ func (st *Manager) setNextState(ctx context.Context, alertRule *ngModels.AlertRu
|
||||
// to Alertmanager.
|
||||
currentState.Resolved = oldState == eval.Alerting && currentState.State == eval.Normal
|
||||
|
||||
err := st.maybeTakeScreenshot(ctx, alertRule, currentState, oldState)
|
||||
if err != nil {
|
||||
st.log.Warn("Error generating a screenshot for an alert instance.",
|
||||
"alert_rule", alertRule.UID,
|
||||
"dashboard", alertRule.DashboardUID,
|
||||
"panel", alertRule.PanelID)
|
||||
}
|
||||
|
||||
st.set(currentState)
|
||||
if oldState != currentState.State {
|
||||
go st.annotateState(ctx, alertRule, currentState.Labels, result.EvaluatedAt, currentState.State, oldState)
|
||||
|
106
pkg/services/ngalert/state/manager_private_test.go
Normal file
106
pkg/services/ngalert/state/manager_private_test.go
Normal file
@ -0,0 +1,106 @@
|
||||
package state
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"testing"
|
||||
|
||||
"github.com/grafana/grafana/pkg/infra/log"
|
||||
"github.com/grafana/grafana/pkg/services/dashboards"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/eval"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
|
||||
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/store"
|
||||
"github.com/grafana/grafana/pkg/services/sqlstore/mockstore"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// Not for parallel tests.
|
||||
type CountingImageService struct {
|
||||
Called int
|
||||
}
|
||||
|
||||
func (c *CountingImageService) NewImage(_ context.Context, _ *ngmodels.AlertRule) (*store.Image, error) {
|
||||
c.Called += 1
|
||||
return &store.Image{
|
||||
Token: fmt.Sprint(rand.Int()),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func Test_maybeNewImage(t *testing.T) {
|
||||
tests := []struct {
|
||||
description string
|
||||
shouldScreenshot bool
|
||||
state *State
|
||||
oldState eval.State
|
||||
}{
|
||||
{
|
||||
"Take a screenshot when we change to an alerting state",
|
||||
true,
|
||||
&State{
|
||||
State: eval.Alerting,
|
||||
Image: &store.Image{
|
||||
Token: "erase me",
|
||||
},
|
||||
},
|
||||
eval.Normal,
|
||||
},
|
||||
{
|
||||
"Take a screenshot if we're already alerting with no image",
|
||||
true,
|
||||
&State{
|
||||
State: eval.Alerting,
|
||||
},
|
||||
eval.Alerting,
|
||||
},
|
||||
{
|
||||
"Take a screenshot if we're resolved.",
|
||||
true,
|
||||
&State{
|
||||
Resolved: true,
|
||||
State: eval.Normal,
|
||||
Image: &store.Image{
|
||||
Token: "abcd",
|
||||
},
|
||||
},
|
||||
eval.Alerting,
|
||||
},
|
||||
{
|
||||
"Don't take a screenshot if we already have one.",
|
||||
false,
|
||||
&State{
|
||||
State: eval.Alerting,
|
||||
Image: &store.Image{
|
||||
Token: "already set",
|
||||
},
|
||||
},
|
||||
eval.Alerting,
|
||||
},
|
||||
{
|
||||
"Don't take a screenshot if we're pending.",
|
||||
false,
|
||||
&State{
|
||||
State: eval.Pending,
|
||||
},
|
||||
eval.Normal,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.description, func(t *testing.T) {
|
||||
imageService := &CountingImageService{}
|
||||
mgr := NewManager(log.NewNopLogger(), &metrics.State{}, nil,
|
||||
&store.FakeRuleStore{}, &store.FakeInstanceStore{}, mockstore.NewSQLStoreMock(),
|
||||
&dashboards.FakeDashboardService{}, imageService)
|
||||
err := mgr.maybeTakeScreenshot(context.Background(), &ngmodels.AlertRule{}, test.state, test.oldState)
|
||||
require.NoError(t, err)
|
||||
if !test.shouldScreenshot {
|
||||
require.Equal(t, 0, imageService.Called)
|
||||
} else {
|
||||
require.Equal(t, 1, imageService.Called)
|
||||
require.NotNil(t, test.state.Image)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
@ -1770,7 +1770,7 @@ func TestProcessEvalResults(t *testing.T) {
|
||||
|
||||
for _, tc := range testCases {
|
||||
ss := mockstore.NewSQLStoreMock()
|
||||
st := state.NewManager(log.New("test_state_manager"), testMetrics.GetStateMetrics(), nil, nil, &store.FakeInstanceStore{}, ss, &dashboards.FakeDashboardService{}, &image.NoopImageService{})
|
||||
st := state.NewManager(log.New("test_state_manager"), testMetrics.GetStateMetrics(), nil, nil, &store.FakeInstanceStore{}, ss, &dashboards.FakeDashboardService{}, &image.NotAvailableImageService{})
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
fakeAnnoRepo := store.NewFakeAnnotationsRepo()
|
||||
annotations.SetRepository(fakeAnnoRepo)
|
||||
|
@ -109,17 +109,26 @@ func TestDeleteExpiredImages(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
// Images are available.
|
||||
img, err := dbstore.GetImage(ctx, imgs[0].Token)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, img)
|
||||
|
||||
img, err = dbstore.GetImage(ctx, imgs[1].Token)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, img)
|
||||
|
||||
// Wait until timeout.
|
||||
for i := 0; i < 120; i++ {
|
||||
store.TimeNow()
|
||||
}
|
||||
|
||||
// Call expired
|
||||
err := dbstore.DeleteExpiredImages(ctx)
|
||||
err = dbstore.DeleteExpiredImages(ctx)
|
||||
require.NoError(t, err)
|
||||
|
||||
// All images are gone.
|
||||
img, err := dbstore.GetImage(ctx, imgs[0].Token)
|
||||
img, err = dbstore.GetImage(ctx, imgs[0].Token)
|
||||
require.Nil(t, img)
|
||||
require.Error(t, err)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user