Alerting: State Manager takes screenshots. (#49338)

The State Manager will now take screenshots when an alert instance
switches to an Alerting or Resolved state.

Signed-off-by: Joe Blubaugh joe.blubaugh@grafana.com
This commit is contained in:
Joe Blubaugh 2022-05-23 10:53:41 +08:00 committed by GitHub
parent 687e79538b
commit 1d724810de
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 190 additions and 17 deletions

View File

@ -33,7 +33,7 @@ var (
const (
screenshotTimeout = 10 * time.Second
screenshotCacheTTL = 15 * time.Second
screenshotCacheTTL = 60 * time.Second
)
// ScreenshotImageService takes screenshots of the panel for an alert rule and
@ -111,6 +111,12 @@ func (s *ScreenshotImageService) NewImage(ctx context.Context, r *ngmodels.Alert
return &v, nil
}
// NotAvailableImageService is an image service whose NewImage never produces
// an image.
type NotAvailableImageService struct{}

// NewImage ignores its arguments and always returns a nil image together with
// screenshot.ErrScreenshotsUnavailable.
func (s *NotAvailableImageService) NewImage(ctx context.Context, r *ngmodels.AlertRule) (*store.Image, error) {
return nil, screenshot.ErrScreenshotsUnavailable
}
type NoopImageService struct{}
func (s *NoopImageService) NewImage(ctx context.Context, r *ngmodels.AlertRule) (*store.Image, error) {

View File

@ -81,6 +81,10 @@ const (
// Annotations are actually a set of labels, so technically this is the label name of an annotation.
DashboardUIDAnnotation = "__dashboardUid__"
PanelIDAnnotation = "__panelId__"
// This isn't a hard-coded secret token, hence the nolint.
//nolint:gosec
ScreenshotTokenAnnotation = "__alertScreenshotToken__"
)
var (
@ -89,7 +93,11 @@ var (
RuleUIDLabel: {},
NamespaceUIDLabel: {},
}
InternalAnnotationNameSet = map[string]struct{}{}
InternalAnnotationNameSet = map[string]struct{}{
DashboardUIDAnnotation: {},
PanelIDAnnotation: {},
ScreenshotTokenAnnotation: {},
}
)
// AlertRule is the model for alert rules in unified alerting.

View File

@ -39,6 +39,10 @@ func stateToPostableAlert(alertState *state.State, appURL *url.URL) *models.Post
nA["__value_string__"] = alertState.LastEvaluationString
}
if alertState.Image != nil {
nA[ngModels.ScreenshotTokenAnnotation] = alertState.Image.Token
}
var urlStr string
if uid := nL[ngModels.RuleUIDLabel]; len(uid) > 0 && appURL != nil {
u := *appURL

View File

@ -16,6 +16,7 @@ import (
"github.com/grafana/grafana/pkg/services/ngalert/eval"
ngModels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/state"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/util"
)
@ -117,6 +118,22 @@ func Test_stateToPostableAlert(t *testing.T) {
result = stateToPostableAlert(alertState, appURL)
require.Equal(t, expected, result.Annotations)
})
t.Run("add __alertScreenshotToken__ if there is an image token", func(t *testing.T) {
alertState := randomState(tc.state)
alertState.Annotations = randomMapOfStrings()
alertState.Image = &store.Image{Token: "test_token"}
result := stateToPostableAlert(alertState, appURL)
expected := make(models.LabelSet, len(alertState.Annotations)+1)
for k, v := range alertState.Annotations {
expected[k] = v
}
expected["__alertScreenshotToken__"] = alertState.Image.Token
require.Equal(t, expected, result.Annotations)
})
})
switch tc.state {

View File

@ -170,19 +170,34 @@ func (st *Manager) ProcessEvalResults(ctx context.Context, alertRule *ngModels.A
return states
}
//nolint:unused
func (st *Manager) newImage(ctx context.Context, alertRule *ngModels.AlertRule, state *State) error {
if state.Image == nil {
image, err := st.imageService.NewImage(ctx, alertRule)
if errors.Is(err, screenshot.ErrScreenshotsUnavailable) {
// It's not an error if screenshots are disabled.
return nil
} else if err != nil {
st.log.Error("failed to create image", "error", err)
return err
}
state.Image = image
// maybeTakeScreenshot asks the image service for a new image and stores it on
// state.Image, but only when one is needed. A screenshot is taken if:
//  1. The alert state is transitioning into the "Alerting" state from something else.
//  2. The alert state has just transitioned to the resolved state.
//  3. The state is alerting and there is no image on the alert state yet.
//
// It returns nil and leaves state.Image untouched when no screenshot is needed,
// or when the image service fails with screenshot.ErrScreenshotsUnavailable,
// image.ErrNoDashboard or image.ErrNoPanel. Any other image service error is
// returned as-is, also leaving state.Image untouched.
func (st *Manager) maybeTakeScreenshot(
	ctx context.Context,
	alertRule *ngModels.AlertRule,
	state *State,
	oldState eval.State,
) error {
	enteredAlerting := state.State == eval.Alerting && oldState != eval.Alerting
	alertingWithoutImage := state.State == eval.Alerting && state.Image == nil
	if !state.Resolved && !enteredAlerting && !alertingWithoutImage {
		return nil
	}

	img, err := st.imageService.NewImage(ctx, alertRule)
	// The success case is handled first so that the sentinel comparisons below
	// only ever run against a non-nil error.
	switch {
	case err == nil:
		state.Image = img
		return nil
	case errors.Is(err, screenshot.ErrScreenshotsUnavailable),
		errors.Is(err, image.ErrNoDashboard),
		errors.Is(err, image.ErrNoPanel):
		// It's not an error if screenshots are disabled, or our rule isn't allowed to generate screenshots.
		return nil
	default:
		return err
	}
}
@ -219,6 +234,14 @@ func (st *Manager) setNextState(ctx context.Context, alertRule *ngModels.AlertRu
// to Alertmanager.
currentState.Resolved = oldState == eval.Alerting && currentState.State == eval.Normal
err := st.maybeTakeScreenshot(ctx, alertRule, currentState, oldState)
if err != nil {
st.log.Warn("Error generating a screenshot for an alert instance.",
"alert_rule", alertRule.UID,
"dashboard", alertRule.DashboardUID,
"panel", alertRule.PanelID)
}
st.set(currentState)
if oldState != currentState.State {
go st.annotateState(ctx, alertRule, currentState.Labels, result.EvaluatedAt, currentState.State, oldState)

View File

@ -0,0 +1,106 @@
package state
import (
"context"
"fmt"
"math/rand"
"testing"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/dashboards"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/services/sqlstore/mockstore"
"github.com/stretchr/testify/require"
)
// CountingImageService is a test image service that records how many times
// NewImage has been called. The counter is a plain int incremented without
// any synchronization, so it is not for parallel tests.
type CountingImageService struct {
	Called int
}

// NewImage bumps the call counter and returns a new image whose token is a
// random integer rendered as a string. It never returns an error.
func (c *CountingImageService) NewImage(_ context.Context, _ *ngmodels.AlertRule) (*store.Image, error) {
	c.Called++
	token := fmt.Sprint(rand.Int())
	return &store.Image{Token: token}, nil
}
// Test_maybeNewImage exercises Manager.maybeTakeScreenshot against a
// CountingImageService. For each case it checks whether the image service was
// called, and that state.Image is replaced by a new image when a screenshot is
// expected and left exactly as it was otherwise.
func Test_maybeNewImage(t *testing.T) {
	tests := []struct {
		description      string
		shouldScreenshot bool
		state            *State
		oldState         eval.State
	}{
		{
			description:      "Take a screenshot when we change to an alerting state",
			shouldScreenshot: true,
			state: &State{
				State: eval.Alerting,
				Image: &store.Image{
					Token: "erase me",
				},
			},
			oldState: eval.Normal,
		},
		{
			description:      "Take a screenshot if we're already alerting with no image",
			shouldScreenshot: true,
			state: &State{
				State: eval.Alerting,
			},
			oldState: eval.Alerting,
		},
		{
			description:      "Take a screenshot if we're resolved.",
			shouldScreenshot: true,
			state: &State{
				Resolved: true,
				State:    eval.Normal,
				Image: &store.Image{
					Token: "abcd",
				},
			},
			oldState: eval.Alerting,
		},
		{
			description:      "Don't take a screenshot if we already have one.",
			shouldScreenshot: false,
			state: &State{
				State: eval.Alerting,
				Image: &store.Image{
					Token: "already set",
				},
			},
			oldState: eval.Alerting,
		},
		{
			description:      "Don't take a screenshot if we're pending.",
			shouldScreenshot: false,
			state: &State{
				State: eval.Pending,
			},
			oldState: eval.Normal,
		},
	}

	for _, test := range tests {
		t.Run(test.description, func(t *testing.T) {
			imageService := &CountingImageService{}
			mgr := NewManager(log.NewNopLogger(), &metrics.State{}, nil,
				&store.FakeRuleStore{}, &store.FakeInstanceStore{}, mockstore.NewSQLStoreMock(),
				&dashboards.FakeDashboardService{}, imageService)
			// Remember the image pointer before the call so we can tell
			// whether it was swapped out or left in place.
			previous := test.state.Image

			err := mgr.maybeTakeScreenshot(context.Background(), &ngmodels.AlertRule{}, test.state, test.oldState)
			require.NoError(t, err)

			if !test.shouldScreenshot {
				require.Equal(t, 0, imageService.Called)
				// The existing image (or the absence of one) must be untouched.
				require.Same(t, previous, test.state.Image)
				return
			}
			require.Equal(t, 1, imageService.Called)
			require.NotNil(t, test.state.Image)
			// A freshly created image must have replaced whatever was there.
			require.NotSame(t, previous, test.state.Image)
		})
	}
}

View File

@ -1770,7 +1770,7 @@ func TestProcessEvalResults(t *testing.T) {
for _, tc := range testCases {
ss := mockstore.NewSQLStoreMock()
st := state.NewManager(log.New("test_state_manager"), testMetrics.GetStateMetrics(), nil, nil, &store.FakeInstanceStore{}, ss, &dashboards.FakeDashboardService{}, &image.NoopImageService{})
st := state.NewManager(log.New("test_state_manager"), testMetrics.GetStateMetrics(), nil, nil, &store.FakeInstanceStore{}, ss, &dashboards.FakeDashboardService{}, &image.NotAvailableImageService{})
t.Run(tc.desc, func(t *testing.T) {
fakeAnnoRepo := store.NewFakeAnnotationsRepo()
annotations.SetRepository(fakeAnnoRepo)

View File

@ -109,17 +109,26 @@ func TestDeleteExpiredImages(t *testing.T) {
require.NoError(t, err)
}
// Images are available
img, err := dbstore.GetImage(ctx, imgs[0].Token)
require.NoError(t, err)
require.NotNil(t, img)
img, err = dbstore.GetImage(ctx, imgs[1].Token)
require.NoError(t, err)
require.NotNil(t, img)
// Wait until timeout.
for i := 0; i < 120; i++ {
store.TimeNow()
}
// Call expired
err := dbstore.DeleteExpiredImages(ctx)
err = dbstore.DeleteExpiredImages(ctx)
require.NoError(t, err)
// All images are gone.
img, err := dbstore.GetImage(ctx, imgs[0].Token)
img, err = dbstore.GetImage(ctx, imgs[0].Token)
require.Nil(t, img)
require.Error(t, err)