Alerting: Fix screenshot is not taken for stale series (#57982)

This commit is contained in:
George Robinson 2022-11-02 22:14:22 +00:00 committed by GitHub
parent f1b6660b54
commit 215ffee437
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 232 additions and 124 deletions

View File

@ -0,0 +1,51 @@
// Code generated by MockGen. DO NOT EDIT.
// Source: github.com/grafana/grafana/pkg/services/ngalert/image (interfaces: ImageService)
// Package image is a generated GoMock package.
package image
import (
context "context"
reflect "reflect"
gomock "github.com/golang/mock/gomock"
models "github.com/grafana/grafana/pkg/services/ngalert/models"
)
// MockImageService is a mock of ImageService interface.
type MockImageService struct {
ctrl *gomock.Controller
recorder *MockImageServiceMockRecorder
}
// MockImageServiceMockRecorder is the mock recorder for MockImageService.
type MockImageServiceMockRecorder struct {
mock *MockImageService
}
// NewMockImageService creates a new mock instance.
func NewMockImageService(ctrl *gomock.Controller) *MockImageService {
mock := &MockImageService{ctrl: ctrl}
mock.recorder = &MockImageServiceMockRecorder{mock}
return mock
}
// EXPECT returns an object that allows the caller to indicate expected use.
func (m *MockImageService) EXPECT() *MockImageServiceMockRecorder {
return m.recorder
}
// NewImage mocks base method.
func (m *MockImageService) NewImage(arg0 context.Context, arg1 *models.AlertRule) (*models.Image, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "NewImage", arg0, arg1)
ret0, _ := ret[0].(*models.Image)
ret1, _ := ret[1].(error)
return ret0, ret1
}
// NewImage indicates an expected call of NewImage.
func (mr *MockImageServiceMockRecorder) NewImage(arg0, arg1 interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "NewImage", reflect.TypeOf((*MockImageService)(nil).NewImage), arg0, arg1)
}

View File

@ -2,7 +2,6 @@ package state
import (
"context"
"errors"
"fmt"
"net/url"
"time"
@ -15,8 +14,6 @@ import (
"github.com/grafana/grafana/pkg/services/ngalert/image"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
ngModels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/screenshot"
)
var ResendDelay = 30 * time.Second
@ -187,37 +184,6 @@ func (st *Manager) ProcessEvalResults(ctx context.Context, evaluatedAt time.Time
return append(states, resolvedStates...)
}
// Maybe take a screenshot. Do it if:
// 1. The alert state is transitioning into the "Alerting" state from something else.
// 2. The alert state has just transitioned to the resolved state.
// 3. The state is alerting and there is no screenshot annotation on the alert state.
func (st *Manager) maybeTakeScreenshot(
ctx context.Context,
alertRule *ngModels.AlertRule,
state *State,
oldState eval.State,
) error {
shouldScreenshot := state.Resolved ||
state.State == eval.Alerting && oldState != eval.Alerting ||
state.State == eval.Alerting && state.Image == nil
if !shouldScreenshot {
return nil
}
img, err := st.imageService.NewImage(ctx, alertRule)
if err != nil &&
errors.Is(err, screenshot.ErrScreenshotsUnavailable) ||
errors.Is(err, image.ErrNoDashboard) ||
errors.Is(err, image.ErrNoPanel) {
// It's not an error if screenshots are disabled, or our rule isn't allowed to generate screenshots.
return nil
} else if err != nil {
return err
}
state.Image = img
return nil
}
// Set the current state based on evaluation results
func (st *Manager) setNextState(ctx context.Context, alertRule *ngModels.AlertRule, result eval.Result, extraLabels data.Labels, logger log.Logger) *State {
currentState := st.cache.getOrCreate(ctx, st.log, alertRule, result, extraLabels, st.externalURL)
@ -261,12 +227,16 @@ func (st *Manager) setNextState(ctx context.Context, alertRule *ngModels.AlertRu
// to Alertmanager.
currentState.Resolved = oldState == eval.Alerting && currentState.State == eval.Normal
err := st.maybeTakeScreenshot(ctx, alertRule, currentState, oldState)
if err != nil {
logger.Warn("Failed to generate a screenshot for an alert instance",
"dashboard", alertRule.DashboardUID,
"panel", alertRule.PanelID,
"error", err)
if shouldTakeImage(currentState.State, oldState, currentState.Image, currentState.Resolved) {
image, err := takeImage(ctx, st.imageService, alertRule)
if err != nil {
logger.Warn("Failed to take an image",
"dashboard", alertRule.DashboardUID,
"panel", alertRule.PanelID,
"error", err)
} else if image != nil {
currentState.Image = image
}
}
st.cache.set(currentState)
@ -389,6 +359,10 @@ func (i InstanceStateAndReason) String() string {
}
func (st *Manager) staleResultsHandler(ctx context.Context, evaluatedAt time.Time, alertRule *ngModels.AlertRule, states map[string]*State, logger log.Logger) []*State {
// If we are removing two or more stale series it makes sense to share the resolved image as the alert rule is the same.
// TODO: We will need to change this when we support images without screenshots as each series will have a different image
var resolvedImage *ngModels.Image
var resolvedStates []*State
allStates := st.GetStatesForRuleUID(alertRule.OrgID, alertRule.UID)
toDelete := make([]ngModels.AlertInstanceKey, 0)
@ -418,6 +392,20 @@ func (st *Manager) staleResultsHandler(ctx context.Context, evaluatedAt time.Tim
previousState,
)
}
// If there is no resolved image for this rule then take one
if resolvedImage == nil {
image, err := takeImage(ctx, st.imageService, alertRule)
if err != nil {
logger.Warn("Failed to take an image",
"dashboard", alertRule.DashboardUID,
"panel", alertRule.PanelID,
"error", err)
} else if image != nil {
resolvedImage = image
}
}
s.Image = resolvedImage
resolvedStates = append(resolvedStates, s)
}
}

View File

@ -9,10 +9,6 @@ import (
"github.com/stretchr/testify/require"
"github.com/benbjohnson/clock"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
)
@ -28,83 +24,6 @@ func (c *CountingImageService) NewImage(_ context.Context, _ *ngmodels.AlertRule
}, nil
}
func Test_maybeNewImage(t *testing.T) {
tests := []struct {
description string
shouldScreenshot bool
state *State
oldState eval.State
}{
{
"Take a screenshot when we change to an alerting state",
true,
&State{
State: eval.Alerting,
Image: &ngmodels.Image{
Token: "erase me",
},
},
eval.Normal,
},
{
"Take a screenshot if we're already alerting with no image",
true,
&State{
State: eval.Alerting,
},
eval.Alerting,
},
{
"Take a screenshot if we're resolved.",
true,
&State{
Resolved: true,
State: eval.Normal,
Image: &ngmodels.Image{
Token: "abcd",
},
},
eval.Alerting,
},
{
"Don't take a screenshot if we already have one.",
false,
&State{
State: eval.Alerting,
Image: &ngmodels.Image{
Token: "already set",
},
},
eval.Alerting,
},
{
"Don't take a screenshot if we're pending.",
false,
&State{
State: eval.Pending,
},
eval.Normal,
},
}
for _, test := range tests {
t.Run(test.description, func(t *testing.T) {
imageService := &CountingImageService{}
mgr := NewManager(&metrics.State{}, nil,
&FakeRuleReader{}, &FakeInstanceStore{},
imageService, clock.NewMock(), &FakeHistorian{})
err := mgr.maybeTakeScreenshot(context.Background(), &ngmodels.AlertRule{}, test.state, test.oldState)
require.NoError(t, err)
if !test.shouldScreenshot {
require.Equal(t, 0, imageService.Called)
} else {
require.Equal(t, 1, imageService.Called)
require.NotNil(t, test.state.Image)
}
})
}
}
func TestStateIsStale(t *testing.T) {
now := time.Now()
intervalSeconds := rand.Int63n(10) + 5

View File

@ -1,6 +1,7 @@
package state
import (
"context"
"errors"
"fmt"
"math"
@ -11,7 +12,9 @@ import (
"github.com/grafana/grafana/pkg/expr"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/services/ngalert/image"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/screenshot"
)
type State struct {
@ -284,3 +287,27 @@ func (a *State) GetLastEvaluationValuesForCondition() map[string]float64 {
return r
}
// shouldTakeImage returns true if the state just has transitioned to alerting from another state,
// transitioned to alerting in a previous evaluation but does not have a screenshot, or has just
// been resolved.
func shouldTakeImage(state, previousState eval.State, previousImage *models.Image, resolved bool) bool {
return resolved ||
state == eval.Alerting && previousState != eval.Alerting ||
state == eval.Alerting && previousImage == nil
}
// takeImage takes an image for the alert rule. It returns nil if screenshots are disabled or
// the rule is not associated with a dashboard panel.
func takeImage(ctx context.Context, s image.ImageService, r *models.AlertRule) (*models.Image, error) {
img, err := s.NewImage(ctx, r)
if err != nil {
if errors.Is(err, screenshot.ErrScreenshotsUnavailable) ||
errors.Is(err, image.ErrNoDashboard) ||
errors.Is(err, image.ErrNoPanel) {
return nil, nil
}
return nil, err
}
return img, nil
}

View File

@ -1,17 +1,21 @@
package state
import (
"context"
"errors"
"math"
"math/rand"
"testing"
"time"
"github.com/golang/mock/gomock"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/services/ngalert/image"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/screenshot"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
ptr "github.com/xorcare/pointer"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
)
func TestNeedsSending(t *testing.T) {
@ -293,3 +297,122 @@ func TestGetLastEvaluationValuesForCondition(t *testing.T) {
require.Truef(t, math.IsNaN(result["A"]), "expected NaN but got %v", result["A"])
})
}
func TestShouldTakeImage(t *testing.T) {
tests := []struct {
name string
state eval.State
previousState eval.State
previousImage *ngmodels.Image
resolved bool
expected bool
}{{
name: "should take image for state that just transitioned to alerting",
state: eval.Alerting,
previousState: eval.Pending,
expected: true,
}, {
name: "should take image for alerting state without image",
state: eval.Alerting,
previousState: eval.Alerting,
expected: true,
}, {
name: "should take image for resolved state",
state: eval.Normal,
previousState: eval.Alerting,
resolved: true,
expected: true,
}, {
name: "should not take image for normal state",
state: eval.Normal,
previousState: eval.Normal,
}, {
name: "should not take image for pending state",
state: eval.Pending,
previousState: eval.Normal,
}, {
name: "should not take image for alerting state with image",
state: eval.Alerting,
previousState: eval.Alerting,
previousImage: &ngmodels.Image{Path: "foo.png", URL: "https://example.com/foo.png"},
}}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
assert.Equal(t, test.expected, shouldTakeImage(test.state, test.previousState, test.previousImage, test.resolved))
})
}
}
func TestTakeImage(t *testing.T) {
t.Run("ErrNoDashboard should return nil", func(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
ctx := context.Background()
r := ngmodels.AlertRule{}
s := image.NewMockImageService(ctrl)
s.EXPECT().NewImage(ctx, &r).Return(nil, image.ErrNoDashboard)
image, err := takeImage(ctx, s, &r)
assert.NoError(t, err)
assert.Nil(t, image)
})
t.Run("ErrNoPanel should return nil", func(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
ctx := context.Background()
r := ngmodels.AlertRule{DashboardUID: ptr.String("foo")}
s := image.NewMockImageService(ctrl)
s.EXPECT().NewImage(ctx, &r).Return(nil, image.ErrNoPanel)
image, err := takeImage(ctx, s, &r)
assert.NoError(t, err)
assert.Nil(t, image)
})
t.Run("ErrScreenshotsUnavailable should return nil", func(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
ctx := context.Background()
r := ngmodels.AlertRule{DashboardUID: ptr.String("foo"), PanelID: ptr.Int64(1)}
s := image.NewMockImageService(ctrl)
s.EXPECT().NewImage(ctx, &r).Return(nil, screenshot.ErrScreenshotsUnavailable)
image, err := takeImage(ctx, s, &r)
assert.NoError(t, err)
assert.Nil(t, image)
})
t.Run("other errors should be returned", func(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
ctx := context.Background()
r := ngmodels.AlertRule{DashboardUID: ptr.String("foo"), PanelID: ptr.Int64(1)}
s := image.NewMockImageService(ctrl)
s.EXPECT().NewImage(ctx, &r).Return(nil, errors.New("unknown error"))
image, err := takeImage(ctx, s, &r)
assert.EqualError(t, err, "unknown error")
assert.Nil(t, image)
})
t.Run("image should be returned", func(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
ctx := context.Background()
r := ngmodels.AlertRule{DashboardUID: ptr.String("foo"), PanelID: ptr.Int64(1)}
s := image.NewMockImageService(ctrl)
s.EXPECT().NewImage(ctx, &r).Return(&ngmodels.Image{Path: "foo.png"}, nil)
image, err := takeImage(ctx, s, &r)
assert.NoError(t, err)
require.NotNil(t, image)
assert.Equal(t, ngmodels.Image{Path: "foo.png"}, *image)
})
}