mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Prior to this change, all alert instance writes and deletes happened individually, in their own database transaction. This change batches up writes or deletes for a given rule's evaluation loop into a single transaction before applying it. These new transactions are off by default, guarded by the feature toggle "alertingBigTransactions".

Before:

```
goos: darwin
goarch: arm64
pkg: github.com/grafana/grafana/pkg/services/ngalert/store
BenchmarkAlertInstanceOperations-8 398 2991381 ns/op 1133537 B/op 27703 allocs/op
--- BENCH: BenchmarkAlertInstanceOperations-8
util.go:127: alert definition: {orgID: 1, UID: FovKXiRVzm} with title: "an alert definition FTvFXmRVkz" interval: 60 created
util.go:127: alert definition: {orgID: 1, UID: foDFXmRVkm} with title: "an alert definition fovFXmRVkz" interval: 60 created
util.go:127: alert definition: {orgID: 1, UID: VQvFuigVkm} with title: "an alert definition VwDKXmR4kz" interval: 60 created
PASS
ok github.com/grafana/grafana/pkg/services/ngalert/store 1.619s
```

After:

```
goos: darwin
goarch: arm64
pkg: github.com/grafana/grafana/pkg/services/ngalert/store
BenchmarkAlertInstanceOperations-8 1440 816484 ns/op 352297 B/op 6529 allocs/op
--- BENCH: BenchmarkAlertInstanceOperations-8
util.go:127: alert definition: {orgID: 1, UID: 302r_igVzm} with title: "an alert definition q0h9lmR4zz" interval: 60 created
util.go:127: alert definition: {orgID: 1, UID: 71hrlmR4km} with title: "an alert definition nJ29_mR4zz" interval: 60 created
util.go:127: alert definition: {orgID: 1, UID: Cahr_mR4zm} with title: "an alert definition ja2rlmg4zz" interval: 60 created
PASS
ok github.com/grafana/grafana/pkg/services/ngalert/store 1.383s
```

So we cut time by about 75% and memory allocations by about 60% when storing and deleting 100 instances.
150 lines
3.5 KiB
Go
150 lines
3.5 KiB
Go
package state
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"math/rand"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/stretchr/testify/require"
|
|
|
|
"github.com/benbjohnson/clock"
|
|
|
|
"github.com/grafana/grafana/pkg/infra/log"
|
|
"github.com/grafana/grafana/pkg/services/ngalert/eval"
|
|
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
|
|
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
|
|
)
|
|
|
|
// CountingImageService is a test stand-in for the image service handed to
// NewManager. It does nothing but remember how often NewImage was invoked.
//
// Called is a plain int updated without any synchronization, so an instance
// is not for parallel tests.
type CountingImageService struct {
	// Called is the number of NewImage invocations so far.
	Called int
}
|
|
|
|
func (c *CountingImageService) NewImage(_ context.Context, _ *ngmodels.AlertRule) (*ngmodels.Image, error) {
|
|
c.Called += 1
|
|
return &ngmodels.Image{
|
|
Token: fmt.Sprint(rand.Int()),
|
|
}, nil
|
|
}
|
|
|
|
func Test_maybeNewImage(t *testing.T) {
|
|
tests := []struct {
|
|
description string
|
|
shouldScreenshot bool
|
|
state *State
|
|
oldState eval.State
|
|
}{
|
|
{
|
|
"Take a screenshot when we change to an alerting state",
|
|
true,
|
|
&State{
|
|
State: eval.Alerting,
|
|
Image: &ngmodels.Image{
|
|
Token: "erase me",
|
|
},
|
|
},
|
|
eval.Normal,
|
|
},
|
|
{
|
|
"Take a screenshot if we're already alerting with no image",
|
|
true,
|
|
&State{
|
|
State: eval.Alerting,
|
|
},
|
|
eval.Alerting,
|
|
},
|
|
{
|
|
"Take a screenshot if we're resolved.",
|
|
true,
|
|
&State{
|
|
Resolved: true,
|
|
State: eval.Normal,
|
|
Image: &ngmodels.Image{
|
|
Token: "abcd",
|
|
},
|
|
},
|
|
eval.Alerting,
|
|
},
|
|
{
|
|
"Don't take a screenshot if we already have one.",
|
|
false,
|
|
&State{
|
|
State: eval.Alerting,
|
|
Image: &ngmodels.Image{
|
|
Token: "already set",
|
|
},
|
|
},
|
|
eval.Alerting,
|
|
},
|
|
{
|
|
"Don't take a screenshot if we're pending.",
|
|
false,
|
|
&State{
|
|
State: eval.Pending,
|
|
},
|
|
eval.Normal,
|
|
},
|
|
}
|
|
|
|
for _, test := range tests {
|
|
t.Run(test.description, func(t *testing.T) {
|
|
imageService := &CountingImageService{}
|
|
mgr := NewManager(log.NewNopLogger(), &metrics.State{}, nil,
|
|
&FakeRuleReader{}, &FakeInstanceStore{},
|
|
imageService, clock.NewMock(), &FakeHistorian{})
|
|
err := mgr.maybeTakeScreenshot(context.Background(), &ngmodels.AlertRule{}, test.state, test.oldState)
|
|
require.NoError(t, err)
|
|
if !test.shouldScreenshot {
|
|
require.Equal(t, 0, imageService.Called)
|
|
} else {
|
|
require.Equal(t, 1, imageService.Called)
|
|
require.NotNil(t, test.state.Image)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestStateIsStale(t *testing.T) {
|
|
now := time.Now()
|
|
intervalSeconds := rand.Int63n(10) + 5
|
|
|
|
testCases := []struct {
|
|
name string
|
|
lastEvaluation time.Time
|
|
expectedResult bool
|
|
}{
|
|
{
|
|
name: "false if last evaluation is now",
|
|
lastEvaluation: now,
|
|
expectedResult: false,
|
|
},
|
|
{
|
|
name: "false if last evaluation is 1 interval before now",
|
|
lastEvaluation: now.Add(-time.Duration(intervalSeconds)),
|
|
expectedResult: false,
|
|
},
|
|
{
|
|
name: "false if last evaluation is little less than 2 interval before now",
|
|
lastEvaluation: now.Add(-time.Duration(intervalSeconds) * time.Second * 2).Add(100 * time.Millisecond),
|
|
expectedResult: false,
|
|
},
|
|
{
|
|
name: "true if last evaluation is 2 intervals from now",
|
|
lastEvaluation: now.Add(-time.Duration(intervalSeconds) * time.Second * 2),
|
|
expectedResult: true,
|
|
},
|
|
{
|
|
name: "true if last evaluation is 3 intervals from now",
|
|
lastEvaluation: now.Add(-time.Duration(intervalSeconds) * time.Second * 3),
|
|
expectedResult: true,
|
|
},
|
|
}
|
|
for _, tc := range testCases {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
require.Equal(t, tc.expectedResult, stateIsStale(now, tc.lastEvaluation, intervalSeconds))
|
|
})
|
|
}
|
|
}
|