mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Prior to this change, all alert instance writes and deletes happened individually, each in its own database transaction. This change batches up writes or deletes for a given rule's evaluation loop into a single transaction before applying it. These new transactions are off by default, guarded by the feature toggle "alertingBigTransactions". Before: ``` goos: darwin goarch: arm64 pkg: github.com/grafana/grafana/pkg/services/ngalert/store BenchmarkAlertInstanceOperations-8 398 2991381 ns/op 1133537 B/op 27703 allocs/op --- BENCH: BenchmarkAlertInstanceOperations-8 util.go:127: alert definition: {orgID: 1, UID: FovKXiRVzm} with title: "an alert definition FTvFXmRVkz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: foDFXmRVkm} with title: "an alert definition fovFXmRVkz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: VQvFuigVkm} with title: "an alert definition VwDKXmR4kz" interval: 60 created PASS ok github.com/grafana/grafana/pkg/services/ngalert/store 1.619s ``` After: ``` goos: darwin goarch: arm64 pkg: github.com/grafana/grafana/pkg/services/ngalert/store BenchmarkAlertInstanceOperations-8 1440 816484 ns/op 352297 B/op 6529 allocs/op --- BENCH: BenchmarkAlertInstanceOperations-8 util.go:127: alert definition: {orgID: 1, UID: 302r_igVzm} with title: "an alert definition q0h9lmR4zz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: 71hrlmR4km} with title: "an alert definition nJ29_mR4zz" interval: 60 created util.go:127: alert definition: {orgID: 1, UID: Cahr_mR4zm} with title: "an alert definition ja2rlmg4zz" interval: 60 created PASS ok github.com/grafana/grafana/pkg/services/ngalert/store 1.383s ``` So we cut time by about 75% and memory allocations by about 60% when storing and deleting 100 instances.
308 lines
9.1 KiB
Go
308 lines
9.1 KiB
Go
package store_test
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"testing"
|
|
|
|
"github.com/stretchr/testify/require"
|
|
|
|
"github.com/grafana/grafana/pkg/services/ngalert/models"
|
|
"github.com/grafana/grafana/pkg/services/ngalert/tests"
|
|
)
|
|
|
|
// baseIntervalSeconds is the interval value handed to tests.SetupTestEnv by the
// benchmark and the integration tests in this file.
// NOTE(review): presumably the scheduler's base evaluation interval, in
// seconds — confirm against tests.SetupTestEnv.
const baseIntervalSeconds = 10
|
|
|
|
func BenchmarkAlertInstanceOperations(b *testing.B) {
|
|
b.StopTimer()
|
|
ctx := context.Background()
|
|
_, dbstore := tests.SetupTestEnv(b, baseIntervalSeconds)
|
|
dbstore.FeatureToggles.(*tests.FakeFeatures).BigTransactions = false
|
|
|
|
const mainOrgID int64 = 1
|
|
|
|
alertRule := tests.CreateTestAlertRule(b, ctx, dbstore, 60, mainOrgID)
|
|
|
|
// Create some instances to write down and then delete.
|
|
count := 10_003
|
|
instances := make([]models.AlertInstance, 0, count)
|
|
keys := make([]models.AlertInstanceKey, 0, count)
|
|
for i := 0; i < count; i++ {
|
|
labels := models.InstanceLabels{"test": fmt.Sprint(i)}
|
|
_, labelsHash, _ := labels.StringAndHash()
|
|
instance := models.AlertInstance{
|
|
AlertInstanceKey: models.AlertInstanceKey{
|
|
RuleOrgID: alertRule.OrgID,
|
|
RuleUID: alertRule.UID,
|
|
LabelsHash: labelsHash,
|
|
},
|
|
CurrentState: models.InstanceStateFiring,
|
|
CurrentReason: string(models.InstanceStateError),
|
|
Labels: labels,
|
|
}
|
|
instances = append(instances, instance)
|
|
keys = append(keys, instance.AlertInstanceKey)
|
|
}
|
|
|
|
b.StartTimer()
|
|
for i := 0; i < b.N; i++ {
|
|
_ = dbstore.SaveAlertInstances(ctx, instances...)
|
|
_ = dbstore.DeleteAlertInstances(ctx, keys...)
|
|
}
|
|
}
|
|
|
|
func TestIntegrationAlertInstanceBulkWrite(t *testing.T) {
|
|
if testing.Short() {
|
|
t.Skip("skipping integration test")
|
|
}
|
|
ctx := context.Background()
|
|
_, dbstore := tests.SetupTestEnv(t, baseIntervalSeconds)
|
|
|
|
orgIDs := []int64{1, 2, 3, 4, 5}
|
|
counts := []int{20_000, 200, 503, 0, 1256}
|
|
instances := []models.AlertInstance{}
|
|
keys := []models.AlertInstanceKey{}
|
|
|
|
for i, id := range orgIDs {
|
|
alertRule := tests.CreateTestAlertRule(t, ctx, dbstore, 60, id)
|
|
|
|
// Create some instances to write down and then delete.
|
|
for j := 0; j < counts[i]; j++ {
|
|
labels := models.InstanceLabels{"test": fmt.Sprint(j)}
|
|
_, labelsHash, _ := labels.StringAndHash()
|
|
instance := models.AlertInstance{
|
|
AlertInstanceKey: models.AlertInstanceKey{
|
|
RuleOrgID: alertRule.OrgID,
|
|
RuleUID: alertRule.UID,
|
|
LabelsHash: labelsHash,
|
|
},
|
|
CurrentState: models.InstanceStateFiring,
|
|
CurrentReason: string(models.InstanceStateError),
|
|
Labels: labels,
|
|
}
|
|
instances = append(instances, instance)
|
|
keys = append(keys, instance.AlertInstanceKey)
|
|
}
|
|
}
|
|
|
|
for _, bigStmts := range []bool{false, true} {
|
|
dbstore.FeatureToggles.(*tests.FakeFeatures).BigTransactions = bigStmts
|
|
err := dbstore.SaveAlertInstances(ctx, instances...)
|
|
require.NoError(t, err)
|
|
t.Log("Finished database write")
|
|
|
|
// List our instances. Make sure we have the right count.
|
|
for i, id := range orgIDs {
|
|
q := &models.ListAlertInstancesQuery{
|
|
RuleOrgID: id,
|
|
}
|
|
err = dbstore.ListAlertInstances(ctx, q)
|
|
require.NoError(t, err)
|
|
require.Equal(t, counts[i], len(q.Result), "Org %v: Expected %v instances but got %v", id, counts[i], len(q.Result))
|
|
}
|
|
t.Log("Finished database read")
|
|
|
|
err = dbstore.DeleteAlertInstances(ctx, keys...)
|
|
require.NoError(t, err)
|
|
t.Log("Finished database delete")
|
|
|
|
for _, id := range orgIDs {
|
|
q := &models.ListAlertInstancesQuery{
|
|
RuleOrgID: id,
|
|
}
|
|
err = dbstore.ListAlertInstances(ctx, q)
|
|
require.NoError(t, err)
|
|
require.Zero(t, len(q.Result), "Org %v: Deleted instances but still had %v", id, len(q.Result))
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestIntegrationAlertInstanceOperations(t *testing.T) {
|
|
if testing.Short() {
|
|
t.Skip("skipping integration test")
|
|
}
|
|
ctx := context.Background()
|
|
_, dbstore := tests.SetupTestEnv(t, baseIntervalSeconds)
|
|
|
|
const mainOrgID int64 = 1
|
|
|
|
alertRule1 := tests.CreateTestAlertRule(t, ctx, dbstore, 60, mainOrgID)
|
|
orgID := alertRule1.OrgID
|
|
|
|
alertRule2 := tests.CreateTestAlertRule(t, ctx, dbstore, 60, mainOrgID)
|
|
require.Equal(t, orgID, alertRule2.OrgID)
|
|
|
|
alertRule3 := tests.CreateTestAlertRule(t, ctx, dbstore, 60, mainOrgID)
|
|
require.Equal(t, orgID, alertRule3.OrgID)
|
|
|
|
alertRule4 := tests.CreateTestAlertRule(t, ctx, dbstore, 60, mainOrgID)
|
|
require.Equal(t, orgID, alertRule4.OrgID)
|
|
|
|
t.Run("can save and read new alert instance", func(t *testing.T) {
|
|
labels := models.InstanceLabels{"test": "testValue"}
|
|
_, hash, _ := labels.StringAndHash()
|
|
instance := models.AlertInstance{
|
|
AlertInstanceKey: models.AlertInstanceKey{
|
|
RuleOrgID: alertRule1.OrgID,
|
|
RuleUID: alertRule1.UID,
|
|
LabelsHash: hash,
|
|
},
|
|
CurrentState: models.InstanceStateFiring,
|
|
CurrentReason: string(models.InstanceStateError),
|
|
Labels: labels,
|
|
}
|
|
err := dbstore.SaveAlertInstances(ctx, instance)
|
|
require.NoError(t, err)
|
|
|
|
listCmd := &models.ListAlertInstancesQuery{
|
|
RuleOrgID: instance.RuleOrgID,
|
|
RuleUID: instance.RuleUID,
|
|
}
|
|
err = dbstore.ListAlertInstances(ctx, listCmd)
|
|
require.NoError(t, err)
|
|
|
|
require.Len(t, listCmd.Result, 1)
|
|
require.Equal(t, instance.Labels, listCmd.Result[0].Labels)
|
|
require.Equal(t, alertRule1.OrgID, listCmd.Result[0].RuleOrgID)
|
|
require.Equal(t, alertRule1.UID, listCmd.Result[0].RuleUID)
|
|
require.Equal(t, instance.CurrentReason, listCmd.Result[0].CurrentReason)
|
|
})
|
|
|
|
t.Run("can save and read new alert instance with no labels", func(t *testing.T) {
|
|
labels := models.InstanceLabels{}
|
|
_, hash, _ := labels.StringAndHash()
|
|
instance := models.AlertInstance{
|
|
AlertInstanceKey: models.AlertInstanceKey{
|
|
RuleOrgID: alertRule2.OrgID,
|
|
RuleUID: alertRule2.UID,
|
|
LabelsHash: hash,
|
|
},
|
|
CurrentState: models.InstanceStateNormal,
|
|
Labels: labels,
|
|
}
|
|
err := dbstore.SaveAlertInstances(ctx, instance)
|
|
require.NoError(t, err)
|
|
|
|
listCmd := &models.ListAlertInstancesQuery{
|
|
RuleOrgID: instance.RuleOrgID,
|
|
RuleUID: instance.RuleUID,
|
|
}
|
|
|
|
err = dbstore.ListAlertInstances(ctx, listCmd)
|
|
require.NoError(t, err)
|
|
|
|
require.Len(t, listCmd.Result, 1)
|
|
require.Equal(t, alertRule2.OrgID, listCmd.Result[0].RuleOrgID)
|
|
require.Equal(t, alertRule2.UID, listCmd.Result[0].RuleUID)
|
|
require.Equal(t, instance.Labels, listCmd.Result[0].Labels)
|
|
})
|
|
|
|
t.Run("can save two instances with same org_id, uid and different labels", func(t *testing.T) {
|
|
labels := models.InstanceLabels{"test": "testValue"}
|
|
_, hash, _ := labels.StringAndHash()
|
|
instance1 := models.AlertInstance{
|
|
AlertInstanceKey: models.AlertInstanceKey{
|
|
RuleOrgID: alertRule3.OrgID,
|
|
RuleUID: alertRule3.UID,
|
|
LabelsHash: hash,
|
|
},
|
|
CurrentState: models.InstanceStateFiring,
|
|
Labels: labels,
|
|
}
|
|
|
|
err := dbstore.SaveAlertInstances(ctx, instance1)
|
|
require.NoError(t, err)
|
|
|
|
labels = models.InstanceLabels{"test": "testValue2"}
|
|
_, hash, _ = labels.StringAndHash()
|
|
instance2 := models.AlertInstance{
|
|
AlertInstanceKey: models.AlertInstanceKey{
|
|
RuleOrgID: instance1.RuleOrgID,
|
|
RuleUID: instance1.RuleUID,
|
|
LabelsHash: hash,
|
|
},
|
|
CurrentState: models.InstanceStateFiring,
|
|
Labels: labels,
|
|
}
|
|
err = dbstore.SaveAlertInstances(ctx, instance2)
|
|
require.NoError(t, err)
|
|
|
|
listQuery := &models.ListAlertInstancesQuery{
|
|
RuleOrgID: instance1.RuleOrgID,
|
|
RuleUID: instance1.RuleUID,
|
|
}
|
|
|
|
err = dbstore.ListAlertInstances(ctx, listQuery)
|
|
require.NoError(t, err)
|
|
|
|
require.Len(t, listQuery.Result, 2)
|
|
})
|
|
|
|
t.Run("can list all added instances in org", func(t *testing.T) {
|
|
listQuery := &models.ListAlertInstancesQuery{
|
|
RuleOrgID: orgID,
|
|
}
|
|
|
|
err := dbstore.ListAlertInstances(ctx, listQuery)
|
|
require.NoError(t, err)
|
|
|
|
require.Len(t, listQuery.Result, 4)
|
|
})
|
|
|
|
t.Run("can list all added instances in org filtered by current state", func(t *testing.T) {
|
|
listQuery := &models.ListAlertInstancesQuery{
|
|
RuleOrgID: orgID,
|
|
State: models.InstanceStateNormal,
|
|
}
|
|
|
|
err := dbstore.ListAlertInstances(ctx, listQuery)
|
|
require.NoError(t, err)
|
|
|
|
require.Len(t, listQuery.Result, 1)
|
|
})
|
|
|
|
t.Run("update instance with same org_id, uid and different state", func(t *testing.T) {
|
|
labels := models.InstanceLabels{"test": "testValue"}
|
|
_, hash, _ := labels.StringAndHash()
|
|
instance1 := models.AlertInstance{
|
|
AlertInstanceKey: models.AlertInstanceKey{
|
|
RuleOrgID: alertRule4.OrgID,
|
|
RuleUID: alertRule4.UID,
|
|
LabelsHash: hash,
|
|
},
|
|
CurrentState: models.InstanceStateFiring,
|
|
Labels: labels,
|
|
}
|
|
|
|
err := dbstore.SaveAlertInstances(ctx, instance1)
|
|
require.NoError(t, err)
|
|
|
|
instance2 := models.AlertInstance{
|
|
AlertInstanceKey: models.AlertInstanceKey{
|
|
RuleOrgID: alertRule4.OrgID,
|
|
RuleUID: instance1.RuleUID,
|
|
LabelsHash: instance1.LabelsHash,
|
|
},
|
|
CurrentState: models.InstanceStateNormal,
|
|
Labels: instance1.Labels,
|
|
}
|
|
err = dbstore.SaveAlertInstances(ctx, instance2)
|
|
require.NoError(t, err)
|
|
|
|
listQuery := &models.ListAlertInstancesQuery{
|
|
RuleOrgID: alertRule4.OrgID,
|
|
RuleUID: alertRule4.UID,
|
|
}
|
|
|
|
err = dbstore.ListAlertInstances(ctx, listQuery)
|
|
require.NoError(t, err)
|
|
|
|
require.Len(t, listQuery.Result, 1)
|
|
|
|
require.Equal(t, instance2.RuleOrgID, listQuery.Result[0].RuleOrgID)
|
|
require.Equal(t, instance2.RuleUID, listQuery.Result[0].RuleUID)
|
|
require.Equal(t, instance2.Labels, listQuery.Result[0].Labels)
|
|
require.Equal(t, instance2.CurrentState, listQuery.Result[0].CurrentState)
|
|
})
|
|
}
|