Alerting: Write and Delete multiple alert instances. (#54072)

Prior to this change, every alert instance write and delete happened
individually, each in its own database transaction. This change collects
the writes or deletes produced by a given rule's evaluation loop and
applies them together in a single transaction.

Before:
```
goos: darwin
goarch: arm64
pkg: github.com/grafana/grafana/pkg/services/ngalert/store
BenchmarkAlertInstanceOperations-8           398           2991381 ns/op         1133537 B/op      27703 allocs/op
--- BENCH: BenchmarkAlertInstanceOperations-8
    util.go:127: alert definition: {orgID: 1, UID: FovKXiRVzm} with title: "an alert definition FTvFXmRVkz" interval: 60 created
    util.go:127: alert definition: {orgID: 1, UID: foDFXmRVkm} with title: "an alert definition fovFXmRVkz" interval: 60 created
    util.go:127: alert definition: {orgID: 1, UID: VQvFuigVkm} with title: "an alert definition VwDKXmR4kz" interval: 60 created
PASS
ok      github.com/grafana/grafana/pkg/services/ngalert/store   1.619s
```

After:
```
goos: darwin
goarch: arm64
pkg: github.com/grafana/grafana/pkg/services/ngalert/store
BenchmarkAlertInstanceOperations-8          1440            816484 ns/op          352297 B/op       6529 allocs/op
--- BENCH: BenchmarkAlertInstanceOperations-8
    util.go:127: alert definition: {orgID: 1, UID: 302r_igVzm} with title: "an alert definition q0h9lmR4zz" interval: 60 created
    util.go:127: alert definition: {orgID: 1, UID: 71hrlmR4km} with title: "an alert definition nJ29_mR4zz" interval: 60 created
    util.go:127: alert definition: {orgID: 1, UID: Cahr_mR4zm} with title: "an alert definition ja2rlmg4zz" interval: 60 created
PASS
ok      github.com/grafana/grafana/pkg/services/ngalert/store   1.383s
```

So, when storing and deleting 100 instances, we cut time per operation by
about 73%, bytes allocated by about 69%, and the number of allocations by
about 76%.

This change also updates some of our tests so that they run successfully against PostgreSQL: we were using random int64 values, but PostgreSQL integers, which our tables use, max out at 2^31-1.
This commit is contained in:
Joe Blubaugh
2022-09-02 11:17:20 +08:00
committed by GitHub
parent d706320d0a
commit 5e4fd94413
15 changed files with 559 additions and 210 deletions

View File

@@ -7,10 +7,8 @@ import (
// AlertInstance represents a single alert instance.
type AlertInstance struct {
RuleOrgID int64 `xorm:"rule_org_id"`
RuleUID string `xorm:"rule_uid"`
AlertInstanceKey `xorm:"extends"`
Labels InstanceLabels
LabelsHash string
CurrentState InstanceStateType
CurrentReason string
CurrentStateSince time.Time
@@ -18,6 +16,12 @@ type AlertInstance struct {
LastEvalTime time.Time
}
type AlertInstanceKey struct {
RuleOrgID int64 `xorm:"rule_org_id"`
RuleUID string `xorm:"rule_uid"`
LabelsHash string
}
// InstanceStateType is an enum for instance states.
type InstanceStateType string
@@ -44,18 +48,6 @@ func (i InstanceStateType) IsValid() bool {
i == InstanceStateError
}
// SaveAlertInstanceCommand is the query for saving a new alert instance.
type SaveAlertInstanceCommand struct {
RuleOrgID int64
RuleUID string
Labels InstanceLabels
State InstanceStateType
StateReason string
LastEvalTime time.Time
CurrentStateSince time.Time
CurrentStateEnd time.Time
}
// GetAlertInstanceQuery is the query for retrieving/deleting an alert definition by ID.
// nolint:unused
type GetAlertInstanceQuery struct {
@@ -78,11 +70,7 @@ type ListAlertInstancesQuery struct {
// ValidateAlertInstance validates that the alert instance contains an alert rule id,
// and state.
func ValidateAlertInstance(alertInstance *AlertInstance) error {
if alertInstance == nil {
return fmt.Errorf("alert instance is invalid because it is nil")
}
func ValidateAlertInstance(alertInstance AlertInstance) error {
if alertInstance.RuleOrgID == 0 {
return fmt.Errorf("alert instance is invalid due to missing alert rule organisation")
}

View File

@@ -42,7 +42,7 @@ func (il *InstanceLabels) StringKey() (string, error) {
tl := labelsToTupleLabels(*il)
b, err := json.Marshal(tl)
if err != nil {
return "", fmt.Errorf("can not gereate key due to failure to encode labels: %w", err)
return "", fmt.Errorf("could not generate key due to failure to encode labels: %w", err)
}
return string(b), nil
}
@@ -54,7 +54,7 @@ func (il *InstanceLabels) StringAndHash() (string, string, error) {
b, err := json.Marshal(tl)
if err != nil {
return "", "", fmt.Errorf("can not gereate key for alert instance due to failure to encode labels: %w", err)
return "", "", fmt.Errorf("could not generate key for alert instance due to failure to encode labels: %w", err)
}
h := sha1.New()
@@ -76,7 +76,7 @@ type tupleLabels []tupleLabel
type tupleLabel [2]string
// Sort tupleLabels by each elements first property (key).
func (t *tupleLabels) sortBtKey() {
func (t *tupleLabels) sortByKey() {
if t == nil {
return
}
@@ -91,7 +91,7 @@ func labelsToTupleLabels(l InstanceLabels) tupleLabels {
for k, v := range l {
t = append(t, tupleLabel{k, v})
}
t.sortBtKey()
t.sortByKey()
return t
}

View File

@@ -53,25 +53,25 @@ func AlertRuleGen(mutators ...AlertRuleMutator) func() *AlertRule {
if rand.Int63()%2 == 0 {
d := util.GenerateShortUID()
dashUID = &d
p := rand.Int63()
p := rand.Int63n(1500)
panelID = &p
}
rule := &AlertRule{
ID: rand.Int63(),
OrgID: rand.Int63(),
ID: rand.Int63n(1500),
OrgID: rand.Int63n(1500),
Title: "TEST-ALERT-" + util.GenerateShortUID(),
Condition: "A",
Data: []AlertQuery{GenerateAlertQuery()},
Updated: time.Now().Add(-time.Duration(rand.Intn(100) + 1)),
IntervalSeconds: rand.Int63n(60) + 1,
Version: rand.Int63(),
Version: rand.Int63n(1500), // Don't generate a rule ID too big for postgres
UID: util.GenerateShortUID(),
NamespaceUID: util.GenerateShortUID(),
DashboardUID: dashUID,
PanelID: panelID,
RuleGroup: "TEST-GROUP-" + util.GenerateShortUID(),
RuleGroupIndex: rand.Int(),
RuleGroupIndex: rand.Intn(1500),
NoDataState: randNoDataState(),
ExecErrState: randErrState(),
For: forInterval,
@@ -95,7 +95,7 @@ func WithUniqueID() AlertRuleMutator {
usedID := make(map[int64]struct{})
return func(rule *AlertRule) {
for {
id := rand.Int63()
id := rand.Int63n(1500)
if _, ok := usedID[id]; !ok {
usedID[id] = struct{}{}
rule.ID = id