mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting: Write and Delete multiple alert instances. (#54072)
Prior to this change, all alert instance writes and deletes happened
individually, in their own database transaction. This change batches up
writes or deletes for a given rule's evaluation loop into a single
transaction before applying it.
Before:
```
goos: darwin
goarch: arm64
pkg: github.com/grafana/grafana/pkg/services/ngalert/store
BenchmarkAlertInstanceOperations-8 398 2991381 ns/op 1133537 B/op 27703 allocs/op
--- BENCH: BenchmarkAlertInstanceOperations-8
util.go:127: alert definition: {orgID: 1, UID: FovKXiRVzm} with title: "an alert definition FTvFXmRVkz" interval: 60 created
util.go:127: alert definition: {orgID: 1, UID: foDFXmRVkm} with title: "an alert definition fovFXmRVkz" interval: 60 created
util.go:127: alert definition: {orgID: 1, UID: VQvFuigVkm} with title: "an alert definition VwDKXmR4kz" interval: 60 created
PASS
ok github.com/grafana/grafana/pkg/services/ngalert/store 1.619s
```
After:
```
goos: darwin
goarch: arm64
pkg: github.com/grafana/grafana/pkg/services/ngalert/store
BenchmarkAlertInstanceOperations-8 1440 816484 ns/op 352297 B/op 6529 allocs/op
--- BENCH: BenchmarkAlertInstanceOperations-8
util.go:127: alert definition: {orgID: 1, UID: 302r_igVzm} with title: "an alert definition q0h9lmR4zz" interval: 60 created
util.go:127: alert definition: {orgID: 1, UID: 71hrlmR4km} with title: "an alert definition nJ29_mR4zz" interval: 60 created
util.go:127: alert definition: {orgID: 1, UID: Cahr_mR4zm} with title: "an alert definition ja2rlmg4zz" interval: 60 created
PASS
ok github.com/grafana/grafana/pkg/services/ngalert/store 1.383s
```
So we cut time per operation by about 73%, bytes allocated by about 69%, and
the number of allocations by about 76% when storing and deleting 100 instances.
This change also updates some of our tests so that they run successfully against PostgreSQL: we were using random int64 values, but PostgreSQL integers, which our tables use, max out at 2^31-1.
This commit is contained in:
@@ -7,10 +7,8 @@ import (
|
||||
|
||||
// AlertInstance represents a single alert instance.
|
||||
type AlertInstance struct {
|
||||
RuleOrgID int64 `xorm:"rule_org_id"`
|
||||
RuleUID string `xorm:"rule_uid"`
|
||||
AlertInstanceKey `xorm:"extends"`
|
||||
Labels InstanceLabels
|
||||
LabelsHash string
|
||||
CurrentState InstanceStateType
|
||||
CurrentReason string
|
||||
CurrentStateSince time.Time
|
||||
@@ -18,6 +16,12 @@ type AlertInstance struct {
|
||||
LastEvalTime time.Time
|
||||
}
|
||||
|
||||
type AlertInstanceKey struct {
|
||||
RuleOrgID int64 `xorm:"rule_org_id"`
|
||||
RuleUID string `xorm:"rule_uid"`
|
||||
LabelsHash string
|
||||
}
|
||||
|
||||
// InstanceStateType is an enum for instance states.
|
||||
type InstanceStateType string
|
||||
|
||||
@@ -44,18 +48,6 @@ func (i InstanceStateType) IsValid() bool {
|
||||
i == InstanceStateError
|
||||
}
|
||||
|
||||
// SaveAlertInstanceCommand is the query for saving a new alert instance.
|
||||
type SaveAlertInstanceCommand struct {
|
||||
RuleOrgID int64
|
||||
RuleUID string
|
||||
Labels InstanceLabels
|
||||
State InstanceStateType
|
||||
StateReason string
|
||||
LastEvalTime time.Time
|
||||
CurrentStateSince time.Time
|
||||
CurrentStateEnd time.Time
|
||||
}
|
||||
|
||||
// GetAlertInstanceQuery is the query for retrieving/deleting an alert definition by ID.
|
||||
// nolint:unused
|
||||
type GetAlertInstanceQuery struct {
|
||||
@@ -78,11 +70,7 @@ type ListAlertInstancesQuery struct {
|
||||
|
||||
// ValidateAlertInstance validates that the alert instance contains an alert rule id,
|
||||
// and state.
|
||||
func ValidateAlertInstance(alertInstance *AlertInstance) error {
|
||||
if alertInstance == nil {
|
||||
return fmt.Errorf("alert instance is invalid because it is nil")
|
||||
}
|
||||
|
||||
func ValidateAlertInstance(alertInstance AlertInstance) error {
|
||||
if alertInstance.RuleOrgID == 0 {
|
||||
return fmt.Errorf("alert instance is invalid due to missing alert rule organisation")
|
||||
}
|
||||
|
||||
@@ -42,7 +42,7 @@ func (il *InstanceLabels) StringKey() (string, error) {
|
||||
tl := labelsToTupleLabels(*il)
|
||||
b, err := json.Marshal(tl)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("can not gereate key due to failure to encode labels: %w", err)
|
||||
return "", fmt.Errorf("could not generate key due to failure to encode labels: %w", err)
|
||||
}
|
||||
return string(b), nil
|
||||
}
|
||||
@@ -54,7 +54,7 @@ func (il *InstanceLabels) StringAndHash() (string, string, error) {
|
||||
|
||||
b, err := json.Marshal(tl)
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("can not gereate key for alert instance due to failure to encode labels: %w", err)
|
||||
return "", "", fmt.Errorf("could not generate key for alert instance due to failure to encode labels: %w", err)
|
||||
}
|
||||
|
||||
h := sha1.New()
|
||||
@@ -76,7 +76,7 @@ type tupleLabels []tupleLabel
|
||||
type tupleLabel [2]string
|
||||
|
||||
// Sort tupleLabels by each element's first property (key).
|
||||
func (t *tupleLabels) sortBtKey() {
|
||||
func (t *tupleLabels) sortByKey() {
|
||||
if t == nil {
|
||||
return
|
||||
}
|
||||
@@ -91,7 +91,7 @@ func labelsToTupleLabels(l InstanceLabels) tupleLabels {
|
||||
for k, v := range l {
|
||||
t = append(t, tupleLabel{k, v})
|
||||
}
|
||||
t.sortBtKey()
|
||||
t.sortByKey()
|
||||
return t
|
||||
}
|
||||
|
||||
|
||||
@@ -53,25 +53,25 @@ func AlertRuleGen(mutators ...AlertRuleMutator) func() *AlertRule {
|
||||
if rand.Int63()%2 == 0 {
|
||||
d := util.GenerateShortUID()
|
||||
dashUID = &d
|
||||
p := rand.Int63()
|
||||
p := rand.Int63n(1500)
|
||||
panelID = &p
|
||||
}
|
||||
|
||||
rule := &AlertRule{
|
||||
ID: rand.Int63(),
|
||||
OrgID: rand.Int63(),
|
||||
ID: rand.Int63n(1500),
|
||||
OrgID: rand.Int63n(1500),
|
||||
Title: "TEST-ALERT-" + util.GenerateShortUID(),
|
||||
Condition: "A",
|
||||
Data: []AlertQuery{GenerateAlertQuery()},
|
||||
Updated: time.Now().Add(-time.Duration(rand.Intn(100) + 1)),
|
||||
IntervalSeconds: rand.Int63n(60) + 1,
|
||||
Version: rand.Int63(),
|
||||
Version: rand.Int63n(1500), // Don't generate a rule ID too big for postgres
|
||||
UID: util.GenerateShortUID(),
|
||||
NamespaceUID: util.GenerateShortUID(),
|
||||
DashboardUID: dashUID,
|
||||
PanelID: panelID,
|
||||
RuleGroup: "TEST-GROUP-" + util.GenerateShortUID(),
|
||||
RuleGroupIndex: rand.Int(),
|
||||
RuleGroupIndex: rand.Intn(1500),
|
||||
NoDataState: randNoDataState(),
|
||||
ExecErrState: randErrState(),
|
||||
For: forInterval,
|
||||
@@ -95,7 +95,7 @@ func WithUniqueID() AlertRuleMutator {
|
||||
usedID := make(map[int64]struct{})
|
||||
return func(rule *AlertRule) {
|
||||
for {
|
||||
id := rand.Int63()
|
||||
id := rand.Int63n(1500)
|
||||
if _, ok := usedID[id]; !ok {
|
||||
usedID[id] = struct{}{}
|
||||
rule.ID = id
|
||||
|
||||
Reference in New Issue
Block a user