mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting: Add sync state persister to save entire state of the rule (#96628)
This commit is contained in:
committed by
GitHub
parent
63a68f3e99
commit
651430e34a
80
pkg/services/ngalert/state/persister_sync_rule.go
Normal file
80
pkg/services/ngalert/state/persister_sync_rule.go
Normal file
@@ -0,0 +1,80 @@
|
||||
package state
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
|
||||
"github.com/grafana/grafana/pkg/infra/log"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||
)
|
||||
|
||||
// SyncRuleStatePersister persists the entire state of an alert rule in a
// single synchronous call (see Sync). Its Async method is a no-op: this
// persister writes only when Sync is invoked.
type SyncRuleStatePersister struct {
	log log.Logger
	// store receives the instances on Sync; when nil, Sync is a no-op.
	store InstanceStore
	// doNotSaveNormalState, when true, skips persisting states that are
	// Normal with no reason and unchanged in this evaluation.
	doNotSaveNormalState bool
}
|
||||
|
||||
// NewSyncRuleStatePersisiter returns a StatePersister that synchronously
// saves the full state of a rule to cfg.InstanceStore.
//
// NOTE(review): the name contains a typo ("Persisiter" -> "Persister");
// it is kept as-is because renaming would break existing callers.
func NewSyncRuleStatePersisiter(log log.Logger, cfg ManagerCfg) StatePersister {
	return &SyncRuleStatePersister{
		log:                  log,
		store:                cfg.InstanceStore,
		doNotSaveNormalState: cfg.DoNotSaveNormalState,
	}
}
|
||||
|
||||
// Async is a no-op: SyncRuleStatePersister persists state only through Sync.
func (a *SyncRuleStatePersister) Async(_ context.Context, _ AlertInstancesProvider) {
	a.log.Debug("Async: No-Op")
}
|
||||
|
||||
func (a *SyncRuleStatePersister) Sync(ctx context.Context, span trace.Span, ruleKey models.AlertRuleKeyWithGroup, states StateTransitions) {
|
||||
if a.store == nil || len(states) == 0 {
|
||||
return
|
||||
}
|
||||
logger := a.log.FromContext(ctx)
|
||||
|
||||
instancesToSave := make([]models.AlertInstance, 0, len(states))
|
||||
|
||||
for _, s := range states {
|
||||
if s.IsStale() {
|
||||
continue
|
||||
}
|
||||
|
||||
if a.doNotSaveNormalState && IsNormalStateWithNoReason(s.State) && !s.Changed() {
|
||||
continue
|
||||
}
|
||||
|
||||
key, err := s.GetAlertInstanceKey()
|
||||
if err != nil {
|
||||
logger.Error("Failed to create a key for alert state to save it. The state will be ignored ", "cacheID", s.CacheID, "error", err, "labels", s.Labels.String(), "rule_uid", ruleKey.UID, "rule_group", ruleKey.RuleGroup)
|
||||
continue
|
||||
}
|
||||
|
||||
instance := models.AlertInstance{
|
||||
AlertInstanceKey: key,
|
||||
Labels: models.InstanceLabels(s.Labels),
|
||||
CurrentState: models.InstanceStateType(s.State.State.String()),
|
||||
CurrentReason: s.StateReason,
|
||||
LastEvalTime: s.LastEvaluationTime,
|
||||
CurrentStateSince: s.StartsAt,
|
||||
CurrentStateEnd: s.EndsAt,
|
||||
ResolvedAt: s.ResolvedAt,
|
||||
LastSentAt: s.LastSentAt,
|
||||
ResultFingerprint: s.ResultFingerprint.String(),
|
||||
}
|
||||
|
||||
instancesToSave = append(instancesToSave, instance)
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
logger.Debug("Saving alert states", "count", len(instancesToSave))
|
||||
err := a.store.SaveAlertInstancesForRule(ctx, ruleKey, instancesToSave)
|
||||
if err != nil {
|
||||
logger.Error("Failed to save alert rule state", "error", err, "duration", time.Since(start))
|
||||
return
|
||||
}
|
||||
|
||||
logger.Debug("Saving alert states done", "count", len(instancesToSave), "duration", time.Since(start))
|
||||
span.AddEvent("saved alert rule state")
|
||||
}
|
||||
126
pkg/services/ngalert/state/persister_sync_rule_test.go
Normal file
126
pkg/services/ngalert/state/persister_sync_rule_test.go
Normal file
@@ -0,0 +1,126 @@
|
||||
package state
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
"go.opentelemetry.io/otel"
|
||||
|
||||
"github.com/grafana/grafana-plugin-sdk-go/data"
|
||||
"github.com/grafana/grafana/pkg/infra/log"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||
)
|
||||
|
||||
func TestSyncRuleStatePersister_Sync(t *testing.T) {
|
||||
const orgID = int64(1)
|
||||
const ruleUID = "rule-id"
|
||||
const ruleGroup = "test-group"
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
states StateTransitions
|
||||
ruleKey models.AlertRuleKeyWithGroup
|
||||
doNotSaveNormalState bool
|
||||
expectedError error
|
||||
}{
|
||||
{
|
||||
name: "success case",
|
||||
states: StateTransitions{
|
||||
{
|
||||
State: &State{
|
||||
Labels: data.Labels{
|
||||
"label-1": "value-1",
|
||||
},
|
||||
LastEvaluationTime: time.Now(),
|
||||
StartsAt: time.Now(),
|
||||
EndsAt: time.Now(),
|
||||
},
|
||||
},
|
||||
},
|
||||
ruleKey: models.AlertRuleKeyWithGroup{
|
||||
AlertRuleKey: models.AlertRuleKey{
|
||||
OrgID: orgID,
|
||||
UID: ruleUID,
|
||||
},
|
||||
RuleGroup: ruleGroup,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
mockStore := new(FakeInstanceStore)
|
||||
persister := &SyncRuleStatePersister{
|
||||
log: log.New("test"),
|
||||
store: mockStore,
|
||||
}
|
||||
tracer := otel.Tracer("test")
|
||||
ctx, span := tracer.Start(ctx, "test-span")
|
||||
|
||||
instances := make([]models.AlertInstance, 0, len(tc.states))
|
||||
for _, s := range tc.states {
|
||||
key, err := s.GetAlertInstanceKey()
|
||||
require.NoError(t, err)
|
||||
instance := models.AlertInstance{
|
||||
AlertInstanceKey: key,
|
||||
Labels: models.InstanceLabels(s.Labels),
|
||||
CurrentState: models.InstanceStateType(s.State.State.String()),
|
||||
CurrentReason: s.StateReason,
|
||||
LastEvalTime: s.LastEvaluationTime,
|
||||
CurrentStateSince: s.StartsAt,
|
||||
CurrentStateEnd: s.EndsAt,
|
||||
ResolvedAt: s.ResolvedAt,
|
||||
LastSentAt: s.LastSentAt,
|
||||
ResultFingerprint: s.ResultFingerprint.String(),
|
||||
}
|
||||
instances = append(instances, instance)
|
||||
}
|
||||
persister.Sync(ctx, span, tc.ruleKey, tc.states)
|
||||
|
||||
recordedCalls := mockStore.RecordedOps()
|
||||
require.Len(t, recordedCalls, 1)
|
||||
|
||||
for _, op := range recordedCalls {
|
||||
switch q := op.(type) {
|
||||
case FakeInstanceStoreOp:
|
||||
require.Equal(t, "SaveAlertInstancesForRule", q.Name)
|
||||
require.Equal(t, tc.ruleKey, q.Args[1])
|
||||
require.Equal(t, instances, q.Args[2])
|
||||
default:
|
||||
require.Fail(t, "unexpected call", "op: %v", op)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
t.Run("no-op when store is nil", func(t *testing.T) {
|
||||
persister := &SyncRuleStatePersister{
|
||||
log: log.New("test"),
|
||||
}
|
||||
tracer := otel.Tracer("test")
|
||||
ctx := context.Background()
|
||||
ctx, span := tracer.Start(ctx, "test-span")
|
||||
ruleKey := models.AlertRuleKeyWithGroup{
|
||||
AlertRuleKey: models.AlertRuleKey{
|
||||
OrgID: orgID,
|
||||
UID: ruleUID,
|
||||
},
|
||||
RuleGroup: ruleGroup,
|
||||
}
|
||||
states := StateTransitions{
|
||||
{
|
||||
State: &State{
|
||||
Labels: data.Labels{
|
||||
"label-1": "value-1",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// There is no store, so no call to SaveAlertInstancesForRule or panic
|
||||
persister.Sync(ctx, span, ruleKey, states)
|
||||
})
|
||||
}
|
||||
@@ -59,6 +59,17 @@ func (f *FakeInstanceStore) DeleteAlertInstances(ctx context.Context, q ...model
|
||||
}
|
||||
|
||||
func (f *FakeInstanceStore) SaveAlertInstancesForRule(ctx context.Context, key models.AlertRuleKeyWithGroup, instances []models.AlertInstance) error {
|
||||
f.mtx.Lock()
|
||||
defer f.mtx.Unlock()
|
||||
|
||||
f.recordedOps = append(f.recordedOps, FakeInstanceStoreOp{
|
||||
Name: "SaveAlertInstancesForRule", Args: []any{
|
||||
ctx,
|
||||
key,
|
||||
instances,
|
||||
},
|
||||
})
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user