From a85adeed965fe72e3e297b93fa2197d98e64a10b Mon Sep 17 00:00:00 2001 From: Yuri Tseretyan <yuriy.tseretyan@grafana.com> Date: Tue, 6 Dec 2022 12:33:15 -0500 Subject: [PATCH] Alerting: Update state history service to filter states transitions (#58863) * rename the method to better reflect its behavior * make historian filter transition on itself * call historian with all changes --- .../ngalert/state/historian/annotation.go | 14 ++- .../state/historian/annotation_test.go | 100 ++++++++++++++++++ pkg/services/ngalert/state/manager.go | 25 +---- pkg/services/ngalert/state/persist.go | 2 +- pkg/services/ngalert/state/state.go | 2 +- pkg/services/ngalert/state/testing.go | 2 +- 6 files changed, 120 insertions(+), 25 deletions(-) create mode 100644 pkg/services/ngalert/state/historian/annotation_test.go diff --git a/pkg/services/ngalert/state/historian/annotation.go b/pkg/services/ngalert/state/historian/annotation.go index c94805178af..9687b463592 100644 --- a/pkg/services/ngalert/state/historian/annotation.go +++ b/pkg/services/ngalert/state/historian/annotation.go @@ -9,6 +9,7 @@ import ( "time" "github.com/grafana/grafana-plugin-sdk-go/data" + "github.com/grafana/grafana/pkg/components/simplejson" "github.com/grafana/grafana/pkg/infra/log" "github.com/grafana/grafana/pkg/services/annotations" @@ -34,7 +35,7 @@ func NewAnnotationHistorian(annotations annotations.Repository, dashboards dashb } // RecordStates writes a number of state transitions for a given rule to state history. -func (h *AnnotationStateHistorian) RecordStates(ctx context.Context, rule *ngmodels.AlertRule, states []state.StateTransition) { +func (h *AnnotationStateHistorian) RecordStatesAsync(ctx context.Context, rule *ngmodels.AlertRule, states []state.StateTransition) { logger := h.log.FromContext(ctx) // Build annotations before starting goroutine, to make sure all data is copied and won't mutate underneath us. annotations := h.buildAnnotations(rule, states, logger) @@ -45,6 +46,9 @@ func (h *AnnotationStateHistorian) RecordStates(ctx context.Context, rule *ngmod func (h *AnnotationStateHistorian) buildAnnotations(rule *ngmodels.AlertRule, states []state.StateTransition, logger log.Logger) []annotations.Item { items := make([]annotations.Item, 0, len(states)) for _, state := range states { + if !shouldAnnotate(state) { + continue + } logger.Debug("Alert state changed creating annotation", "newState", state.Formatted(), "oldState", state.PreviousFormatted()) annotationText, annotationData := buildAnnotationTextAndData(rule, state.State) @@ -154,3 +158,11 @@ func removePrivateLabels(labels data.Labels) data.Labels { } return result } + +func shouldAnnotate(transition state.StateTransition) bool { + // Do not log not transitioned states normal states if it was marked as stale + if !transition.Changed() || transition.StateReason == ngmodels.StateReasonMissingSeries && transition.PreviousState == eval.Normal && transition.State.State == eval.Normal { + return false + } + return true +} diff --git a/pkg/services/ngalert/state/historian/annotation_test.go b/pkg/services/ngalert/state/historian/annotation_test.go new file mode 100644 index 00000000000..eb008035c3c --- /dev/null +++ b/pkg/services/ngalert/state/historian/annotation_test.go @@ -0,0 +1,100 @@ +package historian + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/grafana/grafana/pkg/services/ngalert/eval" + "github.com/grafana/grafana/pkg/services/ngalert/models" + "github.com/grafana/grafana/pkg/services/ngalert/state" +) + +func TestShouldAnnotate(t *testing.T) { + allStates := []eval.State{ + eval.Normal, + eval.Alerting, + eval.Pending, + eval.NoData, + eval.Error, + } + + type Transition struct { + State eval.State + StateReason string + PreviousState eval.State + PreviousStateReason string + } + + transition := func(from eval.State, fromReason string, to eval.State, toReason string) Transition { + return Transition{ + PreviousState: from, + PreviousStateReason: fromReason, + State: to, + StateReason: toReason, + } + } + + noTransition := func(state eval.State, stateReason string) Transition { + return transition(state, stateReason, state, stateReason) + } + + knownReasons := []string{ + "", + models.StateReasonMissingSeries, + eval.Error.String(), + eval.NoData.String(), + } + + allCombinations := make([]Transition, 0, len(allStates)*len(allStates)*len(knownReasons)*len(knownReasons)) + for _, from := range allStates { + for _, reasonFrom := range knownReasons { + for _, to := range allStates { + for _, reasonTo := range knownReasons { + allCombinations = append(allCombinations, transition(from, reasonFrom, to, reasonTo)) + } + } + } + } + + negativeTransitions := map[Transition]struct{}{ + noTransition(eval.Normal, ""): {}, + noTransition(eval.Normal, eval.Error.String()): {}, + noTransition(eval.Normal, eval.NoData.String()): {}, + noTransition(eval.Normal, models.StateReasonMissingSeries): {}, + noTransition(eval.Alerting, ""): {}, + noTransition(eval.Alerting, eval.Error.String()): {}, + noTransition(eval.Alerting, eval.NoData.String()): {}, + noTransition(eval.Alerting, models.StateReasonMissingSeries): {}, + noTransition(eval.Pending, ""): {}, + noTransition(eval.Pending, eval.Error.String()): {}, + noTransition(eval.Pending, eval.NoData.String()): {}, + noTransition(eval.Pending, models.StateReasonMissingSeries): {}, + noTransition(eval.NoData, ""): {}, + noTransition(eval.NoData, eval.Error.String()): {}, + noTransition(eval.NoData, eval.NoData.String()): {}, + noTransition(eval.NoData, models.StateReasonMissingSeries): {}, + noTransition(eval.Error, ""): {}, + noTransition(eval.Error, eval.Error.String()): {}, + noTransition(eval.Error, eval.NoData.String()): {}, + noTransition(eval.Error, models.StateReasonMissingSeries): {}, + + transition(eval.Normal, "", eval.Normal, models.StateReasonMissingSeries): {}, + transition(eval.Normal, eval.Error.String(), eval.Normal, models.StateReasonMissingSeries): {}, + transition(eval.Normal, eval.NoData.String(), eval.Normal, models.StateReasonMissingSeries): {}, + } + + for _, tc := range allCombinations { + _, ok := negativeTransitions[tc] + trans := state.StateTransition{ + State: &state.State{State: tc.State, StateReason: tc.StateReason}, + PreviousState: tc.PreviousState, + PreviousStateReason: tc.PreviousStateReason, + } + + t.Run(fmt.Sprintf("%s -> %s should be %v", trans.PreviousFormatted(), trans.Formatted(), !ok), func(t *testing.T) { + require.Equal(t, !ok, shouldAnnotate(trans)) + }) + } +} diff --git a/pkg/services/ngalert/state/manager.go b/pkg/services/ngalert/state/manager.go index 388adb81403..11e48ebc804 100644 --- a/pkg/services/ngalert/state/manager.go +++ b/pkg/services/ngalert/state/manager.go @@ -181,7 +181,10 @@ func (st *Manager) ProcessEvalResults(ctx context.Context, evaluatedAt time.Time st.saveAlertStates(ctx, logger, states...) - st.logStateTransitions(ctx, alertRule, states, staleStates) + allChanges := append(states, staleStates...) + if st.historian != nil { + st.historian.RecordStatesAsync(ctx, alertRule, allChanges) + } nextStates := make([]*State, 0, len(states)) for _, s := range states { @@ -316,26 +319,6 @@ func (st *Manager) saveAlertStates(ctx context.Context, logger log.Logger, state } } -func (st *Manager) logStateTransitions(ctx context.Context, alertRule *ngModels.AlertRule, newStates, staleStates []StateTransition) { - if st.historian == nil { - return - } - changedStates := make([]StateTransition, 0, len(staleStates)) - for _, s := range newStates { - if s.changed() { - changedStates = append(changedStates, s) - } - } - - // TODO refactor further. Let historian decide what to log. Current logic removes states `Normal (reason-X) -> Normal (reason-Y)` - for _, t := range staleStates { - if t.PreviousState == eval.Alerting { - changedStates = append(changedStates, t) - } - } - st.historian.RecordStates(ctx, alertRule, changedStates) -} - func (st *Manager) deleteAlertStates(ctx context.Context, logger log.Logger, states []StateTransition) { if st.instanceStore == nil || len(states) == 0 { return diff --git a/pkg/services/ngalert/state/persist.go b/pkg/services/ngalert/state/persist.go index 78bcd181daf..1dd8a0366ab 100644 --- a/pkg/services/ngalert/state/persist.go +++ b/pkg/services/ngalert/state/persist.go @@ -23,7 +23,7 @@ type RuleReader interface { // Historian maintains an audit log of alert state history. type Historian interface { // RecordStates writes a number of state transitions for a given rule to state history. - RecordStates(ctx context.Context, rule *models.AlertRule, states []StateTransition) + RecordStatesAsync(ctx context.Context, rule *models.AlertRule, states []StateTransition) } // ImageCapturer captures images. diff --git a/pkg/services/ngalert/state/state.go b/pkg/services/ngalert/state/state.go index 839dc014c2c..d0892db5d74 100644 --- a/pkg/services/ngalert/state/state.go +++ b/pkg/services/ngalert/state/state.go @@ -103,7 +103,7 @@ func (c StateTransition) PreviousFormatted() string { return FormatStateAndReason(c.PreviousState, c.PreviousStateReason) } -func (c StateTransition) changed() bool { +func (c StateTransition) Changed() bool { return c.PreviousState != c.State.State || c.PreviousStateReason != c.State.StateReason } diff --git a/pkg/services/ngalert/state/testing.go b/pkg/services/ngalert/state/testing.go index 80c2a565e45..d1511fc972a 100644 --- a/pkg/services/ngalert/state/testing.go +++ b/pkg/services/ngalert/state/testing.go @@ -49,7 +49,7 @@ func (f *FakeRuleReader) ListAlertRules(_ context.Context, q *models.ListAlertRu type FakeHistorian struct{} -func (f *FakeHistorian) RecordStates(ctx context.Context, rule *models.AlertRule, states []StateTransition) { +func (f *FakeHistorian) RecordStatesAsync(ctx context.Context, rule *models.AlertRule, states []StateTransition) { } // NotAvailableImageService is a service that returns ErrScreenshotsUnavailable.