From a85adeed965fe72e3e297b93fa2197d98e64a10b Mon Sep 17 00:00:00 2001
From: Yuri Tseretyan <yuriy.tseretyan@grafana.com>
Date: Tue, 6 Dec 2022 12:33:15 -0500
Subject: [PATCH] Alerting: Update state history service to filter states
 transitions (#58863)

* rename the method to better reflect its behavior
* make historian filter transition on itself
* call historian with all changes
---
 .../ngalert/state/historian/annotation.go     |  14 ++-
 .../state/historian/annotation_test.go        | 100 ++++++++++++++++++
 pkg/services/ngalert/state/manager.go         |  25 +----
 pkg/services/ngalert/state/persist.go         |   2 +-
 pkg/services/ngalert/state/state.go           |   2 +-
 pkg/services/ngalert/state/testing.go         |   2 +-
 6 files changed, 120 insertions(+), 25 deletions(-)
 create mode 100644 pkg/services/ngalert/state/historian/annotation_test.go

diff --git a/pkg/services/ngalert/state/historian/annotation.go b/pkg/services/ngalert/state/historian/annotation.go
index c94805178af..9687b463592 100644
--- a/pkg/services/ngalert/state/historian/annotation.go
+++ b/pkg/services/ngalert/state/historian/annotation.go
@@ -9,6 +9,7 @@ import (
 	"time"
 
 	"github.com/grafana/grafana-plugin-sdk-go/data"
+
 	"github.com/grafana/grafana/pkg/components/simplejson"
 	"github.com/grafana/grafana/pkg/infra/log"
 	"github.com/grafana/grafana/pkg/services/annotations"
@@ -34,7 +35,7 @@ func NewAnnotationHistorian(annotations annotations.Repository, dashboards dashb
 }
 
 // RecordStates writes a number of state transitions for a given rule to state history.
-func (h *AnnotationStateHistorian) RecordStates(ctx context.Context, rule *ngmodels.AlertRule, states []state.StateTransition) {
+func (h *AnnotationStateHistorian) RecordStatesAsync(ctx context.Context, rule *ngmodels.AlertRule, states []state.StateTransition) {
 	logger := h.log.FromContext(ctx)
 	// Build annotations before starting goroutine, to make sure all data is copied and won't mutate underneath us.
 	annotations := h.buildAnnotations(rule, states, logger)
@@ -45,6 +46,9 @@ func (h *AnnotationStateHistorian) RecordStates(ctx context.Context, rule *ngmod
 func (h *AnnotationStateHistorian) buildAnnotations(rule *ngmodels.AlertRule, states []state.StateTransition, logger log.Logger) []annotations.Item {
 	items := make([]annotations.Item, 0, len(states))
 	for _, state := range states {
+		if !shouldAnnotate(state) {
+			continue
+		}
 		logger.Debug("Alert state changed creating annotation", "newState", state.Formatted(), "oldState", state.PreviousFormatted())
 
 		annotationText, annotationData := buildAnnotationTextAndData(rule, state.State)
@@ -154,3 +158,11 @@ func removePrivateLabels(labels data.Labels) data.Labels {
 	}
 	return result
 }
+
+func shouldAnnotate(transition state.StateTransition) bool {
+	// Do not log not transitioned states normal states if it was marked as stale
+	if !transition.Changed() || transition.StateReason == ngmodels.StateReasonMissingSeries && transition.PreviousState == eval.Normal && transition.State.State == eval.Normal {
+		return false
+	}
+	return true
+}
diff --git a/pkg/services/ngalert/state/historian/annotation_test.go b/pkg/services/ngalert/state/historian/annotation_test.go
new file mode 100644
index 00000000000..eb008035c3c
--- /dev/null
+++ b/pkg/services/ngalert/state/historian/annotation_test.go
@@ -0,0 +1,100 @@
+package historian
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+
+	"github.com/grafana/grafana/pkg/services/ngalert/eval"
+	"github.com/grafana/grafana/pkg/services/ngalert/models"
+	"github.com/grafana/grafana/pkg/services/ngalert/state"
+)
+
+func TestShouldAnnotate(t *testing.T) {
+	allStates := []eval.State{
+		eval.Normal,
+		eval.Alerting,
+		eval.Pending,
+		eval.NoData,
+		eval.Error,
+	}
+
+	type Transition struct {
+		State               eval.State
+		StateReason         string
+		PreviousState       eval.State
+		PreviousStateReason string
+	}
+
+	transition := func(from eval.State, fromReason string, to eval.State, toReason string) Transition {
+		return Transition{
+			PreviousState:       from,
+			PreviousStateReason: fromReason,
+			State:               to,
+			StateReason:         toReason,
+		}
+	}
+
+	noTransition := func(state eval.State, stateReason string) Transition {
+		return transition(state, stateReason, state, stateReason)
+	}
+
+	knownReasons := []string{
+		"",
+		models.StateReasonMissingSeries,
+		eval.Error.String(),
+		eval.NoData.String(),
+	}
+
+	allCombinations := make([]Transition, 0, len(allStates)*len(allStates)*len(knownReasons)*len(knownReasons))
+	for _, from := range allStates {
+		for _, reasonFrom := range knownReasons {
+			for _, to := range allStates {
+				for _, reasonTo := range knownReasons {
+					allCombinations = append(allCombinations, transition(from, reasonFrom, to, reasonTo))
+				}
+			}
+		}
+	}
+
+	negativeTransitions := map[Transition]struct{}{
+		noTransition(eval.Normal, ""):                                {},
+		noTransition(eval.Normal, eval.Error.String()):               {},
+		noTransition(eval.Normal, eval.NoData.String()):              {},
+		noTransition(eval.Normal, models.StateReasonMissingSeries):   {},
+		noTransition(eval.Alerting, ""):                              {},
+		noTransition(eval.Alerting, eval.Error.String()):             {},
+		noTransition(eval.Alerting, eval.NoData.String()):            {},
+		noTransition(eval.Alerting, models.StateReasonMissingSeries): {},
+		noTransition(eval.Pending, ""):                               {},
+		noTransition(eval.Pending, eval.Error.String()):              {},
+		noTransition(eval.Pending, eval.NoData.String()):             {},
+		noTransition(eval.Pending, models.StateReasonMissingSeries):  {},
+		noTransition(eval.NoData, ""):                                {},
+		noTransition(eval.NoData, eval.Error.String()):               {},
+		noTransition(eval.NoData, eval.NoData.String()):              {},
+		noTransition(eval.NoData, models.StateReasonMissingSeries):   {},
+		noTransition(eval.Error, ""):                                 {},
+		noTransition(eval.Error, eval.Error.String()):                {},
+		noTransition(eval.Error, eval.NoData.String()):               {},
+		noTransition(eval.Error, models.StateReasonMissingSeries):    {},
+
+		transition(eval.Normal, "", eval.Normal, models.StateReasonMissingSeries):                   {},
+		transition(eval.Normal, eval.Error.String(), eval.Normal, models.StateReasonMissingSeries):  {},
+		transition(eval.Normal, eval.NoData.String(), eval.Normal, models.StateReasonMissingSeries): {},
+	}
+
+	for _, tc := range allCombinations {
+		_, ok := negativeTransitions[tc]
+		trans := state.StateTransition{
+			State:               &state.State{State: tc.State, StateReason: tc.StateReason},
+			PreviousState:       tc.PreviousState,
+			PreviousStateReason: tc.PreviousStateReason,
+		}
+
+		t.Run(fmt.Sprintf("%s -> %s should be %v", trans.PreviousFormatted(), trans.Formatted(), !ok), func(t *testing.T) {
+			require.Equal(t, !ok, shouldAnnotate(trans))
+		})
+	}
+}
diff --git a/pkg/services/ngalert/state/manager.go b/pkg/services/ngalert/state/manager.go
index 388adb81403..11e48ebc804 100644
--- a/pkg/services/ngalert/state/manager.go
+++ b/pkg/services/ngalert/state/manager.go
@@ -181,7 +181,10 @@ func (st *Manager) ProcessEvalResults(ctx context.Context, evaluatedAt time.Time
 
 	st.saveAlertStates(ctx, logger, states...)
 
-	st.logStateTransitions(ctx, alertRule, states, staleStates)
+	allChanges := append(states, staleStates...)
+	if st.historian != nil {
+		st.historian.RecordStatesAsync(ctx, alertRule, allChanges)
+	}
 
 	nextStates := make([]*State, 0, len(states))
 	for _, s := range states {
@@ -316,26 +319,6 @@ func (st *Manager) saveAlertStates(ctx context.Context, logger log.Logger, state
 	}
 }
 
-func (st *Manager) logStateTransitions(ctx context.Context, alertRule *ngModels.AlertRule, newStates, staleStates []StateTransition) {
-	if st.historian == nil {
-		return
-	}
-	changedStates := make([]StateTransition, 0, len(staleStates))
-	for _, s := range newStates {
-		if s.changed() {
-			changedStates = append(changedStates, s)
-		}
-	}
-
-	// TODO refactor further. Let historian decide what to log. Current logic removes states `Normal (reason-X) -> Normal (reason-Y)`
-	for _, t := range staleStates {
-		if t.PreviousState == eval.Alerting {
-			changedStates = append(changedStates, t)
-		}
-	}
-	st.historian.RecordStates(ctx, alertRule, changedStates)
-}
-
 func (st *Manager) deleteAlertStates(ctx context.Context, logger log.Logger, states []StateTransition) {
 	if st.instanceStore == nil || len(states) == 0 {
 		return
diff --git a/pkg/services/ngalert/state/persist.go b/pkg/services/ngalert/state/persist.go
index 78bcd181daf..1dd8a0366ab 100644
--- a/pkg/services/ngalert/state/persist.go
+++ b/pkg/services/ngalert/state/persist.go
@@ -23,7 +23,7 @@ type RuleReader interface {
 // Historian maintains an audit log of alert state history.
 type Historian interface {
 	// RecordStates writes a number of state transitions for a given rule to state history.
-	RecordStates(ctx context.Context, rule *models.AlertRule, states []StateTransition)
+	RecordStatesAsync(ctx context.Context, rule *models.AlertRule, states []StateTransition)
 }
 
 // ImageCapturer captures images.
diff --git a/pkg/services/ngalert/state/state.go b/pkg/services/ngalert/state/state.go
index 839dc014c2c..d0892db5d74 100644
--- a/pkg/services/ngalert/state/state.go
+++ b/pkg/services/ngalert/state/state.go
@@ -103,7 +103,7 @@ func (c StateTransition) PreviousFormatted() string {
 	return FormatStateAndReason(c.PreviousState, c.PreviousStateReason)
 }
 
-func (c StateTransition) changed() bool {
+func (c StateTransition) Changed() bool {
 	return c.PreviousState != c.State.State || c.PreviousStateReason != c.State.StateReason
 }
 
diff --git a/pkg/services/ngalert/state/testing.go b/pkg/services/ngalert/state/testing.go
index 80c2a565e45..d1511fc972a 100644
--- a/pkg/services/ngalert/state/testing.go
+++ b/pkg/services/ngalert/state/testing.go
@@ -49,7 +49,7 @@ func (f *FakeRuleReader) ListAlertRules(_ context.Context, q *models.ListAlertRu
 
 type FakeHistorian struct{}
 
-func (f *FakeHistorian) RecordStates(ctx context.Context, rule *models.AlertRule, states []StateTransition) {
+func (f *FakeHistorian) RecordStatesAsync(ctx context.Context, rule *models.AlertRule, states []StateTransition) {
 }
 
 // NotAvailableImageService is a service that returns ErrScreenshotsUnavailable.