Alerting: Move annotation functionality behind a history persistence interface (#56133)

* Move annotation functionality behind a history persistence interface

* Rename to RecordState

* Fix lint error in import aliasing

* One more import linter error
This commit is contained in:
Alexander Weaver
2022-10-05 15:32:20 -05:00
committed by GitHub
parent b3087cfcd1
commit 8df830557a
9 changed files with 134 additions and 97 deletions

View File

@@ -29,6 +29,7 @@ import (
"github.com/grafana/grafana/pkg/services/ngalert/schedule"
"github.com/grafana/grafana/pkg/services/ngalert/sender"
"github.com/grafana/grafana/pkg/services/ngalert/state"
"github.com/grafana/grafana/pkg/services/ngalert/state/historian"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/services/notifications"
"github.com/grafana/grafana/pkg/services/quota"
@@ -167,7 +168,8 @@ func (ng *AlertNG) init() error {
AlertSender: alertsRouter,
}
stateManager := state.NewManager(ng.Log, ng.Metrics.GetStateMetrics(), appUrl, store, store, ng.dashboardService, ng.imageService, clk, ng.annotationsRepo)
historian := historian.NewAnnotationHistorian(ng.annotationsRepo, ng.dashboardService, ng.Log)
stateManager := state.NewManager(ng.Log, ng.Metrics.GetStateMetrics(), appUrl, store, store, ng.imageService, clk, historian)
scheduler := schedule.NewScheduler(schedCfg, appUrl, stateManager)
// if it is required to include folder title to the alerts, we need to subscribe to changes of alert title

View File

@@ -19,8 +19,6 @@ import (
"github.com/stretchr/testify/require"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/annotations/annotationstest"
"github.com/grafana/grafana/pkg/services/dashboards"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/services/ngalert/image"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
@@ -112,7 +110,7 @@ func TestWarmStateCache(t *testing.T) {
RuleStore: dbstore,
Metrics: testMetrics.GetSchedulerMetrics(),
}
st := state.NewManager(schedCfg.Logger, testMetrics.GetStateMetrics(), nil, dbstore, dbstore, &dashboards.FakeDashboardService{}, &image.NoopImageService{}, clock.NewMock(), annotationstest.NewFakeAnnotationsRepo())
st := state.NewManager(schedCfg.Logger, testMetrics.GetStateMetrics(), nil, dbstore, dbstore, &image.NoopImageService{}, clock.NewMock(), &state.FakeHistorian{})
st.Warm(ctx)
t.Run("instance cache has expected entries", func(t *testing.T) {
@@ -165,7 +163,7 @@ func TestAlertingTicker(t *testing.T) {
Metrics: testMetrics.GetSchedulerMetrics(),
AlertSender: notifier,
}
st := state.NewManager(schedCfg.Logger, testMetrics.GetStateMetrics(), nil, dbstore, dbstore, &dashboards.FakeDashboardService{}, &image.NoopImageService{}, clock.NewMock(), annotationstest.NewFakeAnnotationsRepo())
st := state.NewManager(schedCfg.Logger, testMetrics.GetStateMetrics(), nil, dbstore, dbstore, &image.NoopImageService{}, clock.NewMock(), &state.FakeHistorian{})
appUrl := &url.URL{
Scheme: "http",
Host: "localhost",

View File

@@ -21,8 +21,6 @@ import (
"github.com/grafana/grafana/pkg/expr"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/annotations/annotationstest"
"github.com/grafana/grafana/pkg/services/dashboards"
"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/services/ngalert/image"
@@ -530,7 +528,7 @@ func setupScheduler(t *testing.T, rs *fakeRulesStore, is *state.FakeInstanceStor
}
stateRs := state.FakeRuleReader{}
st := state.NewManager(schedCfg.Logger, m.GetStateMetrics(), nil, &stateRs, is, &dashboards.FakeDashboardService{}, &image.NoopImageService{}, mockedClock, annotationstest.NewFakeAnnotationsRepo())
st := state.NewManager(schedCfg.Logger, m.GetStateMetrics(), nil, &stateRs, is, &image.NoopImageService{}, mockedClock, &state.FakeHistorian{})
return NewScheduler(schedCfg, appUrl, st)
}

View File

@@ -0,0 +1,88 @@
package historian
import (
"context"
"fmt"
"strconv"
"strings"
"time"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/models"
"github.com/grafana/grafana/pkg/services/annotations"
"github.com/grafana/grafana/pkg/services/dashboards"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/state"
)
// AnnotationStateHistorian is an implementation of state.Historian that uses Grafana Annotations as the backing datastore.
type AnnotationStateHistorian struct {
annotations annotations.Repository
dashboards dashboards.DashboardService
log log.Logger
}
func NewAnnotationHistorian(annotations annotations.Repository, dashboards dashboards.DashboardService, log log.Logger) *AnnotationStateHistorian {
return &AnnotationStateHistorian{
annotations: annotations,
dashboards: dashboards,
log: log,
}
}
func (h *AnnotationStateHistorian) RecordState(ctx context.Context, rule *ngmodels.AlertRule, labels data.Labels, evaluatedAt time.Time, currentData, previousData state.InstanceStateAndReason) {
h.log.Debug("alert state changed creating annotation", "alertRuleUID", rule.UID, "newState", currentData.String(), "oldState", previousData.String())
labels = removePrivateLabels(labels)
annotationText := fmt.Sprintf("%s {%s} - %s", rule.Title, labels.String(), currentData.String())
item := &annotations.Item{
AlertId: rule.ID,
OrgId: rule.OrgID,
PrevState: previousData.String(),
NewState: currentData.String(),
Text: annotationText,
Epoch: evaluatedAt.UnixNano() / int64(time.Millisecond),
}
dashUid, ok := rule.Annotations[ngmodels.DashboardUIDAnnotation]
if ok {
panelUid := rule.Annotations[ngmodels.PanelIDAnnotation]
panelId, err := strconv.ParseInt(panelUid, 10, 64)
if err != nil {
h.log.Error("error parsing panelUID for alert annotation", "panelUID", panelUid, "alertRuleUID", rule.UID, "err", err.Error())
return
}
query := &models.GetDashboardQuery{
Uid: dashUid,
OrgId: rule.OrgID,
}
err = h.dashboards.GetDashboard(ctx, query)
if err != nil {
h.log.Error("error getting dashboard for alert annotation", "dashboardUID", dashUid, "alertRuleUID", rule.UID, "err", err.Error())
return
}
item.PanelId = panelId
item.DashboardId = query.Result.Id
}
if err := h.annotations.Save(ctx, item); err != nil {
h.log.Error("error saving alert annotation", "alertRuleUID", rule.UID, "err", err.Error())
return
}
}
func removePrivateLabels(labels data.Labels) data.Labels {
result := make(data.Labels)
for k, v := range labels {
if !strings.HasPrefix(k, "__") && !strings.HasSuffix(k, "__") {
result[k] = v
}
}
return result
}

View File

@@ -5,17 +5,12 @@ import (
"errors"
"fmt"
"net/url"
"strconv"
"strings"
"time"
"github.com/benbjohnson/clock"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/models"
"github.com/grafana/grafana/pkg/services/annotations"
"github.com/grafana/grafana/pkg/services/dashboards"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/services/ngalert/image"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
@@ -41,28 +36,25 @@ type Manager struct {
quit chan struct{}
ResendDelay time.Duration
ruleStore RuleReader
instanceStore InstanceStore
dashboardService dashboards.DashboardService
imageService image.ImageService
AnnotationsRepo annotations.Repository
ruleStore RuleReader
instanceStore InstanceStore
imageService image.ImageService
historian Historian
}
func NewManager(logger log.Logger, metrics *metrics.State, externalURL *url.URL,
ruleStore RuleReader, instanceStore InstanceStore,
dashboardService dashboards.DashboardService, imageService image.ImageService, clock clock.Clock, annotationsRepo annotations.Repository) *Manager {
ruleStore RuleReader, instanceStore InstanceStore, imageService image.ImageService, clock clock.Clock, historian Historian) *Manager {
manager := &Manager{
cache: newCache(logger, metrics, externalURL),
quit: make(chan struct{}),
ResendDelay: ResendDelay, // TODO: make this configurable
log: logger,
metrics: metrics,
ruleStore: ruleStore,
instanceStore: instanceStore,
dashboardService: dashboardService,
imageService: imageService,
clock: clock,
AnnotationsRepo: annotationsRepo,
cache: newCache(logger, metrics, externalURL),
quit: make(chan struct{}),
ResendDelay: ResendDelay, // TODO: make this configurable
log: logger,
metrics: metrics,
ruleStore: ruleStore,
instanceStore: instanceStore,
imageService: imageService,
historian: historian,
clock: clock,
}
go manager.recordMetrics()
return manager
@@ -281,7 +273,7 @@ func (st *Manager) setNextState(ctx context.Context, alertRule *ngModels.AlertRu
shouldUpdateAnnotation := oldState != currentState.State || oldReason != currentState.StateReason
if shouldUpdateAnnotation {
go st.annotateState(ctx, alertRule, currentState.Labels, result.EvaluatedAt, InstanceStateAndReason{State: currentState.State, Reason: currentState.StateReason}, InstanceStateAndReason{State: oldState, Reason: oldReason})
go st.historian.RecordState(ctx, alertRule, currentState.Labels, result.EvaluatedAt, InstanceStateAndReason{State: currentState.State, Reason: currentState.StateReason}, InstanceStateAndReason{State: oldState, Reason: oldReason})
}
return currentState
}
@@ -358,52 +350,6 @@ func (i InstanceStateAndReason) String() string {
return s
}
func (st *Manager) annotateState(ctx context.Context, alertRule *ngModels.AlertRule, labels data.Labels, evaluatedAt time.Time, currentData, previousData InstanceStateAndReason) {
st.log.Debug("alert state changed creating annotation", "alertRuleUID", alertRule.UID, "newState", currentData.String(), "oldState", previousData.String())
labels = removePrivateLabels(labels)
annotationText := fmt.Sprintf("%s {%s} - %s", alertRule.Title, labels.String(), currentData.String())
item := &annotations.Item{
AlertId: alertRule.ID,
OrgId: alertRule.OrgID,
PrevState: previousData.String(),
NewState: currentData.String(),
Text: annotationText,
Epoch: evaluatedAt.UnixNano() / int64(time.Millisecond),
}
dashUid, ok := alertRule.Annotations[ngModels.DashboardUIDAnnotation]
if ok {
panelUid := alertRule.Annotations[ngModels.PanelIDAnnotation]
panelId, err := strconv.ParseInt(panelUid, 10, 64)
if err != nil {
st.log.Error("error parsing panelUID for alert annotation", "panelUID", panelUid, "alertRuleUID", alertRule.UID, "err", err.Error())
return
}
query := &models.GetDashboardQuery{
Uid: dashUid,
OrgId: alertRule.OrgID,
}
err = st.dashboardService.GetDashboard(ctx, query)
if err != nil {
st.log.Error("error getting dashboard for alert annotation", "dashboardUID", dashUid, "alertRuleUID", alertRule.UID, "err", err.Error())
return
}
item.PanelId = panelId
item.DashboardId = query.Result.Id
}
if err := st.AnnotationsRepo.Save(ctx, item); err != nil {
st.log.Error("error saving alert annotation", "alertRuleUID", alertRule.UID, "err", err.Error())
return
}
}
func (st *Manager) staleResultsHandler(ctx context.Context, evaluatedAt time.Time, alertRule *ngModels.AlertRule, states map[string]*State) []*State {
var resolvedStates []*State
allStates := st.GetStatesForRuleUID(alertRule.OrgID, alertRule.UID)
@@ -428,7 +374,7 @@ func (st *Manager) staleResultsHandler(ctx context.Context, evaluatedAt time.Tim
s.StateReason = ngModels.StateReasonMissingSeries
s.EndsAt = evaluatedAt
s.Resolved = true
st.annotateState(ctx, alertRule, s.Labels, evaluatedAt,
st.historian.RecordState(ctx, alertRule, s.Labels, evaluatedAt,
InstanceStateAndReason{State: eval.Normal, Reason: s.StateReason},
previousState,
)
@@ -442,13 +388,3 @@ func (st *Manager) staleResultsHandler(ctx context.Context, evaluatedAt time.Tim
func isItStale(evaluatedAt time.Time, lastEval time.Time, intervalSeconds int64) bool {
return !lastEval.Add(2 * time.Duration(intervalSeconds) * time.Second).After(evaluatedAt)
}
func removePrivateLabels(labels data.Labels) data.Labels {
result := make(data.Labels)
for k, v := range labels {
if !strings.HasPrefix(k, "__") && !strings.HasSuffix(k, "__") {
result[k] = v
}
}
return result
}

View File

@@ -12,8 +12,6 @@ import (
"github.com/benbjohnson/clock"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/annotations/annotationstest"
"github.com/grafana/grafana/pkg/services/dashboards"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
@@ -95,7 +93,7 @@ func Test_maybeNewImage(t *testing.T) {
imageService := &CountingImageService{}
mgr := NewManager(log.NewNopLogger(), &metrics.State{}, nil,
&FakeRuleReader{}, &FakeInstanceStore{},
&dashboards.FakeDashboardService{}, imageService, clock.NewMock(), annotationstest.NewFakeAnnotationsRepo())
imageService, clock.NewMock(), &FakeHistorian{})
err := mgr.maybeTakeScreenshot(context.Background(), &ngmodels.AlertRule{}, test.state, test.oldState)
require.NoError(t, err)
if !test.shouldScreenshot {

View File

@@ -25,6 +25,7 @@ import (
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/state"
"github.com/grafana/grafana/pkg/services/ngalert/state/historian"
"github.com/grafana/grafana/pkg/services/ngalert/tests"
)
@@ -38,7 +39,8 @@ func TestDashboardAnnotations(t *testing.T) {
_, dbstore := tests.SetupTestEnv(t, 1)
fakeAnnoRepo := annotationstest.NewFakeAnnotationsRepo()
st := state.NewManager(log.New("test_stale_results_handler"), testMetrics.GetStateMetrics(), nil, dbstore, dbstore, &dashboards.FakeDashboardService{}, &image.NoopImageService{}, clock.New(), fakeAnnoRepo)
hist := historian.NewAnnotationHistorian(fakeAnnoRepo, &dashboards.FakeDashboardService{}, log.NewNopLogger())
st := state.NewManager(log.New("test_stale_results_handler"), testMetrics.GetStateMetrics(), nil, dbstore, dbstore, &image.NoopImageService{}, clock.New(), hist)
const mainOrgID int64 = 1
@@ -1980,7 +1982,8 @@ func TestProcessEvalResults(t *testing.T) {
for _, tc := range testCases {
fakeAnnoRepo := annotationstest.NewFakeAnnotationsRepo()
st := state.NewManager(log.New("test_state_manager"), testMetrics.GetStateMetrics(), nil, nil, &state.FakeInstanceStore{}, &dashboards.FakeDashboardService{}, &image.NotAvailableImageService{}, clock.New(), fakeAnnoRepo)
hist := historian.NewAnnotationHistorian(fakeAnnoRepo, &dashboards.FakeDashboardService{}, log.NewNopLogger())
st := state.NewManager(log.New("test_state_manager"), testMetrics.GetStateMetrics(), nil, nil, &state.FakeInstanceStore{}, &image.NotAvailableImageService{}, clock.New(), hist)
t.Run(tc.desc, func(t *testing.T) {
for _, res := range tc.evalResults {
_ = st.ProcessEvalResults(context.Background(), evaluationTime, tc.alertRule, res, data.Labels{
@@ -2008,7 +2011,7 @@ func TestProcessEvalResults(t *testing.T) {
t.Run("should save state to database", func(t *testing.T) {
instanceStore := &state.FakeInstanceStore{}
clk := clock.New()
st := state.NewManager(log.New("test_state_manager"), testMetrics.GetStateMetrics(), nil, nil, instanceStore, &dashboards.FakeDashboardService{}, &image.NotAvailableImageService{}, clk, annotationstest.NewFakeAnnotationsRepo())
st := state.NewManager(log.New("test_state_manager"), testMetrics.GetStateMetrics(), nil, nil, instanceStore, &image.NotAvailableImageService{}, clk, &state.FakeHistorian{})
rule := models.AlertRuleGen()()
var results = eval.GenerateResults(rand.Intn(4)+1, eval.ResultGen(eval.WithEvaluatedAt(clk.Now())))
@@ -2125,7 +2128,7 @@ func TestStaleResultsHandler(t *testing.T) {
for _, tc := range testCases {
ctx := context.Background()
st := state.NewManager(log.New("test_stale_results_handler"), testMetrics.GetStateMetrics(), nil, dbstore, dbstore, &dashboards.FakeDashboardService{}, &image.NoopImageService{}, clock.New(), annotationstest.NewFakeAnnotationsRepo())
st := state.NewManager(log.New("test_stale_results_handler"), testMetrics.GetStateMetrics(), nil, dbstore, dbstore, &image.NoopImageService{}, clock.New(), &state.FakeHistorian{})
st.Warm(ctx)
existingStatesForRule := st.GetStatesForRuleUID(rule.OrgID, rule.UID)
@@ -2188,7 +2191,7 @@ func TestStaleResults(t *testing.T) {
clk := clock.NewMock()
clk.Set(time.Now())
st := state.NewManager(log.New("test_stale_results_handler"), testMetrics.GetStateMetrics(), nil, dbstore, dbstore, &dashboards.FakeDashboardService{}, &image.NoopImageService{}, clk, annotationstest.NewFakeAnnotationsRepo())
st := state.NewManager(log.New("test_stale_results_handler"), testMetrics.GetStateMetrics(), nil, dbstore, dbstore, &image.NoopImageService{}, clk, &state.FakeHistorian{})
orgID := rand.Int63()
rule := tests.CreateTestAlertRule(t, ctx, dbstore, 10, orgID)

View File

@@ -2,7 +2,9 @@ package state
import (
"context"
"time"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/grafana/grafana/pkg/services/ngalert/models"
)
@@ -19,3 +21,8 @@ type InstanceStore interface {
type RuleReader interface {
ListAlertRules(ctx context.Context, query *models.ListAlertRulesQuery) error
}
// Historian maintains an audit log of alert state history.
type Historian interface {
RecordState(ctx context.Context, rule *models.AlertRule, labels data.Labels, evaluatedAt time.Time, currentData, previousData InstanceStateAndReason)
}

View File

@@ -3,7 +3,9 @@ package state
import (
"context"
"sync"
"time"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/grafana/grafana/pkg/services/ngalert/models"
)
@@ -41,3 +43,8 @@ type FakeRuleReader struct{}
func (f *FakeRuleReader) ListAlertRules(_ context.Context, q *models.ListAlertRulesQuery) error {
return nil
}
type FakeHistorian struct{}
func (f *FakeHistorian) RecordState(ctx context.Context, rule *models.AlertRule, labels data.Labels, evaluatedAt time.Time, currentData, previousData InstanceStateAndReason) {
}