Alerting/Annotations: Add annotation backend for Loki alert state history (#78156)

* Move scope type vars to testutil package

* Expose parts of state historian for use in annotation backend

* Implement Loki ASH Annotation store

This store will only implement the `Get` method of a RepositoryImpl since alert state history
writes to Loki elsewhere.

* Use interface for Loki HTTP Client

* Add tests for Loki ASH Annotation store

* Add missing test

* Fix lint

* Organize tests

* Add filter tests

* Improve tests

* Move filter logic into outer function

* Fix lint

* Add comment

* Fix tests

* Fix lint

* Rename historian store + refactor

* Cleanup historian store

* Fix tests

* Minor cleanup

* Use new `ShouldRecordAnnotation` filter

* Fix logic and add tests for this check

* Fix typos, remove unused variables, `< 1` -> `== 0`

* More closely mimic RBAC filter from xorm to ensure correct logic

* Move off weaveworks client

* Address PR comments
This commit is contained in:
William Wernert
2024-01-10 18:42:35 -05:00
committed by GitHub
parent 2c09f969f1
commit 48b5ac779b
10 changed files with 1025 additions and 50 deletions

View File

@@ -173,12 +173,12 @@ func (h *AnnotationBackend) Query(ctx context.Context, query ngmodels.HistoryQue
func buildAnnotations(rule history_model.RuleMeta, states []state.StateTransition, logger log.Logger) []annotations.Item {
items := make([]annotations.Item, 0, len(states))
for _, state := range states {
if !shouldRecordAnnotation(state) {
if !ShouldRecordAnnotation(state) {
continue
}
logger.Debug("Alert state changed creating annotation", "newState", state.Formatted(), "oldState", state.PreviousFormatted())
annotationText, annotationData := buildAnnotationTextAndData(rule, state.State)
annotationText, annotationData := BuildAnnotationTextAndData(rule, state.State)
item := annotations.Item{
AlertID: rule.ID,
@@ -195,7 +195,7 @@ func buildAnnotations(rule history_model.RuleMeta, states []state.StateTransitio
return items
}
func buildAnnotationTextAndData(rule history_model.RuleMeta, currentState *state.State) (string, *simplejson.Json) {
func BuildAnnotationTextAndData(rule history_model.RuleMeta, currentState *state.State) (string, *simplejson.Json) {
jsonData := simplejson.New()
var value string

View File

@@ -34,9 +34,9 @@ func shouldRecord(transition state.StateTransition) bool {
return true
}
// shouldRecordAnnotation returns true if an annotation should be created for a given state transition.
// ShouldRecordAnnotation returns true if an annotation should be created for a given state transition.
// This is stricter than shouldRecord to avoid cluttering panels with state transitions.
func shouldRecordAnnotation(t state.StateTransition) bool {
func ShouldRecordAnnotation(t state.StateTransition) bool {
if !shouldRecord(t) {
return false
}

View File

@@ -110,8 +110,8 @@ func TestShouldRecordAnnotation(t *testing.T) {
forward := transition(eval.Normal, "", eval.Normal, models.StateReasonNoData)
backward := transition(eval.Normal, models.StateReasonNoData, eval.Normal, "")
require.False(t, shouldRecordAnnotation(forward), "Normal -> Normal(NoData) should be false")
require.False(t, shouldRecordAnnotation(backward), "Normal(NoData) -> Normal should be false")
require.False(t, ShouldRecordAnnotation(forward), "Normal -> Normal(NoData) should be false")
require.False(t, ShouldRecordAnnotation(backward), "Normal(NoData) -> Normal should be false")
})
t.Run("other Normal transitions involving NoData still recorded", func(t *testing.T) {
@@ -121,11 +121,11 @@ func TestShouldRecordAnnotation(t *testing.T) {
errorBackward := transition(eval.Normal, models.StateReasonError, eval.Normal, models.StateReasonNoData)
missingSeriesBackward := transition(eval.Normal, models.StateReasonMissingSeries, eval.Normal, models.StateReasonNoData)
require.True(t, shouldRecordAnnotation(pauseForward), "Normal(NoData) -> Normal(Paused) should be true")
require.True(t, shouldRecordAnnotation(pauseBackward), "Normal(Paused) -> Normal(NoData) should be true")
require.True(t, shouldRecordAnnotation(errorForward), "Normal(NoData) -> Normal(Error) should be true")
require.True(t, shouldRecordAnnotation(errorBackward), "Normal(Error) -> Normal(NoData) should be true")
require.True(t, shouldRecordAnnotation(missingSeriesBackward), "Normal(MissingSeries) -> Normal(NoData) should be true")
require.True(t, ShouldRecordAnnotation(pauseForward), "Normal(NoData) -> Normal(Paused) should be true")
require.True(t, ShouldRecordAnnotation(pauseBackward), "Normal(Paused) -> Normal(NoData) should be true")
require.True(t, ShouldRecordAnnotation(errorForward), "Normal(NoData) -> Normal(Error) should be true")
require.True(t, ShouldRecordAnnotation(errorBackward), "Normal(Error) -> Normal(NoData) should be true")
require.True(t, ShouldRecordAnnotation(missingSeriesBackward), "Normal(MissingSeries) -> Normal(NoData) should be true")
})
t.Run("respects filters in shouldRecord()", func(t *testing.T) {
@@ -133,19 +133,19 @@ func TestShouldRecordAnnotation(t *testing.T) {
unpause := transition(eval.Normal, models.StateReasonPaused, eval.Normal, "")
afterUpdate := transition(eval.Normal, models.StateReasonUpdated, eval.Normal, "")
require.False(t, shouldRecordAnnotation(missingSeries), "Normal -> Normal(MissingSeries) should be false")
require.False(t, shouldRecordAnnotation(unpause), "Normal(Paused) -> Normal should be false")
require.False(t, shouldRecordAnnotation(afterUpdate), "Normal(Updated) -> Normal should be false")
require.False(t, ShouldRecordAnnotation(missingSeries), "Normal -> Normal(MissingSeries) should be false")
require.False(t, ShouldRecordAnnotation(unpause), "Normal(Paused) -> Normal should be false")
require.False(t, ShouldRecordAnnotation(afterUpdate), "Normal(Updated) -> Normal should be false")
// Smoke test a few basic ones, exhaustive tests for shouldRecord() already exist elsewhere.
basicPending := transition(eval.Normal, "", eval.Pending, "")
basicAlerting := transition(eval.Pending, "", eval.Alerting, "")
basicResolve := transition(eval.Alerting, "", eval.Normal, "")
basicError := transition(eval.Normal, "", eval.Error, "")
require.True(t, shouldRecordAnnotation(basicPending), "Normal -> Pending should be true")
require.True(t, shouldRecordAnnotation(basicAlerting), "Pending -> Alerting should be true")
require.True(t, shouldRecordAnnotation(basicResolve), "Alerting -> Normal should be true")
require.True(t, shouldRecordAnnotation(basicError), "Normal -> Error should be true")
require.True(t, ShouldRecordAnnotation(basicPending), "Normal -> Pending should be true")
require.True(t, ShouldRecordAnnotation(basicAlerting), "Pending -> Alerting should be true")
require.True(t, ShouldRecordAnnotation(basicResolve), "Alerting -> Normal should be true")
require.True(t, ShouldRecordAnnotation(basicError), "Normal -> Error should be true")
})
}

View File

@@ -73,7 +73,7 @@ func (h *RemoteLokiBackend) TestConnection(ctx context.Context) error {
// Record writes a number of state transitions for a given rule to an external Loki instance.
func (h *RemoteLokiBackend) Record(ctx context.Context, rule history_model.RuleMeta, states []state.StateTransition) <-chan error {
logger := h.log.FromContext(ctx)
logStream := statesToStream(rule, states, h.externalLabels, logger)
logStream := StatesToStream(rule, states, h.externalLabels, logger)
errCh := make(chan error, 1)
if len(logStream.Values) == 0 {
@@ -112,7 +112,7 @@ func (h *RemoteLokiBackend) Record(ctx context.Context, rule history_model.RuleM
// Query retrieves state history entries from an external Loki instance and formats the results into a dataframe.
func (h *RemoteLokiBackend) Query(ctx context.Context, query models.HistoryQuery) (*data.Frame, error) {
logQL, err := buildLogQuery(query)
logQL, err := BuildLogQuery(query)
if err != nil {
return nil, err
}
@@ -200,7 +200,7 @@ func merge(res QueryRes, ruleUID string) (*data.Frame, error) {
if minElStreamIdx == -1 {
break
}
var entry lokiEntry
var entry LokiEntry
err := json.Unmarshal([]byte(minEl.V), &entry)
if err != nil {
return nil, fmt.Errorf("failed to unmarshal entry: %w", err)
@@ -231,7 +231,7 @@ func merge(res QueryRes, ruleUID string) (*data.Frame, error) {
return frame, nil
}
func statesToStream(rule history_model.RuleMeta, states []state.StateTransition, externalLabels map[string]string, logger log.Logger) Stream {
func StatesToStream(rule history_model.RuleMeta, states []state.StateTransition, externalLabels map[string]string, logger log.Logger) Stream {
labels := mergeLabels(make(map[string]string), externalLabels)
// System-defined labels take precedence over user-defined external labels.
labels[StateHistoryLabelKey] = StateHistoryLabelValue
@@ -246,7 +246,7 @@ func statesToStream(rule history_model.RuleMeta, states []state.StateTransition,
}
sanitizedLabels := removePrivateLabels(state.Labels)
entry := lokiEntry{
entry := LokiEntry{
SchemaVersion: 1,
Previous: state.PreviousFormatted(),
Current: state.Formatted(),
@@ -292,7 +292,7 @@ func (h *RemoteLokiBackend) recordStreams(ctx context.Context, streams []Stream,
return nil
}
type lokiEntry struct {
type LokiEntry struct {
SchemaVersion int `json:"schemaVersion"`
Previous string `json:"previous"`
Current string `json:"current"`
@@ -322,7 +322,7 @@ func jsonifyRow(line string) (json.RawMessage, error) {
// Ser/deser to validate the contents of the log line before shipping it forward.
// TODO: We may want to remove this in the future, as we already have the value in the form of a []byte, and json.RawMessage is also a []byte.
// TODO: Though, if the log line does not contain valid JSON, this can cause problems later on when rendering the dataframe.
var entry lokiEntry
var entry LokiEntry
if err := json.Unmarshal([]byte(line), &entry); err != nil {
return nil, err
}
@@ -366,7 +366,7 @@ func isValidOperator(op string) bool {
return false
}
func buildLogQuery(query models.HistoryQuery) (string, error) {
func BuildLogQuery(query models.HistoryQuery) (string, error) {
selectors, err := buildSelectors(query)
if err != nil {
return "", fmt.Errorf("failed to build the provided selectors: %w", err)

View File

@@ -31,7 +31,7 @@ func TestRemoteLokiBackend(t *testing.T) {
l := log.NewNopLogger()
states := singleFromNormal(&state.State{State: eval.Normal})
res := statesToStream(rule, states, nil, l)
res := StatesToStream(rule, states, nil, l)
require.Empty(t, res.Values)
})
@@ -41,7 +41,7 @@ func TestRemoteLokiBackend(t *testing.T) {
l := log.NewNopLogger()
states := singleFromNormal(&state.State{State: eval.Error, Error: fmt.Errorf("oh no")})
res := statesToStream(rule, states, nil, l)
res := StatesToStream(rule, states, nil, l)
entry := requireSingleEntry(t, res)
require.Contains(t, entry.Error, "oh no")
@@ -52,7 +52,7 @@ func TestRemoteLokiBackend(t *testing.T) {
l := log.NewNopLogger()
states := singleFromNormal(&state.State{State: eval.NoData})
res := statesToStream(rule, states, nil, l)
res := StatesToStream(rule, states, nil, l)
_ = requireSingleEntry(t, res)
})
@@ -65,7 +65,7 @@ func TestRemoteLokiBackend(t *testing.T) {
Labels: data.Labels{"a": "b"},
})
res := statesToStream(rule, states, nil, l)
res := StatesToStream(rule, states, nil, l)
exp := map[string]string{
StateHistoryLabelKey: StateHistoryLabelValue,
@@ -84,7 +84,7 @@ func TestRemoteLokiBackend(t *testing.T) {
Labels: data.Labels{"__private__": "b"},
})
res := statesToStream(rule, states, nil, l)
res := StatesToStream(rule, states, nil, l)
require.NotContains(t, res.Stream, "__private__")
})
@@ -97,7 +97,8 @@ func TestRemoteLokiBackend(t *testing.T) {
Labels: data.Labels{"a": "b"},
})
res := statesToStream(rule, states, nil, l)
res := StatesToStream(rule, states, nil, l)
entry := requireSingleEntry(t, res)
require.Equal(t, rule.Title, entry.RuleTitle)
@@ -113,7 +114,7 @@ func TestRemoteLokiBackend(t *testing.T) {
Labels: data.Labels{"statelabel": "labelvalue"},
})
res := statesToStream(rule, states, nil, l)
res := StatesToStream(rule, states, nil, l)
entry := requireSingleEntry(t, res)
require.Contains(t, entry.InstanceLabels, "statelabel")
@@ -131,7 +132,7 @@ func TestRemoteLokiBackend(t *testing.T) {
},
})
res := statesToStream(rule, states, nil, l)
res := StatesToStream(rule, states, nil, l)
entry := requireSingleEntry(t, res)
require.Len(t, entry.InstanceLabels, 3)
@@ -145,7 +146,7 @@ func TestRemoteLokiBackend(t *testing.T) {
Values: map[string]float64{"A": 2.0, "B": 5.5},
})
res := statesToStream(rule, states, nil, l)
res := StatesToStream(rule, states, nil, l)
entry := requireSingleEntry(t, res)
require.NotNil(t, entry.Values)
@@ -164,7 +165,7 @@ func TestRemoteLokiBackend(t *testing.T) {
Labels: data.Labels{"a": "b"},
})
res := statesToStream(rule, states, nil, l)
res := StatesToStream(rule, states, nil, l)
entry := requireSingleEntry(t, res)
require.Equal(t, rule.Condition, entry.Condition)
@@ -182,7 +183,7 @@ func TestRemoteLokiBackend(t *testing.T) {
},
})
res := statesToStream(rule, states, nil, l)
res := StatesToStream(rule, states, nil, l)
entry := requireSingleEntry(t, res)
exp := labelFingerprint(states[0].Labels)
@@ -281,7 +282,7 @@ func TestRemoteLokiBackend(t *testing.T) {
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
res, err := buildLogQuery(tc.query)
res, err := BuildLogQuery(tc.query)
require.NoError(t, err)
require.Equal(t, tc.exp, res)
})
@@ -537,15 +538,15 @@ func createTestRule() history_model.RuleMeta {
}
}
func requireSingleEntry(t *testing.T, res Stream) lokiEntry {
func requireSingleEntry(t *testing.T, res Stream) LokiEntry {
require.Len(t, res.Values, 1)
return requireEntry(t, res.Values[0])
}
func requireEntry(t *testing.T, row Sample) lokiEntry {
func requireEntry(t *testing.T, row Sample) LokiEntry {
t.Helper()
var entry lokiEntry
var entry LokiEntry
err := json.Unmarshal([]byte(row.V), &entry)
require.NoError(t, err)
return entry

View File

@@ -482,6 +482,27 @@ func FormatStateAndReason(state eval.State, reason string) string {
return s
}
// ParseFormattedState parses a state string in the format "state (reason)"
// and returns the state and reason separately.
//
// The state is the text before the first space. Everything after that first
// space is treated as the reason, with any leading and trailing parentheses
// removed, so a reason that itself contains spaces is returned in full.
// When there is no reason part, the returned reason is empty.
//
// An error is returned (with state -1 and an empty reason) when stateStr is
// empty or when eval.ParseStateString rejects the state part.
func ParseFormattedState(stateStr string) (eval.State, string, error) {
	// strings.Split/SplitN never return an empty slice for a non-empty
	// separator, so empty input has to be rejected explicitly.
	if stateStr == "" {
		return -1, "", errors.New("invalid state format")
	}

	// Split only once: the reason may contain spaces and must not be cut
	// off at its first word.
	parts := strings.SplitN(stateStr, " ", 2)

	state, err := eval.ParseStateString(parts[0])
	if err != nil {
		return -1, "", err
	}

	var reason string
	if len(parts) > 1 {
		reason = strings.Trim(parts[1], "()")
	}

	return state, reason, nil
}
// GetRuleExtraLabels returns a map of built-in labels that should be added to an alert before it is sent to the Alertmanager or its state is cached.
func GetRuleExtraLabels(rule *models.AlertRule, folderTitle string, includeFolder bool) map[string]string {
extraLabels := make(map[string]string, 4)

View File

@@ -666,3 +666,26 @@ func TestTakeImage(t *testing.T) {
assert.Equal(t, ngmodels.Image{Path: "foo.png"}, *image)
})
}
// TestParseFormattedState checks that ParseFormattedState splits a formatted
// state string into its state and reason, and that it returns an error for
// an empty string and for a string that does not name a state.
func TestParseFormattedState(t *testing.T) {
	t.Run("should parse formatted state", func(t *testing.T) {
		parsedState, parsedReason, err := ParseFormattedState("Normal (MissingSeries)")

		require.NoError(t, err)
		require.Equal(t, eval.Normal, parsedState)
		require.Equal(t, ngmodels.StateReasonMissingSeries, parsedReason)
	})

	// The failure cases share a single expectation: only an error is required.
	invalidInputs := []struct {
		name  string
		input string
	}{
		{name: "should error on empty string", input: ""},
		{name: "should error on invalid string content", input: "NotAState"},
	}
	for _, tc := range invalidInputs {
		t.Run(tc.name, func(t *testing.T) {
			_, _, err := ParseFormattedState(tc.input)
			require.Error(t, err)
		})
	}
}