Dparrott/labels on alert rule (#33057)

* move state tracker tests to /tests

* set default labels on alerts

* handle empty labels in result.Instance

* create annotation on transition to alerting state
This commit is contained in:
David Parrott 2021-04-16 06:11:40 -07:00 committed by GitHub
parent 8b843eb0a6
commit 555da77527
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 76 additions and 64 deletions

View File

@ -14,7 +14,7 @@ func FromAlertStateToPostableAlerts(firingStates []state.AlertState) []*notifier
if alertState.State == eval.Alerting {
alerts = append(alerts, &notifier.PostableAlert{
PostableAlert: models.PostableAlert{
Annotations: models.LabelSet{}, //TODO: add annotations to evaluation results, add them to the alertState struct, and then set them before sending to the notifier
Annotations: alertState.Annotations,
StartsAt: strfmt.DateTime(alertState.StartsAt),
EndsAt: strfmt.DateTime(alertState.EndsAt),
Alert: models.Alert{

View File

@ -80,7 +80,7 @@ func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key models.AlertRul
return err
}
processedStates := stateTracker.ProcessEvalResults(key.UID, results, condition)
processedStates := stateTracker.ProcessEvalResults(alertRule, results)
sch.saveAlertStates(processedStates)
alerts := FromAlertStateToPostableAlerts(processedStates)
sch.log.Debug("sending alerts to notifier", "count", len(alerts))

View File

@ -22,6 +22,7 @@ type AlertState struct {
StartsAt time.Time
EndsAt time.Time
LastEvaluationTime time.Time
Annotations map[string]string
}
type StateEvaluation struct {
@ -53,22 +54,36 @@ func NewStateTracker(logger log.Logger) *StateTracker {
return tracker
}
func (st *StateTracker) getOrCreate(uid string, orgId int64, result eval.Result) AlertState {
func (st *StateTracker) getOrCreate(alertRule *ngModels.AlertRule, result eval.Result) AlertState {
st.stateCache.mu.Lock()
defer st.stateCache.mu.Unlock()
lbs := data.Labels{}
if len(result.Instance) > 0 {
lbs = result.Instance
}
lbs["__alert_rule_uid__"] = alertRule.UID
lbs["__alert_rule_namespace_uid__"] = alertRule.NamespaceUID
lbs["__alert_rule_title__"] = alertRule.Title
idString := fmt.Sprintf("%s %s", uid, map[string]string(result.Instance))
annotations := map[string]string{}
if len(alertRule.Annotations) > 0 {
annotations = alertRule.Annotations
}
idString := fmt.Sprintf("%s", map[string]string(lbs))
if state, ok := st.stateCache.cacheMap[idString]; ok {
return state
}
st.Log.Debug("adding new alert state cache entry", "cacheId", idString, "state", result.State.String(), "evaluatedAt", result.EvaluatedAt.String())
newState := AlertState{
UID: uid,
OrgID: orgId,
CacheId: idString,
Labels: result.Instance,
State: result.State,
Results: []StateEvaluation{},
UID: alertRule.UID,
OrgID: alertRule.OrgID,
CacheId: idString,
Labels: lbs,
State: result.State,
Results: []StateEvaluation{},
Annotations: annotations,
}
if result.State == eval.Alerting {
newState.StartsAt = result.EvaluatedAt
@ -96,11 +111,11 @@ func (st *StateTracker) ResetCache() {
st.stateCache.cacheMap = make(map[string]AlertState)
}
func (st *StateTracker) ProcessEvalResults(uid string, results eval.Results, condition ngModels.Condition) []AlertState {
st.Log.Info("state tracker processing evaluation results", "uid", uid, "resultCount", len(results))
func (st *StateTracker) ProcessEvalResults(alertRule *ngModels.AlertRule, results eval.Results) []AlertState {
st.Log.Info("state tracker processing evaluation results", "uid", alertRule.UID, "resultCount", len(results))
var changedStates []AlertState
for _, result := range results {
s, _ := st.setNextState(uid, condition.OrgID, result)
s, _ := st.setNextState(alertRule, result)
changedStates = append(changedStates, s)
}
st.Log.Debug("returning changed states to scheduler", "count", len(changedStates))
@ -113,9 +128,9 @@ func (st *StateTracker) ProcessEvalResults(uid string, results eval.Results, con
// 3. The base interval defined by the scheduler - in the case where #2 is not yet an option we can use the base interval at which every alert runs.
// Set the current state based on evaluation results.
// Return the state and a bool indicating whether a state transition occurred.
func (st *StateTracker) setNextState(uid string, orgId int64, result eval.Result) (AlertState, bool) {
currentState := st.getOrCreate(uid, orgId, result)
st.Log.Debug("setting alert state", "uid", uid)
func (st *StateTracker) setNextState(alertRule *ngModels.AlertRule, result eval.Result) (AlertState, bool) {
currentState := st.getOrCreate(alertRule, result)
st.Log.Debug("setting alert state", "uid", alertRule.UID)
switch {
case currentState.State == result.State:
st.Log.Debug("no state transition", "cacheId", currentState.CacheId, "state", currentState.State.String())
@ -139,6 +154,7 @@ func (st *StateTracker) setNextState(uid string, orgId int64, result eval.Result
EvaluationTime: result.EvaluatedAt,
EvaluationState: result.State,
})
currentState.Annotations["alerting"] = result.EvaluatedAt.String()
st.set(currentState)
return currentState, true
case currentState.State == eval.Alerting && result.State == eval.Normal:

View File

@ -1,10 +1,12 @@
package state
package tests
import (
"fmt"
"testing"
"time"
"github.com/grafana/grafana/pkg/services/ngalert/state"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana-plugin-sdk-go/data"
@ -18,16 +20,30 @@ func TestProcessEvalResults(t *testing.T) {
if err != nil {
t.Fatalf("error parsing date format: %s", err.Error())
}
cacheId := "test_uid map[label1:value1 label2:value2]"
cacheId := "map[__alert_rule_namespace_uid__:test_namespace __alert_rule_title__:test_title __alert_rule_uid__:test_uid label1:value1 label2:value2]"
alertRule := models.AlertRule{
ID: 1,
OrgID: 123,
Title: "test_title",
Condition: "A",
UID: "test_uid",
NamespaceUID: "test_namespace",
}
expectedLabels := data.Labels{
"label1": "value1",
"label2": "value2",
"__alert_rule_uid__": "test_uid",
"__alert_rule_namespace_uid__": "test_namespace",
"__alert_rule_title__": "test_title",
}
testCases := []struct {
desc string
uid string
evalResults eval.Results
condition models.Condition
expectedState eval.State
expectedReturnedStateCount int
expectedResultCount int
expectedCacheEntries []AlertState
expectedCacheEntries []state.AlertState
}{
{
desc: "given a single evaluation result",
@ -39,21 +55,17 @@ func TestProcessEvalResults(t *testing.T) {
EvaluatedAt: evaluationTime,
},
},
condition: models.Condition{
Condition: "A",
OrgID: 123,
},
expectedState: eval.Normal,
expectedReturnedStateCount: 0,
expectedResultCount: 1,
expectedCacheEntries: []AlertState{
expectedCacheEntries: []state.AlertState{
{
UID: "test_uid",
OrgID: 123,
CacheId: cacheId,
Labels: data.Labels{"label1": "value1", "label2": "value2"},
Labels: expectedLabels,
State: eval.Normal,
Results: []StateEvaluation{
Results: []state.StateEvaluation{
{EvaluationTime: evaluationTime, EvaluationState: eval.Normal},
},
StartsAt: time.Time{},
@ -77,21 +89,17 @@ func TestProcessEvalResults(t *testing.T) {
EvaluatedAt: evaluationTime.Add(1 * time.Minute),
},
},
condition: models.Condition{
Condition: "A",
OrgID: 123,
},
expectedState: eval.Alerting,
expectedReturnedStateCount: 1,
expectedResultCount: 2,
expectedCacheEntries: []AlertState{
expectedCacheEntries: []state.AlertState{
{
UID: "test_uid",
OrgID: 123,
CacheId: cacheId,
Labels: data.Labels{"label1": "value1", "label2": "value2"},
Labels: expectedLabels,
State: eval.Alerting,
Results: []StateEvaluation{
Results: []state.StateEvaluation{
{EvaluationTime: evaluationTime, EvaluationState: eval.Normal},
{EvaluationTime: evaluationTime.Add(1 * time.Minute), EvaluationState: eval.Alerting},
},
@ -116,21 +124,17 @@ func TestProcessEvalResults(t *testing.T) {
EvaluatedAt: evaluationTime.Add(1 * time.Minute),
},
},
condition: models.Condition{
Condition: "A",
OrgID: 123,
},
expectedState: eval.Normal,
expectedReturnedStateCount: 1,
expectedResultCount: 2,
expectedCacheEntries: []AlertState{
expectedCacheEntries: []state.AlertState{
{
UID: "test_uid",
OrgID: 123,
CacheId: cacheId,
Labels: data.Labels{"label1": "value1", "label2": "value2"},
Labels: expectedLabels,
State: eval.Normal,
Results: []StateEvaluation{
Results: []state.StateEvaluation{
{EvaluationTime: evaluationTime, EvaluationState: eval.Alerting},
{EvaluationTime: evaluationTime.Add(1 * time.Minute), EvaluationState: eval.Normal},
},
@ -155,21 +159,17 @@ func TestProcessEvalResults(t *testing.T) {
EvaluatedAt: evaluationTime.Add(1 * time.Minute),
},
},
condition: models.Condition{
Condition: "A",
OrgID: 123,
},
expectedState: eval.Alerting,
expectedReturnedStateCount: 0,
expectedResultCount: 2,
expectedCacheEntries: []AlertState{
expectedCacheEntries: []state.AlertState{
{
UID: "test_uid",
OrgID: 123,
CacheId: cacheId,
Labels: data.Labels{"label1": "value1", "label2": "value2"},
Labels: expectedLabels,
State: eval.Alerting,
Results: []StateEvaluation{
Results: []state.StateEvaluation{
{EvaluationTime: evaluationTime, EvaluationState: eval.Alerting},
{EvaluationTime: evaluationTime.Add(1 * time.Minute), EvaluationState: eval.Alerting},
},
@ -194,22 +194,18 @@ func TestProcessEvalResults(t *testing.T) {
EvaluatedAt: evaluationTime.Add(1 * time.Minute),
},
},
condition: models.Condition{
Condition: "A",
OrgID: 123,
},
expectedState: eval.Normal,
expectedReturnedStateCount: 0,
expectedResultCount: 2,
expectedCacheEntries: []AlertState{
expectedCacheEntries: []state.AlertState{
{
UID: "test_uid",
OrgID: 123,
CacheId: cacheId,
Labels: data.Labels{"label1": "value1", "label2": "value2"},
Labels: expectedLabels,
State: eval.Normal,
Results: []StateEvaluation{
{evaluationTime, eval.Normal},
Results: []state.StateEvaluation{
{EvaluationTime: evaluationTime, EvaluationState: eval.Normal},
{EvaluationTime: evaluationTime.Add(1 * time.Minute), EvaluationState: eval.Normal},
},
StartsAt: time.Time{},
@ -222,8 +218,8 @@ func TestProcessEvalResults(t *testing.T) {
for _, tc := range testCases {
t.Run("all fields for a cache entry are set correctly", func(t *testing.T) {
st := NewStateTracker(log.New("test_state_tracker"))
_ = st.ProcessEvalResults(tc.uid, tc.evalResults, tc.condition)
st := state.NewStateTracker(log.New("test_state_tracker"))
_ = st.ProcessEvalResults(&alertRule, tc.evalResults)
for _, entry := range tc.expectedCacheEntries {
if !entry.Equals(st.Get(entry.CacheId)) {
t.Log(tc.desc)
@ -234,23 +230,23 @@ func TestProcessEvalResults(t *testing.T) {
})
t.Run("the expected number of entries are added to the cache", func(t *testing.T) {
st := NewStateTracker(log.New("test_state_tracker"))
st.ProcessEvalResults(tc.uid, tc.evalResults, tc.condition)
assert.Equal(t, len(tc.expectedCacheEntries), len(st.stateCache.cacheMap))
st := state.NewStateTracker(log.New("test_state_tracker"))
st.ProcessEvalResults(&alertRule, tc.evalResults)
assert.Equal(t, len(tc.expectedCacheEntries), len(st.GetAll()))
})
// This test, as configured, does not quite represent the behavior of the system.
// It is expected that each batch of evaluation results will have only one result
// for a unique set of labels.
t.Run("the expected number of states are returned to the caller", func(t *testing.T) {
st := NewStateTracker(log.New("test_state_tracker"))
results := st.ProcessEvalResults(tc.uid, tc.evalResults, tc.condition)
st := state.NewStateTracker(log.New("test_state_tracker"))
results := st.ProcessEvalResults(&alertRule, tc.evalResults)
assert.Equal(t, len(tc.evalResults), len(results))
})
}
}
func printEntryDiff(a, b AlertState, t *testing.T) {
func printEntryDiff(a, b state.AlertState, t *testing.T) {
if a.UID != b.UID {
t.Log(fmt.Sprintf("%v \t %v\n", a.UID, b.UID))
}