mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Dparrott/labels on alert rule (#33057)
* move state tracker tests to /tests * set default labels on alerts * handle empty labels in result.Instance * create annotation on transition to alerting state
This commit is contained in:
@@ -22,6 +22,7 @@ type AlertState struct {
|
||||
StartsAt time.Time
|
||||
EndsAt time.Time
|
||||
LastEvaluationTime time.Time
|
||||
Annotations map[string]string
|
||||
}
|
||||
|
||||
type StateEvaluation struct {
|
||||
@@ -53,22 +54,36 @@ func NewStateTracker(logger log.Logger) *StateTracker {
|
||||
return tracker
|
||||
}
|
||||
|
||||
func (st *StateTracker) getOrCreate(uid string, orgId int64, result eval.Result) AlertState {
|
||||
func (st *StateTracker) getOrCreate(alertRule *ngModels.AlertRule, result eval.Result) AlertState {
|
||||
st.stateCache.mu.Lock()
|
||||
defer st.stateCache.mu.Unlock()
|
||||
lbs := data.Labels{}
|
||||
if len(result.Instance) > 0 {
|
||||
lbs = result.Instance
|
||||
}
|
||||
lbs["__alert_rule_uid__"] = alertRule.UID
|
||||
lbs["__alert_rule_namespace_uid__"] = alertRule.NamespaceUID
|
||||
lbs["__alert_rule_title__"] = alertRule.Title
|
||||
|
||||
idString := fmt.Sprintf("%s %s", uid, map[string]string(result.Instance))
|
||||
annotations := map[string]string{}
|
||||
if len(alertRule.Annotations) > 0 {
|
||||
annotations = alertRule.Annotations
|
||||
}
|
||||
|
||||
idString := fmt.Sprintf("%s", map[string]string(lbs))
|
||||
if state, ok := st.stateCache.cacheMap[idString]; ok {
|
||||
return state
|
||||
}
|
||||
|
||||
st.Log.Debug("adding new alert state cache entry", "cacheId", idString, "state", result.State.String(), "evaluatedAt", result.EvaluatedAt.String())
|
||||
newState := AlertState{
|
||||
UID: uid,
|
||||
OrgID: orgId,
|
||||
CacheId: idString,
|
||||
Labels: result.Instance,
|
||||
State: result.State,
|
||||
Results: []StateEvaluation{},
|
||||
UID: alertRule.UID,
|
||||
OrgID: alertRule.OrgID,
|
||||
CacheId: idString,
|
||||
Labels: lbs,
|
||||
State: result.State,
|
||||
Results: []StateEvaluation{},
|
||||
Annotations: annotations,
|
||||
}
|
||||
if result.State == eval.Alerting {
|
||||
newState.StartsAt = result.EvaluatedAt
|
||||
@@ -96,11 +111,11 @@ func (st *StateTracker) ResetCache() {
|
||||
st.stateCache.cacheMap = make(map[string]AlertState)
|
||||
}
|
||||
|
||||
func (st *StateTracker) ProcessEvalResults(uid string, results eval.Results, condition ngModels.Condition) []AlertState {
|
||||
st.Log.Info("state tracker processing evaluation results", "uid", uid, "resultCount", len(results))
|
||||
func (st *StateTracker) ProcessEvalResults(alertRule *ngModels.AlertRule, results eval.Results) []AlertState {
|
||||
st.Log.Info("state tracker processing evaluation results", "uid", alertRule.UID, "resultCount", len(results))
|
||||
var changedStates []AlertState
|
||||
for _, result := range results {
|
||||
s, _ := st.setNextState(uid, condition.OrgID, result)
|
||||
s, _ := st.setNextState(alertRule, result)
|
||||
changedStates = append(changedStates, s)
|
||||
}
|
||||
st.Log.Debug("returning changed states to scheduler", "count", len(changedStates))
|
||||
@@ -113,9 +128,9 @@ func (st *StateTracker) ProcessEvalResults(uid string, results eval.Results, con
|
||||
// 3. The base interval defined by the scheduler - in the case where #2 is not yet an option we can use the base interval at which every alert runs.
|
||||
// Set the current state based on evaluation results.
|
||||
// Return the state and a bool indicating whether a state transition occurred.
|
||||
func (st *StateTracker) setNextState(uid string, orgId int64, result eval.Result) (AlertState, bool) {
|
||||
currentState := st.getOrCreate(uid, orgId, result)
|
||||
st.Log.Debug("setting alert state", "uid", uid)
|
||||
func (st *StateTracker) setNextState(alertRule *ngModels.AlertRule, result eval.Result) (AlertState, bool) {
|
||||
currentState := st.getOrCreate(alertRule, result)
|
||||
st.Log.Debug("setting alert state", "uid", alertRule.UID)
|
||||
switch {
|
||||
case currentState.State == result.State:
|
||||
st.Log.Debug("no state transition", "cacheId", currentState.CacheId, "state", currentState.State.String())
|
||||
@@ -139,6 +154,7 @@ func (st *StateTracker) setNextState(uid string, orgId int64, result eval.Result
|
||||
EvaluationTime: result.EvaluatedAt,
|
||||
EvaluationState: result.State,
|
||||
})
|
||||
currentState.Annotations["alerting"] = result.EvaluatedAt.String()
|
||||
st.set(currentState)
|
||||
return currentState, true
|
||||
case currentState.State == eval.Alerting && result.State == eval.Normal:
|
||||
|
@@ -1,286 +0,0 @@
|
||||
package state
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/grafana/grafana/pkg/infra/log"
|
||||
|
||||
"github.com/grafana/grafana-plugin-sdk-go/data"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/eval"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestProcessEvalResults(t *testing.T) {
|
||||
evaluationTime, err := time.Parse("2006-01-02", "2021-03-25")
|
||||
if err != nil {
|
||||
t.Fatalf("error parsing date format: %s", err.Error())
|
||||
}
|
||||
cacheId := "test_uid map[label1:value1 label2:value2]"
|
||||
testCases := []struct {
|
||||
desc string
|
||||
uid string
|
||||
evalResults eval.Results
|
||||
condition models.Condition
|
||||
expectedState eval.State
|
||||
expectedReturnedStateCount int
|
||||
expectedResultCount int
|
||||
expectedCacheEntries []AlertState
|
||||
}{
|
||||
{
|
||||
desc: "given a single evaluation result",
|
||||
uid: "test_uid",
|
||||
evalResults: eval.Results{
|
||||
eval.Result{
|
||||
Instance: data.Labels{"label1": "value1", "label2": "value2"},
|
||||
State: eval.Normal,
|
||||
EvaluatedAt: evaluationTime,
|
||||
},
|
||||
},
|
||||
condition: models.Condition{
|
||||
Condition: "A",
|
||||
OrgID: 123,
|
||||
},
|
||||
expectedState: eval.Normal,
|
||||
expectedReturnedStateCount: 0,
|
||||
expectedResultCount: 1,
|
||||
expectedCacheEntries: []AlertState{
|
||||
{
|
||||
UID: "test_uid",
|
||||
OrgID: 123,
|
||||
CacheId: cacheId,
|
||||
Labels: data.Labels{"label1": "value1", "label2": "value2"},
|
||||
State: eval.Normal,
|
||||
Results: []StateEvaluation{
|
||||
{EvaluationTime: evaluationTime, EvaluationState: eval.Normal},
|
||||
},
|
||||
StartsAt: time.Time{},
|
||||
EndsAt: time.Time{},
|
||||
LastEvaluationTime: evaluationTime,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "given a state change from normal to alerting for a single entity",
|
||||
uid: "test_uid",
|
||||
evalResults: eval.Results{
|
||||
eval.Result{
|
||||
Instance: data.Labels{"label1": "value1", "label2": "value2"},
|
||||
State: eval.Normal,
|
||||
EvaluatedAt: evaluationTime,
|
||||
},
|
||||
eval.Result{
|
||||
Instance: data.Labels{"label1": "value1", "label2": "value2"},
|
||||
State: eval.Alerting,
|
||||
EvaluatedAt: evaluationTime.Add(1 * time.Minute),
|
||||
},
|
||||
},
|
||||
condition: models.Condition{
|
||||
Condition: "A",
|
||||
OrgID: 123,
|
||||
},
|
||||
expectedState: eval.Alerting,
|
||||
expectedReturnedStateCount: 1,
|
||||
expectedResultCount: 2,
|
||||
expectedCacheEntries: []AlertState{
|
||||
{
|
||||
UID: "test_uid",
|
||||
OrgID: 123,
|
||||
CacheId: cacheId,
|
||||
Labels: data.Labels{"label1": "value1", "label2": "value2"},
|
||||
State: eval.Alerting,
|
||||
Results: []StateEvaluation{
|
||||
{EvaluationTime: evaluationTime, EvaluationState: eval.Normal},
|
||||
{EvaluationTime: evaluationTime.Add(1 * time.Minute), EvaluationState: eval.Alerting},
|
||||
},
|
||||
StartsAt: evaluationTime.Add(1 * time.Minute),
|
||||
EndsAt: evaluationTime.Add(100 * time.Second),
|
||||
LastEvaluationTime: evaluationTime.Add(1 * time.Minute),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "given a state change from alerting to normal for a single entity",
|
||||
uid: "test_uid",
|
||||
evalResults: eval.Results{
|
||||
eval.Result{
|
||||
Instance: data.Labels{"label1": "value1", "label2": "value2"},
|
||||
State: eval.Alerting,
|
||||
EvaluatedAt: evaluationTime,
|
||||
},
|
||||
eval.Result{
|
||||
Instance: data.Labels{"label1": "value1", "label2": "value2"},
|
||||
State: eval.Normal,
|
||||
EvaluatedAt: evaluationTime.Add(1 * time.Minute),
|
||||
},
|
||||
},
|
||||
condition: models.Condition{
|
||||
Condition: "A",
|
||||
OrgID: 123,
|
||||
},
|
||||
expectedState: eval.Normal,
|
||||
expectedReturnedStateCount: 1,
|
||||
expectedResultCount: 2,
|
||||
expectedCacheEntries: []AlertState{
|
||||
{
|
||||
UID: "test_uid",
|
||||
OrgID: 123,
|
||||
CacheId: cacheId,
|
||||
Labels: data.Labels{"label1": "value1", "label2": "value2"},
|
||||
State: eval.Normal,
|
||||
Results: []StateEvaluation{
|
||||
{EvaluationTime: evaluationTime, EvaluationState: eval.Alerting},
|
||||
{EvaluationTime: evaluationTime.Add(1 * time.Minute), EvaluationState: eval.Normal},
|
||||
},
|
||||
StartsAt: evaluationTime,
|
||||
EndsAt: evaluationTime.Add(1 * time.Minute),
|
||||
LastEvaluationTime: evaluationTime.Add(1 * time.Minute),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "given a constant alerting state for a single entity",
|
||||
uid: "test_uid",
|
||||
evalResults: eval.Results{
|
||||
eval.Result{
|
||||
Instance: data.Labels{"label1": "value1", "label2": "value2"},
|
||||
State: eval.Alerting,
|
||||
EvaluatedAt: evaluationTime,
|
||||
},
|
||||
eval.Result{
|
||||
Instance: data.Labels{"label1": "value1", "label2": "value2"},
|
||||
State: eval.Alerting,
|
||||
EvaluatedAt: evaluationTime.Add(1 * time.Minute),
|
||||
},
|
||||
},
|
||||
condition: models.Condition{
|
||||
Condition: "A",
|
||||
OrgID: 123,
|
||||
},
|
||||
expectedState: eval.Alerting,
|
||||
expectedReturnedStateCount: 0,
|
||||
expectedResultCount: 2,
|
||||
expectedCacheEntries: []AlertState{
|
||||
{
|
||||
UID: "test_uid",
|
||||
OrgID: 123,
|
||||
CacheId: cacheId,
|
||||
Labels: data.Labels{"label1": "value1", "label2": "value2"},
|
||||
State: eval.Alerting,
|
||||
Results: []StateEvaluation{
|
||||
{EvaluationTime: evaluationTime, EvaluationState: eval.Alerting},
|
||||
{EvaluationTime: evaluationTime.Add(1 * time.Minute), EvaluationState: eval.Alerting},
|
||||
},
|
||||
StartsAt: evaluationTime,
|
||||
EndsAt: evaluationTime.Add(100 * time.Second),
|
||||
LastEvaluationTime: evaluationTime.Add(1 * time.Minute),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "given a constant normal state for a single entity",
|
||||
uid: "test_uid",
|
||||
evalResults: eval.Results{
|
||||
eval.Result{
|
||||
Instance: data.Labels{"label1": "value1", "label2": "value2"},
|
||||
State: eval.Normal,
|
||||
EvaluatedAt: evaluationTime,
|
||||
},
|
||||
eval.Result{
|
||||
Instance: data.Labels{"label1": "value1", "label2": "value2"},
|
||||
State: eval.Normal,
|
||||
EvaluatedAt: evaluationTime.Add(1 * time.Minute),
|
||||
},
|
||||
},
|
||||
condition: models.Condition{
|
||||
Condition: "A",
|
||||
OrgID: 123,
|
||||
},
|
||||
expectedState: eval.Normal,
|
||||
expectedReturnedStateCount: 0,
|
||||
expectedResultCount: 2,
|
||||
expectedCacheEntries: []AlertState{
|
||||
{
|
||||
UID: "test_uid",
|
||||
OrgID: 123,
|
||||
CacheId: cacheId,
|
||||
Labels: data.Labels{"label1": "value1", "label2": "value2"},
|
||||
State: eval.Normal,
|
||||
Results: []StateEvaluation{
|
||||
{evaluationTime, eval.Normal},
|
||||
{EvaluationTime: evaluationTime.Add(1 * time.Minute), EvaluationState: eval.Normal},
|
||||
},
|
||||
StartsAt: time.Time{},
|
||||
EndsAt: time.Time{},
|
||||
LastEvaluationTime: evaluationTime.Add(1 * time.Minute),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run("all fields for a cache entry are set correctly", func(t *testing.T) {
|
||||
st := NewStateTracker(log.New("test_state_tracker"))
|
||||
_ = st.ProcessEvalResults(tc.uid, tc.evalResults, tc.condition)
|
||||
for _, entry := range tc.expectedCacheEntries {
|
||||
if !entry.Equals(st.Get(entry.CacheId)) {
|
||||
t.Log(tc.desc)
|
||||
printEntryDiff(entry, st.Get(entry.CacheId), t)
|
||||
}
|
||||
assert.True(t, entry.Equals(st.Get(entry.CacheId)))
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("the expected number of entries are added to the cache", func(t *testing.T) {
|
||||
st := NewStateTracker(log.New("test_state_tracker"))
|
||||
st.ProcessEvalResults(tc.uid, tc.evalResults, tc.condition)
|
||||
assert.Equal(t, len(tc.expectedCacheEntries), len(st.stateCache.cacheMap))
|
||||
})
|
||||
|
||||
//This test, as configured, does not quite represent the behavior of the system.
|
||||
//It is expected that each batch of evaluation results will have only one result
|
||||
//for a unique set of labels.
|
||||
t.Run("the expected number of states are returned to the caller", func(t *testing.T) {
|
||||
st := NewStateTracker(log.New("test_state_tracker"))
|
||||
results := st.ProcessEvalResults(tc.uid, tc.evalResults, tc.condition)
|
||||
assert.Equal(t, len(tc.evalResults), len(results))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func printEntryDiff(a, b AlertState, t *testing.T) {
|
||||
if a.UID != b.UID {
|
||||
t.Log(fmt.Sprintf("%v \t %v\n", a.UID, b.UID))
|
||||
}
|
||||
if a.OrgID != b.OrgID {
|
||||
t.Log(fmt.Sprintf("%v \t %v\n", a.OrgID, b.OrgID))
|
||||
}
|
||||
if a.CacheId != b.CacheId {
|
||||
t.Log(fmt.Sprintf("%v \t %v\n", a.CacheId, b.CacheId))
|
||||
}
|
||||
if !a.Labels.Equals(b.Labels) {
|
||||
t.Log(fmt.Sprintf("%v \t %v\n", a.Labels, b.Labels))
|
||||
}
|
||||
if a.StartsAt != b.StartsAt {
|
||||
t.Log(fmt.Sprintf("%v \t %v\n", a.StartsAt, b.StartsAt))
|
||||
}
|
||||
if a.EndsAt != b.EndsAt {
|
||||
t.Log(fmt.Sprintf("%v \t %v\n", a.EndsAt, b.EndsAt))
|
||||
}
|
||||
if a.LastEvaluationTime != b.LastEvaluationTime {
|
||||
t.Log(fmt.Sprintf("%v \t %v\n", a.LastEvaluationTime, b.LastEvaluationTime))
|
||||
}
|
||||
if len(a.Results) != len(b.Results) {
|
||||
t.Log(fmt.Sprintf("a: %d b: %d", len(a.Results), len(b.Results)))
|
||||
t.Log("a")
|
||||
for i := 0; i < len(a.Results); i++ {
|
||||
t.Log(fmt.Sprintf("%v\n", a.Results[i]))
|
||||
}
|
||||
t.Log("b")
|
||||
for i := 0; i < len(b.Results); i++ {
|
||||
t.Log(fmt.Sprintf("%v\n", b.Results[i]))
|
||||
}
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user