Alerting: Support hysteresis command expression (#75189)

Backend: 

* Update the Grafana Alerting engine to provide feedback to HysteresisCommand. The feedback information is stored in state.Manager as a fingerprint of each state. The fingerprint is persisted to the database. Only fingerprints that belong to Pending and Alerting states are considered as "loaded" and provided back to the command.
   - add ResultFingerprint to state.State. It's different from other fingerprints we store in the state because it is calculated from the result labels.
   - add result_fingerprint column to alert_instance
   - update alerting evaluator to accept AlertingResultsReader via context, and update scheduler to provide it.
   - add AlertingResultsFromRuleState that implements the new interface in eval package
   - update getExprRequest to patch the hysteresis command.

* Only one "Recovery Threshold" query is allowed to be used in the alert rule and it must be the Condition.


Frontend:

* Add hysteresis option to Threshold in UI. It's called "Recovery Threshold"
* Add test for getUnloadEvaluatorTypeFromCondition
* Hide hysteresis in panel expressions

* Refactor isInvalid and add test for it
* Remove unnecessary React.memo
* Add tests for updateEvaluatorConditions

---------

Co-authored-by: Sonia Aguilar <soniaaguilarpeiron@gmail.com>
This commit is contained in:
Yuri Tseretyan
2024-01-04 11:47:13 -05:00
committed by GitHub
parent 29c251851d
commit f6a46744a6
33 changed files with 1804 additions and 201 deletions

View File

@@ -190,6 +190,7 @@ func calculateState(ctx context.Context, log log.Logger, alertRule *ngModels.Ale
Values: values,
StartsAt: result.EvaluatedAt,
EndsAt: result.EvaluatedAt,
ResultFingerprint: result.Instance.Fingerprint(),
}
return newState
}

View File

@@ -3,6 +3,7 @@ package state
import (
"context"
"net/url"
"strconv"
"time"
"github.com/benbjohnson/clock"
@@ -158,6 +159,14 @@ func (st *Manager) Warm(ctx context.Context, rulesReader RuleReader) {
if err != nil {
st.log.Error("Error getting cacheId for entry", "error", err)
}
var resultFp data.Fingerprint
if entry.ResultFingerprint != "" {
fp, err := strconv.ParseUint(entry.ResultFingerprint, 16, 64)
if err != nil {
st.log.Error("Failed to parse result fingerprint of alert instance", "error", err, "ruleUID", entry.RuleUID)
}
resultFp = data.Fingerprint(fp)
}
rulesStates.states[cacheID] = &State{
AlertRuleUID: entry.RuleUID,
OrgID: entry.RuleOrgID,
@@ -170,6 +179,7 @@ func (st *Manager) Warm(ctx context.Context, rulesReader RuleReader) {
EndsAt: entry.CurrentStateEnd,
LastEvaluationTime: entry.LastEvalTime,
Annotations: ruleForEntry.Annotations,
ResultFingerprint: resultFp,
}
statesCount++
}
@@ -458,6 +468,7 @@ func (st *Manager) saveAlertStates(ctx context.Context, logger log.Logger, state
LastEvalTime: s.LastEvaluationTime,
CurrentStateSince: s.StartsAt,
CurrentStateEnd: s.EndsAt,
ResultFingerprint: s.ResultFingerprint.String(),
}
err = st.instanceStore.SaveAlertInstance(ctx, instance)

View File

@@ -286,6 +286,15 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
"system + rule + datasource-error": mergeLabels(mergeLabels(expectedDatasourceErrorLabels, baseRule.Labels), systemLabels),
}
resultFingerprints := map[string]data.Fingerprint{
"system + rule": data.Labels{}.Fingerprint(),
"system + rule + labels1": labels1.Fingerprint(),
"system + rule + labels2": labels2.Fingerprint(),
"system + rule + labels3": labels3.Fingerprint(),
"system + rule + no-data": noDataLabels.Fingerprint(),
"system + rule + datasource-error": data.Labels{}.Fingerprint(),
}
patchState := func(r *ngmodels.AlertRule, s *State) {
// patch all optional fields of the expected state
setCacheID(s)
@@ -304,6 +313,14 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
if s.Values == nil {
s.Values = make(map[string]float64)
}
if s.ResultFingerprint == data.Fingerprint(0) {
for key, set := range labels {
if set.Fingerprint() == s.Labels.Fingerprint() {
s.ResultFingerprint = resultFingerprints[key]
break
}
}
}
}
executeTest := func(t *testing.T, alertRule *ngmodels.AlertRule, resultsAtTime map[time.Time]eval.Results, expectedTransitionsAtTime map[time.Time][]StateTransition, applyNoDataErrorToAllStates bool) {

View File

@@ -5,6 +5,7 @@ import (
"context"
"errors"
"fmt"
"math"
"math/rand"
"sort"
"strings"
@@ -58,6 +59,7 @@ func TestWarmStateCache(t *testing.T) {
EndsAt: evaluationTime.Add(1 * time.Minute),
LastEvaluationTime: evaluationTime,
Annotations: map[string]string{"testAnnoKey": "testAnnoValue"},
ResultFingerprint: data.Fingerprint(math.MaxUint64),
}, {
AlertRuleUID: rule.UID,
OrgID: rule.OrgID,
@@ -70,6 +72,7 @@ func TestWarmStateCache(t *testing.T) {
EndsAt: evaluationTime.Add(1 * time.Minute),
LastEvaluationTime: evaluationTime,
Annotations: map[string]string{"testAnnoKey": "testAnnoValue"},
ResultFingerprint: data.Fingerprint(math.MaxUint64 - 1),
},
{
AlertRuleUID: rule.UID,
@@ -83,6 +86,7 @@ func TestWarmStateCache(t *testing.T) {
EndsAt: evaluationTime.Add(1 * time.Minute),
LastEvaluationTime: evaluationTime,
Annotations: map[string]string{"testAnnoKey": "testAnnoValue"},
ResultFingerprint: data.Fingerprint(0),
},
{
AlertRuleUID: rule.UID,
@@ -96,6 +100,7 @@ func TestWarmStateCache(t *testing.T) {
EndsAt: evaluationTime.Add(1 * time.Minute),
LastEvaluationTime: evaluationTime,
Annotations: map[string]string{"testAnnoKey": "testAnnoValue"},
ResultFingerprint: data.Fingerprint(1),
},
{
AlertRuleUID: rule.UID,
@@ -109,6 +114,7 @@ func TestWarmStateCache(t *testing.T) {
EndsAt: evaluationTime.Add(1 * time.Minute),
LastEvaluationTime: evaluationTime,
Annotations: map[string]string{"testAnnoKey": "testAnnoValue"},
ResultFingerprint: data.Fingerprint(2),
},
}
@@ -127,6 +133,7 @@ func TestWarmStateCache(t *testing.T) {
CurrentStateSince: evaluationTime.Add(-1 * time.Minute),
CurrentStateEnd: evaluationTime.Add(1 * time.Minute),
Labels: labels,
ResultFingerprint: data.Fingerprint(math.MaxUint64).String(),
})
labels = models.InstanceLabels{"test2": "testValue2"}
@@ -142,6 +149,7 @@ func TestWarmStateCache(t *testing.T) {
CurrentStateSince: evaluationTime.Add(-1 * time.Minute),
CurrentStateEnd: evaluationTime.Add(1 * time.Minute),
Labels: labels,
ResultFingerprint: data.Fingerprint(math.MaxUint64 - 1).String(),
})
labels = models.InstanceLabels{"test3": "testValue3"}
@@ -157,6 +165,7 @@ func TestWarmStateCache(t *testing.T) {
CurrentStateSince: evaluationTime.Add(-1 * time.Minute),
CurrentStateEnd: evaluationTime.Add(1 * time.Minute),
Labels: labels,
ResultFingerprint: data.Fingerprint(0).String(),
})
labels = models.InstanceLabels{"test4": "testValue4"}
@@ -172,6 +181,7 @@ func TestWarmStateCache(t *testing.T) {
CurrentStateSince: evaluationTime.Add(-1 * time.Minute),
CurrentStateEnd: evaluationTime.Add(1 * time.Minute),
Labels: labels,
ResultFingerprint: data.Fingerprint(1).String(),
})
labels = models.InstanceLabels{"test5": "testValue5"}
@@ -187,6 +197,7 @@ func TestWarmStateCache(t *testing.T) {
CurrentStateSince: evaluationTime.Add(-1 * time.Minute),
CurrentStateEnd: evaluationTime.Add(1 * time.Minute),
Labels: labels,
ResultFingerprint: data.Fingerprint(2).String(),
})
for _, instance := range instances {
_ = dbstore.SaveAlertInstance(ctx, instance)
@@ -384,8 +395,9 @@ func TestProcessEvalResults(t *testing.T) {
},
expectedStates: []*state.State{
{
Labels: labels["system + rule + labels1"],
State: eval.Normal,
Labels: labels["system + rule + labels1"],
ResultFingerprint: labels1.Fingerprint(),
State: eval.Normal,
Results: []state.Evaluation{
newEvaluation(t1, eval.Normal),
},
@@ -407,8 +419,9 @@ func TestProcessEvalResults(t *testing.T) {
expectedAnnotations: 1,
expectedStates: []*state.State{
{
Labels: labels["system + rule + labels1"],
State: eval.Normal,
Labels: labels["system + rule + labels1"],
ResultFingerprint: labels1.Fingerprint(),
State: eval.Normal,
Results: []state.Evaluation{
newEvaluation(t1, eval.Normal),
},
@@ -417,8 +430,9 @@ func TestProcessEvalResults(t *testing.T) {
LastEvaluationTime: t1,
},
{
Labels: labels["system + rule + labels2"],
State: eval.Alerting,
Labels: labels["system + rule + labels2"],
ResultFingerprint: labels2.Fingerprint(),
State: eval.Alerting,
Results: []state.Evaluation{
newEvaluation(t1, eval.Alerting),
},
@@ -441,8 +455,9 @@ func TestProcessEvalResults(t *testing.T) {
},
expectedStates: []*state.State{
{
Labels: labels["system + rule + labels1"],
State: eval.Normal,
Labels: labels["system + rule + labels1"],
ResultFingerprint: labels1.Fingerprint(),
State: eval.Normal,
Results: []state.Evaluation{
newEvaluation(t1, eval.Normal),
newEvaluation(tn(6), eval.Normal),
@@ -467,8 +482,9 @@ func TestProcessEvalResults(t *testing.T) {
expectedAnnotations: 1,
expectedStates: []*state.State{
{
Labels: labels["system + rule + labels1"],
State: eval.Alerting,
Labels: labels["system + rule + labels1"],
ResultFingerprint: labels1.Fingerprint(),
State: eval.Alerting,
Results: []state.Evaluation{
newEvaluation(t1, eval.Normal),
newEvaluation(t2, eval.Alerting),
@@ -499,8 +515,9 @@ func TestProcessEvalResults(t *testing.T) {
expectedAnnotations: 2,
expectedStates: []*state.State{
{
Labels: labels["system + rule + labels1"],
State: eval.Alerting,
Labels: labels["system + rule + labels1"],
ResultFingerprint: labels1.Fingerprint(),
State: eval.Alerting,
Results: []state.Evaluation{
newEvaluation(t3, eval.Alerting),
newEvaluation(tn(4), eval.Alerting),
@@ -534,8 +551,9 @@ func TestProcessEvalResults(t *testing.T) {
expectedAnnotations: 3, // Normal -> Pending, Pending -> NoData, NoData -> Pending
expectedStates: []*state.State{
{
Labels: labels["system + rule + labels1"],
State: eval.Pending,
Labels: labels["system + rule + labels1"],
ResultFingerprint: labels1.Fingerprint(),
State: eval.Pending,
Results: []state.Evaluation{
newEvaluation(tn(4), eval.Alerting),
newEvaluation(tn(5), eval.Alerting),
@@ -566,8 +584,9 @@ func TestProcessEvalResults(t *testing.T) {
expectedAnnotations: 3,
expectedStates: []*state.State{
{
Labels: labels["system + rule + labels1"],
State: eval.NoData,
Labels: labels["system + rule + labels1"],
ResultFingerprint: labels1.Fingerprint(),
State: eval.NoData,
Results: []state.Evaluation{
newEvaluation(t3, eval.Alerting),
newEvaluation(tn(4), eval.NoData),
@@ -592,7 +611,8 @@ func TestProcessEvalResults(t *testing.T) {
expectedAnnotations: 1,
expectedStates: []*state.State{
{
Labels: labels["system + rule + labels1"],
Labels: labels["system + rule + labels1"],
ResultFingerprint: labels1.Fingerprint(),
State: eval.Pending,
Results: []state.Evaluation{
@@ -619,8 +639,9 @@ func TestProcessEvalResults(t *testing.T) {
expectedAnnotations: 1,
expectedStates: []*state.State{
{
Labels: labels["system + rule + labels1"],
State: eval.Pending,
Labels: labels["system + rule + labels1"],
ResultFingerprint: labels1.Fingerprint(),
State: eval.Pending,
Results: []state.Evaluation{
newEvaluation(t1, eval.Alerting),
newEvaluation(t2, eval.Alerting),
@@ -645,9 +666,10 @@ func TestProcessEvalResults(t *testing.T) {
expectedAnnotations: 1,
expectedStates: []*state.State{
{
Labels: labels["system + rule + labels1"],
State: eval.Pending,
StateReason: eval.NoData.String(),
Labels: labels["system + rule + labels1"],
ResultFingerprint: labels1.Fingerprint(),
State: eval.Pending,
StateReason: eval.NoData.String(),
Results: []state.Evaluation{
newEvaluation(t1, eval.Normal),
newEvaluation(t2, eval.NoData),
@@ -681,9 +703,10 @@ func TestProcessEvalResults(t *testing.T) {
expectedAnnotations: 2,
expectedStates: []*state.State{
{
Labels: labels["system + rule + labels1"],
State: eval.Alerting,
StateReason: eval.NoData.String(),
Labels: labels["system + rule + labels1"],
ResultFingerprint: labels1.Fingerprint(),
State: eval.Alerting,
StateReason: eval.NoData.String(),
Results: []state.Evaluation{
newEvaluation(t3, eval.NoData),
newEvaluation(tn(4), eval.NoData),
@@ -709,8 +732,9 @@ func TestProcessEvalResults(t *testing.T) {
expectedAnnotations: 1,
expectedStates: []*state.State{
{
Labels: labels["system + rule + labels1"],
State: eval.NoData,
Labels: labels["system + rule + labels1"],
ResultFingerprint: labels1.Fingerprint(),
State: eval.NoData,
Results: []state.Evaluation{
newEvaluation(t1, eval.Normal),
newEvaluation(t2, eval.NoData),
@@ -735,8 +759,9 @@ func TestProcessEvalResults(t *testing.T) {
expectedAnnotations: 1,
expectedStates: []*state.State{
{
Labels: labels["system + rule + labels1"],
State: eval.Normal,
Labels: labels["system + rule + labels1"],
ResultFingerprint: labels1.Fingerprint(),
State: eval.Normal,
Results: []state.Evaluation{
newEvaluation(t1, eval.Normal),
},
@@ -745,8 +770,9 @@ func TestProcessEvalResults(t *testing.T) {
LastEvaluationTime: t1,
},
{
Labels: labels["system + rule"],
State: eval.NoData,
Labels: labels["system + rule"],
ResultFingerprint: data.Labels{}.Fingerprint(),
State: eval.NoData,
Results: []state.Evaluation{
newEvaluation(t2, eval.NoData),
},
@@ -771,8 +797,9 @@ func TestProcessEvalResults(t *testing.T) {
expectedAnnotations: 1,
expectedStates: []*state.State{
{
Labels: labels["system + rule + labels1"],
State: eval.Normal,
Labels: labels["system + rule + labels1"],
ResultFingerprint: labels1.Fingerprint(),
State: eval.Normal,
Results: []state.Evaluation{
newEvaluation(t1, eval.Normal),
},
@@ -781,8 +808,9 @@ func TestProcessEvalResults(t *testing.T) {
LastEvaluationTime: t1,
},
{
Labels: labels["system + rule + labels2"],
State: eval.Normal,
Labels: labels["system + rule + labels2"],
ResultFingerprint: labels2.Fingerprint(),
State: eval.Normal,
Results: []state.Evaluation{
newEvaluation(t1, eval.Normal),
},
@@ -791,8 +819,9 @@ func TestProcessEvalResults(t *testing.T) {
LastEvaluationTime: t1,
},
{
Labels: labels["system + rule"],
State: eval.NoData,
Labels: labels["system + rule"],
ResultFingerprint: data.Labels{}.Fingerprint(),
State: eval.NoData,
Results: []state.Evaluation{
newEvaluation(t2, eval.NoData),
},
@@ -819,8 +848,9 @@ func TestProcessEvalResults(t *testing.T) {
expectedAnnotations: 1,
expectedStates: []*state.State{
{
Labels: labels["system + rule + labels1"],
State: eval.Normal,
Labels: labels["system + rule + labels1"],
ResultFingerprint: labels1.Fingerprint(),
State: eval.Normal,
Results: []state.Evaluation{
newEvaluation(t1, eval.Normal),
newEvaluation(t3, eval.Normal),
@@ -830,8 +860,9 @@ func TestProcessEvalResults(t *testing.T) {
LastEvaluationTime: t3,
},
{
Labels: labels["system + rule + no-data"],
State: eval.NoData,
Labels: labels["system + rule + no-data"],
ResultFingerprint: noDataLabels.Fingerprint(),
State: eval.NoData,
Results: []state.Evaluation{
newEvaluation(t2, eval.NoData),
},
@@ -855,9 +886,10 @@ func TestProcessEvalResults(t *testing.T) {
expectedAnnotations: 0,
expectedStates: []*state.State{
{
Labels: labels["system + rule + labels1"],
State: eval.Normal,
StateReason: eval.NoData.String(),
Labels: labels["system + rule + labels1"],
ResultFingerprint: labels1.Fingerprint(),
State: eval.Normal,
StateReason: eval.NoData.String(),
Results: []state.Evaluation{
newEvaluation(t1, eval.Normal),
newEvaluation(t2, eval.NoData),
@@ -882,10 +914,11 @@ func TestProcessEvalResults(t *testing.T) {
expectedAnnotations: 1,
expectedStates: []*state.State{
{
Labels: labels["system + rule + labels1"],
State: eval.Pending,
StateReason: eval.Error.String(),
Error: errors.New("with_state_error"),
Labels: labels["system + rule + labels1"],
ResultFingerprint: labels1.Fingerprint(),
State: eval.Pending,
StateReason: eval.Error.String(),
Error: errors.New("with_state_error"),
Results: []state.Evaluation{
newEvaluation(t1, eval.Normal),
newEvaluation(t2, eval.Error),
@@ -919,10 +952,11 @@ func TestProcessEvalResults(t *testing.T) {
expectedAnnotations: 2,
expectedStates: []*state.State{
{
Labels: labels["system + rule + labels1"],
State: eval.Alerting,
StateReason: eval.Error.String(),
Error: errors.New("with_state_error"),
Labels: labels["system + rule + labels1"],
ResultFingerprint: labels1.Fingerprint(),
State: eval.Alerting,
StateReason: eval.Error.String(),
Error: errors.New("with_state_error"),
Results: []state.Evaluation{
newEvaluation(t3, eval.Error),
newEvaluation(tn(4), eval.Error),
@@ -960,8 +994,9 @@ func TestProcessEvalResults(t *testing.T) {
"datasource_uid": "datasource_uid_1",
"ref_id": "A",
}),
State: eval.Error,
Error: expr.MakeQueryError("A", "datasource_uid_1", errors.New("this is an error")),
ResultFingerprint: labels1.Fingerprint(),
State: eval.Error,
Error: expr.MakeQueryError("A", "datasource_uid_1", errors.New("this is an error")),
Results: []state.Evaluation{
newEvaluation(t1, eval.Normal),
newEvaluation(t2, eval.Error),
@@ -988,9 +1023,10 @@ func TestProcessEvalResults(t *testing.T) {
expectedAnnotations: 1,
expectedStates: []*state.State{
{
Labels: labels["system + rule + labels1"],
State: eval.Normal,
StateReason: eval.Error.String(),
Labels: labels["system + rule + labels1"],
ResultFingerprint: labels1.Fingerprint(),
State: eval.Normal,
StateReason: eval.Error.String(),
Results: []state.Evaluation{
newEvaluation(t1, eval.Normal),
newEvaluation(t2, eval.Error),
@@ -1015,9 +1051,10 @@ func TestProcessEvalResults(t *testing.T) {
expectedAnnotations: 2,
expectedStates: []*state.State{
{
Labels: labels["system + rule + labels1"],
State: eval.Normal,
StateReason: eval.Error.String(),
Labels: labels["system + rule + labels1"],
ResultFingerprint: labels1.Fingerprint(),
State: eval.Normal,
StateReason: eval.Error.String(),
Results: []state.Evaluation{
newEvaluation(t1, eval.Alerting),
newEvaluation(t2, eval.Error),
@@ -1054,9 +1091,10 @@ func TestProcessEvalResults(t *testing.T) {
expectedAnnotations: 3,
expectedStates: []*state.State{
{
Labels: labels["system + rule + labels1"],
State: eval.Error,
Error: fmt.Errorf("with_state_error"),
Labels: labels["system + rule + labels1"],
ResultFingerprint: labels1.Fingerprint(),
State: eval.Error,
Error: fmt.Errorf("with_state_error"),
Results: []state.Evaluation{
newEvaluation(tn(5), eval.Error),
newEvaluation(tn(6), eval.Error),
@@ -1087,8 +1125,9 @@ func TestProcessEvalResults(t *testing.T) {
expectedAnnotations: 3,
expectedStates: []*state.State{
{
Labels: labels["system + rule + labels1"],
State: eval.Pending,
Labels: labels["system + rule + labels1"],
ResultFingerprint: labels1.Fingerprint(),
State: eval.Pending,
Results: []state.Evaluation{
newEvaluation(tn(4), eval.Alerting),
newEvaluation(tn(5), eval.Error),
@@ -1120,8 +1159,9 @@ func TestProcessEvalResults(t *testing.T) {
expectedAnnotations: 3,
expectedStates: []*state.State{
{
Labels: labels["system + rule + labels1"],
State: eval.NoData,
Labels: labels["system + rule + labels1"],
ResultFingerprint: labels1.Fingerprint(),
State: eval.NoData,
Results: []state.Evaluation{
newEvaluation(tn(4), eval.Alerting),
newEvaluation(tn(5), eval.Error),
@@ -1166,6 +1206,11 @@ func TestProcessEvalResults(t *testing.T) {
LastEvaluationTime: t1,
EvaluationDuration: evaluationDuration,
Annotations: map[string]string{"summary": "grafana is down in us-central-1 cluster -> prod namespace"},
ResultFingerprint: data.Labels{
"cluster": "us-central-1",
"namespace": "prod",
"pod": "grafana",
}.Fingerprint(),
},
},
},
@@ -1186,8 +1231,9 @@ func TestProcessEvalResults(t *testing.T) {
},
expectedStates: []*state.State{
{
Labels: labels["system + rule"],
State: eval.Alerting,
Labels: labels["system + rule"],
ResultFingerprint: data.Labels{}.Fingerprint(),
State: eval.Alerting,
Results: []state.Evaluation{
newEvaluation(t1, eval.Alerting),
newEvaluation(t2, eval.Error),
@@ -1454,6 +1500,7 @@ func TestStaleResultsHandler(t *testing.T) {
LastEvaluationTime: evaluationTime,
EvaluationDuration: 0,
Annotations: map[string]string{"testAnnoKey": "testAnnoValue"},
ResultFingerprint: data.Labels{"test1": "testValue1"}.Fingerprint(),
},
},
startingStateCount: 2,

View File

@@ -34,6 +34,9 @@ type State struct {
// StateReason is a textual description to explain why the state has its current state.
StateReason string
// ResultFingerprint is a hash of the labels of the evaluation result, taken before rule and system labels are merged into the state's labels.
ResultFingerprint data.Fingerprint
// Results contains the result of the current and previous evaluations.
Results []Evaluation