Alerting: Eval pkg tests and more specific error handling (#33496)

* comment updates
* more friendly error messages, in particular if it looks like time series data
This commit is contained in:
Kyle Brandt 2021-04-29 07:27:32 -04:00 committed by GitHub
parent 840828b5d2
commit d32fcbe2bc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 399 additions and 44 deletions

View File

@ -46,8 +46,6 @@ func (e *invalidEvalResultFormatError) Unwrap() error {
// ExecutionResults contains the unevaluated results from executing
// a condition.
type ExecutionResults struct {
AlertDefinitionID int64
Error error
Results data.Frames
@ -59,8 +57,10 @@ type Results []Result
// Result contains the evaluated State of an alert instance
// identified by its labels.
type Result struct {
Instance data.Labels
State State // Enum
Instance data.Labels
State State // Enum
// Error message for Error state. Should be nil if State != Error.
Error error
EvaluatedAt time.Time
EvaluationDuration time.Duration
}
@ -141,13 +141,13 @@ func GetExprRequest(ctx AlertExecCtx, data []models.AlertQuery, now time.Time) (
return req, nil
}
func executeCondition(ctx AlertExecCtx, c *models.Condition, now time.Time, dataService *tsdb.Service) (*ExecutionResults, error) {
func executeCondition(ctx AlertExecCtx, c *models.Condition, now time.Time, dataService *tsdb.Service) ExecutionResults {
result := ExecutionResults{}
execResp, err := executeQueriesAndExpressions(ctx, c.Data, now, dataService)
if err != nil {
return &result, err
return ExecutionResults{Error: err}
}
for refID, res := range execResp.Responses {
@ -157,13 +157,7 @@ func executeCondition(ctx AlertExecCtx, c *models.Condition, now time.Time, data
result.Results = res.Frames
}
if len(result.Results) == 0 {
err = fmt.Errorf("no transformation results")
result.Error = err
return &result, err
}
return &result, nil
return result
}
func executeQueriesAndExpressions(ctx AlertExecCtx, data []models.AlertQuery, now time.Time, dataService *tsdb.Service) (*backend.QueryDataResponse, error) {
@ -179,39 +173,93 @@ func executeQueriesAndExpressions(ctx AlertExecCtx, data []models.AlertQuery, no
return exprService.TransformData(ctx.Ctx, queryDataReq)
}
// evaluateExecutionResult takes the ExecutionResult, and returns a frame where
// each column is a string type that holds a string representing its State.
func evaluateExecutionResult(results *ExecutionResults, ts time.Time) (Results, error) {
// evaluateExecutionResult takes the ExecutionResult which includes data.Frames returned
// from SSE (Server Side Expressions). It will create Results (slice of Result) with a State
// extracted from each Frame.
//
// If the ExecutionResults error property is not nil, a single Error result will be returned.
// If there is no error and no results then a single NoData state Result will be returned.
//
// Each non-empty Frame must contain a single Field of type []*float64 and of length 1.
// Also, each Frame must be uniquely identified by its Field.Labels or a single Error result will be returned.
//
// Per Frame, data becomes a State based on the following rules:
// - Empty or zero length Frames result in NoData.
// - If a value:
// - 0 results in Normal.
// - Nonzero (e.g. 1.2, NaN) results in Alerting.
// - nil results in NoData.
// - unsupported Frame schemas results in Error.
func evaluateExecutionResult(execResults ExecutionResults, ts time.Time) Results {
evalResults := make([]Result, 0)
labels := make(map[string]bool)
for _, f := range results.Results {
appendErrRes := func(e error) {
evalResults = append(evalResults, Result{
State: Error,
Error: e,
EvaluatedAt: ts,
EvaluationDuration: time.Since(ts),
})
}
appendNoData := func(l data.Labels) {
evalResults = append(evalResults, Result{
State: NoData,
Instance: l,
EvaluatedAt: ts,
EvaluationDuration: time.Since(ts),
})
}
if execResults.Error != nil {
appendErrRes(execResults.Error)
return evalResults
}
if len(execResults.Results) == 0 {
appendNoData(nil)
return evalResults
}
for _, f := range execResults.Results {
rowLen, err := f.RowLen()
if err != nil {
return nil, &invalidEvalResultFormatError{refID: f.RefID, reason: "unable to get frame row length", err: err}
appendErrRes(&invalidEvalResultFormatError{refID: f.RefID, reason: "unable to get frame row length", err: err})
continue
}
if len(f.TypeIndices(data.FieldTypeTime, data.FieldTypeNullableTime)) > 0 {
appendErrRes(&invalidEvalResultFormatError{refID: f.RefID, reason: "looks like time series data, only reduced data can be alerted on."})
continue
}
if rowLen == 0 {
if len(f.Fields) == 0 {
appendNoData(nil)
continue
}
if len(f.Fields) == 1 {
appendNoData(f.Fields[0].Labels)
continue
}
}
if rowLen > 1 {
return nil, &invalidEvalResultFormatError{refID: f.RefID, reason: fmt.Sprintf("unexpected row length: %d instead of 1", rowLen)}
appendErrRes(&invalidEvalResultFormatError{refID: f.RefID, reason: fmt.Sprintf("unexpected row length: %d instead of 0 or 1", rowLen)})
continue
}
if len(f.Fields) > 1 {
return nil, &invalidEvalResultFormatError{refID: f.RefID, reason: fmt.Sprintf("unexpected field length: %d instead of 1", len(f.Fields))}
appendErrRes(&invalidEvalResultFormatError{refID: f.RefID, reason: fmt.Sprintf("unexpected field length: %d instead of 1", len(f.Fields))})
continue
}
if f.Fields[0].Type() != data.FieldTypeNullableFloat64 {
return nil, &invalidEvalResultFormatError{refID: f.RefID, reason: fmt.Sprintf("invalid field type: %d", f.Fields[0].Type())}
appendErrRes(&invalidEvalResultFormatError{refID: f.RefID, reason: fmt.Sprintf("invalid field type: %s", f.Fields[0].Type())})
continue
}
labelsStr := f.Fields[0].Labels.String()
_, ok := labels[labelsStr]
if ok {
return nil, &invalidEvalResultFormatError{refID: f.RefID, reason: fmt.Sprintf("frame cannot uniquely be identified by its labels: %s", labelsStr)}
}
labels[labelsStr] = true
val, ok := f.Fields[0].At(0).(*float64)
if !ok {
return nil, &invalidEvalResultFormatError{refID: f.RefID, reason: fmt.Sprintf("expected nullable float64 but got type %T", f.Fields[0].Type())}
}
val := f.Fields[0].At(0).(*float64) // type checked by data.FieldTypeNullableFloat64 above
r := Result{
Instance: f.Fields[0].Labels,
@ -220,8 +268,6 @@ func evaluateExecutionResult(results *ExecutionResults, ts time.Time) (Results,
}
switch {
case err != nil:
r.State = Error
case val == nil:
r.State = NoData
case *val == 0:
@ -232,7 +278,26 @@ func evaluateExecutionResult(results *ExecutionResults, ts time.Time) (Results,
evalResults = append(evalResults, r)
}
return evalResults, nil
seenLabels := make(map[string]bool)
for _, res := range evalResults {
labelsStr := res.Instance.String()
_, ok := seenLabels[labelsStr]
if ok {
return Results{
Result{
State: Error,
Instance: res.Instance,
EvaluatedAt: ts,
EvaluationDuration: time.Since(ts),
Error: &invalidEvalResultFormatError{reason: fmt.Sprintf("frame cannot uniquely be identified by its labels: has duplicate results with labels {%s}", labelsStr)},
},
}
}
seenLabels[labelsStr] = true
}
return evalResults
}
// AsDataFrame forms the EvalResults in Frame suitable for displaying in the table panel of the front end.
@ -277,15 +342,9 @@ func (e *Evaluator) ConditionEval(condition *models.Condition, now time.Time, da
alertExecCtx := AlertExecCtx{OrgID: condition.OrgID, Ctx: alertCtx, ExpressionsEnabled: e.Cfg.ExpressionsEnabled}
execResult, err := executeCondition(alertExecCtx, condition, now, dataService)
if err != nil {
return nil, fmt.Errorf("failed to execute conditions: %w", err)
}
execResult := executeCondition(alertExecCtx, condition, now, dataService)
evalResults, err := evaluateExecutionResult(execResult, now)
if err != nil {
return nil, fmt.Errorf("failed to evaluate results: %w", err)
}
evalResults := evaluateExecutionResult(execResult, now)
return evalResults, nil
}

View File

@ -0,0 +1,296 @@
package eval
import (
"fmt"
"testing"
"time"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/stretchr/testify/require"
ptr "github.com/xorcare/pointer"
)
// TestEvaluateExecutionResult is a table-driven test for evaluateExecutionResult.
//
// Each case supplies an ExecutionResults value (frames and/or an execution
// error) and the Results expected back. For every returned Result the State
// and Instance labels are compared; for results in the Error state the error
// message is compared as well. EvaluatedAt and EvaluationDuration are not
// asserted on.
func TestEvaluateExecutionResult(t *testing.T) {
	cases := []struct {
		desc               string
		execResults        ExecutionResults
		expectResultLength int
		expectResults      Results
	}{
		{
			desc: "zero valued single instance is single Normal state result",
			execResults: ExecutionResults{
				Results: []*data.Frame{
					data.NewFrame("", data.NewField("", nil, []*float64{ptr.Float64(0)})),
				},
			},
			expectResultLength: 1,
			expectResults: Results{
				{
					State: Normal,
				},
			},
		},
		{
			desc: "non-zero valued single instance is single Alerting state result",
			execResults: ExecutionResults{
				Results: []*data.Frame{
					data.NewFrame("", data.NewField("", nil, []*float64{ptr.Float64(1)})),
				},
			},
			expectResultLength: 1,
			expectResults: Results{
				{
					State: Alerting,
				},
			},
		},
		{
			desc: "nil value single instance is single a NoData state result",
			execResults: ExecutionResults{
				Results: []*data.Frame{
					data.NewFrame("", data.NewField("", nil, []*float64{nil})),
				},
			},
			expectResultLength: 1,
			expectResults: Results{
				{
					State: NoData,
				},
			},
		},
		{
			desc: "an execution error produces a single Error state result",
			execResults: ExecutionResults{
				Error: fmt.Errorf("an execution error"),
			},
			expectResultLength: 1,
			expectResults: Results{
				{
					State: Error,
					Error: fmt.Errorf("an execution error"),
				},
			},
		},
		{
			desc:               "empty results produces a single NoData state result",
			execResults:        ExecutionResults{},
			expectResultLength: 1,
			expectResults: Results{
				{
					State: NoData,
				},
			},
		},
		{
			desc: "frame with no fields produces a NoData state result",
			execResults: ExecutionResults{
				Results: []*data.Frame{
					data.NewFrame(""),
				},
			},
			expectResultLength: 1,
			expectResults: Results{
				{
					State: NoData,
				},
			},
		},
		{
			desc: "empty field produces a NoData state result",
			execResults: ExecutionResults{
				Results: []*data.Frame{
					data.NewFrame("", data.NewField("", nil, []*float64{})),
				},
			},
			expectResultLength: 1,
			expectResults: Results{
				{
					State: NoData,
				},
			},
		},
		{
			desc: "empty field with labels produces a NoData state result with labels",
			execResults: ExecutionResults{
				Results: []*data.Frame{
					data.NewFrame("", data.NewField("", data.Labels{"a": "b"}, []*float64{})),
				},
			},
			expectResultLength: 1,
			expectResults: Results{
				{
					State:    NoData,
					Instance: data.Labels{"a": "b"},
				},
			},
		},
		{
			desc: "malformed frame (unequal lengths) produces Error state result",
			execResults: ExecutionResults{
				Results: []*data.Frame{
					data.NewFrame("",
						data.NewField("", nil, []*float64{ptr.Float64(23)}),
						data.NewField("", nil, []*float64{}),
					),
				},
			},
			expectResultLength: 1,
			expectResults: Results{
				{
					State: Error,
					Error: fmt.Errorf("invalid format of evaluation results for the alert definition : unable to get frame row length: frame has different field lengths, field 0 is len 1 but field 1 is len 0"),
				},
			},
		},
		{
			desc: "too many fields produces Error state result",
			execResults: ExecutionResults{
				Results: []*data.Frame{
					data.NewFrame("",
						data.NewField("", nil, []*float64{}),
						data.NewField("", nil, []*float64{}),
					),
				},
			},
			expectResultLength: 1,
			expectResults: Results{
				{
					State: Error,
					Error: fmt.Errorf("invalid format of evaluation results for the alert definition : unexpected field length: 2 instead of 1"),
				},
			},
		},
		{
			desc: "more than one row produces Error state result",
			execResults: ExecutionResults{
				Results: []*data.Frame{
					data.NewFrame("",
						data.NewField("", nil, []*float64{ptr.Float64(2), ptr.Float64(3)}),
					),
				},
			},
			expectResultLength: 1,
			expectResults: Results{
				{
					State: Error,
					Error: fmt.Errorf("invalid format of evaluation results for the alert definition : unexpected row length: 2 instead of 0 or 1"),
				},
			},
		},
		{
			desc: "time fields (looks like time series) returns error",
			execResults: ExecutionResults{
				Results: []*data.Frame{
					data.NewFrame("",
						data.NewField("", nil, []time.Time{}),
					),
				},
			},
			expectResultLength: 1,
			expectResults: Results{
				{
					State: Error,
					Error: fmt.Errorf("invalid format of evaluation results for the alert definition : looks like time series data, only reduced data can be alerted on."),
				},
			},
		},
		{
			desc: "non []*float64 field will produce Error state result",
			execResults: ExecutionResults{
				Results: []*data.Frame{
					data.NewFrame("",
						data.NewField("", nil, []float64{2}),
					),
				},
			},
			expectResultLength: 1,
			expectResults: Results{
				{
					State: Error,
					Error: fmt.Errorf("invalid format of evaluation results for the alert definition : invalid field type: []float64"),
				},
			},
		},
		{
			desc: "duplicate labels produce a single Error state result",
			execResults: ExecutionResults{
				Results: []*data.Frame{
					data.NewFrame("",
						data.NewField("", nil, []*float64{ptr.Float64(1)}),
					),
					data.NewFrame("",
						data.NewField("", nil, []*float64{ptr.Float64(2)}),
					),
				},
			},
			expectResultLength: 1,
			expectResults: Results{
				{
					State: Error,
					Error: fmt.Errorf("invalid format of evaluation results for the alert definition : frame cannot uniquely be identified by its labels: has duplicate results with labels {}"),
				},
			},
		},
		{
			desc: "error that produce duplicate empty labels produce a single Error state result",
			execResults: ExecutionResults{
				Results: []*data.Frame{
					data.NewFrame("",
						data.NewField("", data.Labels{"a": "b"}, []float64{2}),
					),
					data.NewFrame("",
						data.NewField("", nil, []float64{2}),
					),
				},
			},
			expectResultLength: 1,
			expectResults: Results{
				{
					State: Error,
					Error: fmt.Errorf("invalid format of evaluation results for the alert definition : frame cannot uniquely be identified by its labels: has duplicate results with labels {}"),
				},
			},
		},
		{
			desc: "certain errors will produce multiple mixed Error and other state results",
			execResults: ExecutionResults{
				Results: []*data.Frame{
					data.NewFrame("",
						data.NewField("", nil, []float64{3}),
					),
					data.NewFrame("",
						data.NewField("", data.Labels{"a": "b"}, []*float64{ptr.Float64(2)}),
					),
				},
			},
			expectResultLength: 2,
			expectResults: Results{
				{
					State: Error,
					Error: fmt.Errorf("invalid format of evaluation results for the alert definition : invalid field type: []float64"),
				},
				{
					State:    Alerting,
					Instance: data.Labels{"a": "b"},
				},
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.desc, func(t *testing.T) {
			// Validate the table row itself first: expectResults is indexed by
			// the position of each actual result below, so a row whose
			// expectResults and expectResultLength disagree must fail here
			// rather than panic with an index out of range.
			require.Len(t, tc.expectResults, tc.expectResultLength)

			res := evaluateExecutionResult(tc.execResults, time.Time{})
			require.Len(t, res, tc.expectResultLength)

			for i, r := range res {
				want := tc.expectResults[i]
				require.Equal(t, want.State, r.State)
				require.Equal(t, want.Instance, r.Instance)
				if want.State == Error {
					// The actual error is the value under test, so it goes in
					// the error position: a nil r.Error is then reported as an
					// assertion failure instead of a nil dereference panic.
					require.EqualError(t, r.Error, want.Error.Error())
				}
			}
		})
	}
}