Alerting: Refactor Evaluator (#51673)

* add AlertRule.GetEvalCondition so that an alert rule returns its evaluation condition
* update ConditionEval to not return an error because it's always nil
* make getExprRequest private
* refactor executeCondition into a plain converter and move the execution into ConditionEval, as this makes the code more readable.
* log error if results have errors
* change signature of evaluate function to not return an error
This commit is contained in:
Yuriy Tseretyan
2022-07-12 16:51:32 -04:00
committed by GitHub
parent 2d8a91a846
commit 554ebd647b
6 changed files with 51 additions and 59 deletions

View File

@@ -55,10 +55,7 @@ func (srv TestingApiSrv) RouteTestGrafanaRuleConfig(c *models.ReqContext, body a
now = timeNow() now = timeNow()
} }
evalResults, err := srv.evaluator.ConditionEval(&evalCond, now) evalResults := srv.evaluator.ConditionEval(evalCond, now)
if err != nil {
return ErrResp(http.StatusBadRequest, err, "Failed to evaluate conditions")
}
frame := evalResults.AsDataFrame() frame := evalResults.AsDataFrame()
return response.JSONStreaming(http.StatusOK, util.DynMap{ return response.JSONStreaming(http.StatusOK, util.DynMap{

View File

@@ -69,7 +69,7 @@ func TestRouteTestGrafanaRuleConfig(t *testing.T) {
evaluator := &eval.FakeEvaluator{} evaluator := &eval.FakeEvaluator{}
var result []eval.Result var result []eval.Result
evaluator.EXPECT().ConditionEval(mock.Anything, mock.Anything).Return(result, nil) evaluator.EXPECT().ConditionEval(mock.Anything, mock.Anything).Return(result)
srv := createTestingApiSrv(ds, ac, evaluator) srv := createTestingApiSrv(ds, ac, evaluator)
@@ -109,7 +109,7 @@ func TestRouteTestGrafanaRuleConfig(t *testing.T) {
evaluator := &eval.FakeEvaluator{} evaluator := &eval.FakeEvaluator{}
var result []eval.Result var result []eval.Result
evaluator.EXPECT().ConditionEval(mock.Anything, mock.Anything).Return(result, nil) evaluator.EXPECT().ConditionEval(mock.Anything, mock.Anything).Return(result)
srv := createTestingApiSrv(ds, ac, evaluator) srv := createTestingApiSrv(ds, ac, evaluator)

View File

@@ -28,7 +28,7 @@ import (
//go:generate mockery --name Evaluator --structname FakeEvaluator --inpackage --filename evaluator_mock.go --with-expecter //go:generate mockery --name Evaluator --structname FakeEvaluator --inpackage --filename evaluator_mock.go --with-expecter
type Evaluator interface { type Evaluator interface {
// ConditionEval executes conditions and evaluates the result. // ConditionEval executes conditions and evaluates the result.
ConditionEval(condition *models.Condition, now time.Time) (Results, error) ConditionEval(condition models.Condition, now time.Time) Results
// QueriesAndExpressionsEval executes queries and expressions and returns the result. // QueriesAndExpressionsEval executes queries and expressions and returns the result.
QueriesAndExpressionsEval(orgID int64, data []models.AlertQuery, now time.Time) (*backend.QueryDataResponse, error) QueriesAndExpressionsEval(orgID int64, data []models.AlertQuery, now time.Time) (*backend.QueryDataResponse, error)
} }
@@ -89,6 +89,15 @@ type ExecutionResults struct {
// Results is a slice of evaluated alert instances states. // Results is a slice of evaluated alert instances states.
type Results []Result type Results []Result
// HasErrors reports whether at least one result in the set has the Error state.
// It returns false for an empty or nil set.
func (evalResults Results) HasErrors() bool {
	for idx := 0; idx < len(evalResults); idx++ {
		if evalResults[idx].State != Error {
			continue
		}
		// One errored instance is enough; no need to inspect the rest.
		return true
	}
	return false
}
// Result contains the evaluated State of an alert instance // Result contains the evaluated State of an alert instance
// identified by its labels. // identified by its labels.
type Result struct { type Result struct {
@@ -153,8 +162,8 @@ type AlertExecCtx struct {
Ctx context.Context Ctx context.Context
} }
// GetExprRequest validates the condition, gets the datasource information and creates an expr.Request from it. // getExprRequest validates the condition, gets the datasource information and creates an expr.Request from it.
func GetExprRequest(ctx AlertExecCtx, data []models.AlertQuery, now time.Time, dsCacheService datasources.CacheService, secretsService secrets.Service) (*expr.Request, error) { func getExprRequest(ctx AlertExecCtx, data []models.AlertQuery, now time.Time, dsCacheService datasources.CacheService, secretsService secrets.Service) (*expr.Request, error) {
req := &expr.Request{ req := &expr.Request{
OrgId: ctx.OrgID, OrgId: ctx.OrgID,
Headers: map[string]string{ Headers: map[string]string{
@@ -166,8 +175,7 @@ func GetExprRequest(ctx AlertExecCtx, data []models.AlertQuery, now time.Time, d
datasources := make(map[string]*datasources.DataSource, len(data)) datasources := make(map[string]*datasources.DataSource, len(data))
for i := range data { for _, q := range data {
q := data[i]
model, err := q.GetModel() model, err := q.GetModel()
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to get query model: %w", err) return nil, fmt.Errorf("failed to get query model: %w", err)
@@ -259,12 +267,7 @@ type NumberValueCapture struct {
Value *float64 Value *float64
} }
func executeCondition(ctx AlertExecCtx, c *models.Condition, now time.Time, exprService *expr.Service, dsCacheService datasources.CacheService, secretsService secrets.Service) ExecutionResults { func queryDataResponseToExecutionResults(c models.Condition, execResp *backend.QueryDataResponse) ExecutionResults {
execResp, err := executeQueriesAndExpressions(ctx, c.Data, now, exprService, dsCacheService, secretsService)
if err != nil {
return ExecutionResults{Error: err}
}
// eval captures for the '__value_string__' annotation and the Value property of the API response. // eval captures for the '__value_string__' annotation and the Value property of the API response.
captures := make([]NumberValueCapture, 0, len(execResp.Responses)) captures := make([]NumberValueCapture, 0, len(execResp.Responses))
captureVal := func(refID string, labels data.Labels, value *float64) { captureVal := func(refID string, labels data.Labels, value *float64) {
@@ -356,7 +359,7 @@ func executeQueriesAndExpressions(ctx AlertExecCtx, data []models.AlertQuery, no
} }
}() }()
queryDataReq, err := GetExprRequest(ctx, data, now, dsCacheService, secretsService) queryDataReq, err := getExprRequest(ctx, data, now, dsCacheService, secretsService)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -564,8 +567,6 @@ func (evalResults Results) AsDataFrame() data.Frame {
labelColumns = append(labelColumns, k) labelColumns = append(labelColumns, k)
} }
labelColumns = sort.StringSlice(labelColumns)
frame := data.NewFrame("evaluation results") frame := data.NewFrame("evaluation results")
for _, lKey := range labelColumns { for _, lKey := range labelColumns {
frame.Fields = append(frame.Fields, data.NewField(lKey, nil, make([]string, fieldLen))) frame.Fields = append(frame.Fields, data.NewField(lKey, nil, make([]string, fieldLen)))
@@ -591,16 +592,15 @@ func (evalResults Results) AsDataFrame() data.Frame {
} }
// ConditionEval executes conditions and evaluates the result. // ConditionEval executes conditions and evaluates the result.
func (e *evaluatorImpl) ConditionEval(condition *models.Condition, now time.Time) (Results, error) { func (e *evaluatorImpl) ConditionEval(condition models.Condition, now time.Time) Results {
alertCtx, cancelFn := context.WithTimeout(context.Background(), e.cfg.UnifiedAlerting.EvaluationTimeout) execResp, err := e.QueriesAndExpressionsEval(condition.OrgID, condition.Data, now)
defer cancelFn() var execResults ExecutionResults
if err != nil {
alertExecCtx := AlertExecCtx{OrgID: condition.OrgID, Ctx: alertCtx, ExpressionsEnabled: e.cfg.ExpressionsEnabled, Log: e.log} execResults = ExecutionResults{Error: err}
} else {
execResult := executeCondition(alertExecCtx, condition, now, e.expressionService, e.dataSourceCache, e.secretsService) execResults = queryDataResponseToExecutionResults(condition, execResp)
}
evalResults := evaluateExecutionResult(execResult, now) return evaluateExecutionResult(execResults, now)
return evalResults, nil
} }
// QueriesAndExpressionsEval executes queries and expressions and returns the result. // QueriesAndExpressionsEval executes queries and expressions and returns the result.

View File

@@ -25,11 +25,11 @@ func (_m *FakeEvaluator) EXPECT() *FakeEvaluator_Expecter {
} }
// ConditionEval provides a mock function with given fields: condition, now // ConditionEval provides a mock function with given fields: condition, now
func (_m *FakeEvaluator) ConditionEval(condition *models.Condition, now time.Time) (Results, error) { func (_m *FakeEvaluator) ConditionEval(condition models.Condition, now time.Time) Results {
ret := _m.Called(condition, now) ret := _m.Called(condition, now)
var r0 Results var r0 Results
if rf, ok := ret.Get(0).(func(*models.Condition, time.Time) Results); ok { if rf, ok := ret.Get(0).(func(models.Condition, time.Time) Results); ok {
r0 = rf(condition, now) r0 = rf(condition, now)
} else { } else {
if ret.Get(0) != nil { if ret.Get(0) != nil {
@@ -37,14 +37,7 @@ func (_m *FakeEvaluator) ConditionEval(condition *models.Condition, now time.Tim
} }
} }
var r1 error return r0
if rf, ok := ret.Get(1).(func(*models.Condition, time.Time) error); ok {
r1 = rf(condition, now)
} else {
r1 = ret.Error(1)
}
return r0, r1
} }
// FakeEvaluator_ConditionEval_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'ConditionEval' // FakeEvaluator_ConditionEval_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'ConditionEval'
@@ -53,21 +46,21 @@ type FakeEvaluator_ConditionEval_Call struct {
} }
// ConditionEval is a helper method to define mock.On call // ConditionEval is a helper method to define mock.On call
// - condition *models.Condition // - condition models.Condition
// - now time.Time // - now time.Time
func (_e *FakeEvaluator_Expecter) ConditionEval(condition interface{}, now interface{}) *FakeEvaluator_ConditionEval_Call { func (_e *FakeEvaluator_Expecter) ConditionEval(condition interface{}, now interface{}) *FakeEvaluator_ConditionEval_Call {
return &FakeEvaluator_ConditionEval_Call{Call: _e.mock.On("ConditionEval", condition, now)} return &FakeEvaluator_ConditionEval_Call{Call: _e.mock.On("ConditionEval", condition, now)}
} }
func (_c *FakeEvaluator_ConditionEval_Call) Run(run func(condition *models.Condition, now time.Time)) *FakeEvaluator_ConditionEval_Call { func (_c *FakeEvaluator_ConditionEval_Call) Run(run func(condition models.Condition, now time.Time)) *FakeEvaluator_ConditionEval_Call {
_c.Call.Run(func(args mock.Arguments) { _c.Call.Run(func(args mock.Arguments) {
run(args[0].(*models.Condition), args[1].(time.Time)) run(args[0].(models.Condition), args[1].(time.Time))
}) })
return _c return _c
} }
func (_c *FakeEvaluator_ConditionEval_Call) Return(_a0 Results, _a1 error) *FakeEvaluator_ConditionEval_Call { func (_c *FakeEvaluator_ConditionEval_Call) Return(_a0 Results) *FakeEvaluator_ConditionEval_Call {
_c.Call.Return(_a0, _a1) _c.Call.Return(_a0)
return _c return _c
} }

View File

@@ -170,6 +170,14 @@ func (alertRule *AlertRule) GetLabels(opts ...LabelOption) map[string]string {
return labels return labels
} }
// GetEvalCondition builds a Condition from the rule's Condition, OrgID and Data fields.
// The Data value is assigned as is, not copied element by element.
func (alertRule *AlertRule) GetEvalCondition() Condition {
	var evalCond Condition
	evalCond.Condition = alertRule.Condition
	evalCond.OrgID = alertRule.OrgID
	evalCond.Data = alertRule.Data
	return evalCond
}
// Diff calculates diff between two alert rules. Returns nil if two rules are equal. Otherwise, returns cmputil.DiffReport // Diff calculates diff between two alert rules. Returns nil if two rules are equal. Otherwise, returns cmputil.DiffReport
func (alertRule *AlertRule) Diff(rule *AlertRule, ignore ...string) cmputil.DiffReport { func (alertRule *AlertRule) Diff(rule *AlertRule, ignore ...string) cmputil.DiffReport {
var reporter cmputil.DiffReporter var reporter cmputil.DiffReporter

View File

@@ -388,32 +388,25 @@ func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key ngmodels.AlertR
return q.Result, nil return q.Result, nil
} }
evaluate := func(ctx context.Context, r *ngmodels.AlertRule, attempt int64, e *evaluation) error { evaluate := func(ctx context.Context, r *ngmodels.AlertRule, attempt int64, e *evaluation) {
logger := logger.New("version", r.Version, "attempt", attempt, "now", e.scheduledAt) logger := logger.New("version", r.Version, "attempt", attempt, "now", e.scheduledAt)
start := sch.clock.Now() start := sch.clock.Now()
condition := ngmodels.Condition{ results := sch.evaluator.ConditionEval(r.GetEvalCondition(), e.scheduledAt)
Condition: r.Condition,
OrgID: r.OrgID,
Data: r.Data,
}
results, err := sch.evaluator.ConditionEval(&condition, e.scheduledAt)
dur := sch.clock.Now().Sub(start) dur := sch.clock.Now().Sub(start)
evalTotal.Inc() evalTotal.Inc()
evalDuration.Observe(dur.Seconds()) evalDuration.Observe(dur.Seconds())
if err != nil { if results.HasErrors() {
evalTotalFailures.Inc() evalTotalFailures.Inc()
// consider saving alert instance on error logger.Error("failed to evaluate alert rule", "results", results, "duration", dur)
logger.Error("failed to evaluate alert rule", "duration", dur, "err", err) } else {
return err logger.Debug("alert rule evaluated", "results", results, "duration", dur)
} }
logger.Debug("alert rule evaluated", "results", results, "duration", dur)
processedStates := sch.stateManager.ProcessEvalResults(ctx, e.scheduledAt, r, results) processedStates := sch.stateManager.ProcessEvalResults(ctx, e.scheduledAt, r, results)
sch.saveAlertStates(ctx, processedStates) sch.saveAlertStates(ctx, processedStates)
alerts := FromAlertStateToPostableAlerts(processedStates, sch.stateManager, sch.appURL) alerts := FromAlertStateToPostableAlerts(processedStates, sch.stateManager, sch.appURL)
sch.alertsSender.Send(key, alerts) sch.alertsSender.Send(key, alerts)
return nil
} }
retryIfError := func(f func(attempt int64) error) error { retryIfError := func(f func(attempt int64) error) error {
@@ -475,7 +468,8 @@ func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key ngmodels.AlertR
currentRule = newRule currentRule = newRule
logger.Debug("new alert rule version fetched", "title", newRule.Title, "version", newRule.Version) logger.Debug("new alert rule version fetched", "title", newRule.Title, "version", newRule.Version)
} }
return evaluate(grafanaCtx, currentRule, attempt, ctx) evaluate(grafanaCtx, currentRule, attempt, ctx)
return nil
}) })
if err != nil { if err != nil {
logger.Error("evaluation failed after all retries", "err", err) logger.Error("evaluation failed after all retries", "err", err)