Alerting: Refactor Evaluator (#51673)

* add AlertRule.GetEvalCondition to return the rule's evaluation condition
* update ConditionEval to not return an error because it's always nil
* make getExprRequest private
* refactor executeCondition into a pure converter and move execution into ConditionEval, as this makes the code more readable.
* log error if results have errors
* change signature of evaluate function to not return an error
This commit is contained in:
Yuriy Tseretyan 2022-07-12 16:51:32 -04:00 committed by GitHub
parent 2d8a91a846
commit 554ebd647b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 51 additions and 59 deletions

View File

@ -55,10 +55,7 @@ func (srv TestingApiSrv) RouteTestGrafanaRuleConfig(c *models.ReqContext, body a
now = timeNow()
}
evalResults, err := srv.evaluator.ConditionEval(&evalCond, now)
if err != nil {
return ErrResp(http.StatusBadRequest, err, "Failed to evaluate conditions")
}
evalResults := srv.evaluator.ConditionEval(evalCond, now)
frame := evalResults.AsDataFrame()
return response.JSONStreaming(http.StatusOK, util.DynMap{

View File

@ -69,7 +69,7 @@ func TestRouteTestGrafanaRuleConfig(t *testing.T) {
evaluator := &eval.FakeEvaluator{}
var result []eval.Result
evaluator.EXPECT().ConditionEval(mock.Anything, mock.Anything).Return(result, nil)
evaluator.EXPECT().ConditionEval(mock.Anything, mock.Anything).Return(result)
srv := createTestingApiSrv(ds, ac, evaluator)
@ -109,7 +109,7 @@ func TestRouteTestGrafanaRuleConfig(t *testing.T) {
evaluator := &eval.FakeEvaluator{}
var result []eval.Result
evaluator.EXPECT().ConditionEval(mock.Anything, mock.Anything).Return(result, nil)
evaluator.EXPECT().ConditionEval(mock.Anything, mock.Anything).Return(result)
srv := createTestingApiSrv(ds, ac, evaluator)

View File

@ -28,7 +28,7 @@ import (
//go:generate mockery --name Evaluator --structname FakeEvaluator --inpackage --filename evaluator_mock.go --with-expecter
type Evaluator interface {
// ConditionEval executes conditions and evaluates the result.
ConditionEval(condition *models.Condition, now time.Time) (Results, error)
ConditionEval(condition models.Condition, now time.Time) Results
// QueriesAndExpressionsEval executes queries and expressions and returns the result.
QueriesAndExpressionsEval(orgID int64, data []models.AlertQuery, now time.Time) (*backend.QueryDataResponse, error)
}
@ -89,6 +89,15 @@ type ExecutionResults struct {
// Results is a slice of evaluated alert instances states.
type Results []Result
// HasErrors reports whether at least one result in the slice has its State
// set to Error. It returns false for an empty or nil slice.
func (evalResults Results) HasErrors() bool {
	for i := range evalResults {
		if evalResults[i].State != Error {
			continue
		}
		// One failed result is enough; no need to look at the rest.
		return true
	}
	return false
}
// Result contains the evaluated State of an alert instance
// identified by its labels.
type Result struct {
@ -153,8 +162,8 @@ type AlertExecCtx struct {
Ctx context.Context
}
// GetExprRequest validates the condition, gets the datasource information and creates an expr.Request from it.
func GetExprRequest(ctx AlertExecCtx, data []models.AlertQuery, now time.Time, dsCacheService datasources.CacheService, secretsService secrets.Service) (*expr.Request, error) {
// getExprRequest validates the condition, gets the datasource information and creates an expr.Request from it.
func getExprRequest(ctx AlertExecCtx, data []models.AlertQuery, now time.Time, dsCacheService datasources.CacheService, secretsService secrets.Service) (*expr.Request, error) {
req := &expr.Request{
OrgId: ctx.OrgID,
Headers: map[string]string{
@ -166,8 +175,7 @@ func GetExprRequest(ctx AlertExecCtx, data []models.AlertQuery, now time.Time, d
datasources := make(map[string]*datasources.DataSource, len(data))
for i := range data {
q := data[i]
for _, q := range data {
model, err := q.GetModel()
if err != nil {
return nil, fmt.Errorf("failed to get query model: %w", err)
@ -259,12 +267,7 @@ type NumberValueCapture struct {
Value *float64
}
func executeCondition(ctx AlertExecCtx, c *models.Condition, now time.Time, exprService *expr.Service, dsCacheService datasources.CacheService, secretsService secrets.Service) ExecutionResults {
execResp, err := executeQueriesAndExpressions(ctx, c.Data, now, exprService, dsCacheService, secretsService)
if err != nil {
return ExecutionResults{Error: err}
}
func queryDataResponseToExecutionResults(c models.Condition, execResp *backend.QueryDataResponse) ExecutionResults {
// eval captures for the '__value_string__' annotation and the Value property of the API response.
captures := make([]NumberValueCapture, 0, len(execResp.Responses))
captureVal := func(refID string, labels data.Labels, value *float64) {
@ -356,7 +359,7 @@ func executeQueriesAndExpressions(ctx AlertExecCtx, data []models.AlertQuery, no
}
}()
queryDataReq, err := GetExprRequest(ctx, data, now, dsCacheService, secretsService)
queryDataReq, err := getExprRequest(ctx, data, now, dsCacheService, secretsService)
if err != nil {
return nil, err
}
@ -564,8 +567,6 @@ func (evalResults Results) AsDataFrame() data.Frame {
labelColumns = append(labelColumns, k)
}
labelColumns = sort.StringSlice(labelColumns)
frame := data.NewFrame("evaluation results")
for _, lKey := range labelColumns {
frame.Fields = append(frame.Fields, data.NewField(lKey, nil, make([]string, fieldLen)))
@ -591,16 +592,15 @@ func (evalResults Results) AsDataFrame() data.Frame {
}
// ConditionEval executes conditions and evaluates the result.
func (e *evaluatorImpl) ConditionEval(condition *models.Condition, now time.Time) (Results, error) {
alertCtx, cancelFn := context.WithTimeout(context.Background(), e.cfg.UnifiedAlerting.EvaluationTimeout)
defer cancelFn()
alertExecCtx := AlertExecCtx{OrgID: condition.OrgID, Ctx: alertCtx, ExpressionsEnabled: e.cfg.ExpressionsEnabled, Log: e.log}
execResult := executeCondition(alertExecCtx, condition, now, e.expressionService, e.dataSourceCache, e.secretsService)
evalResults := evaluateExecutionResult(execResult, now)
return evalResults, nil
func (e *evaluatorImpl) ConditionEval(condition models.Condition, now time.Time) Results {
	resp, err := e.QueriesAndExpressionsEval(condition.OrgID, condition.Data, now)
	if err != nil {
		// A failed execution is not returned as an error; it is carried in
		// ExecutionResults and handed to evaluateExecutionResult.
		return evaluateExecutionResult(ExecutionResults{Error: err}, now)
	}
	converted := queryDataResponseToExecutionResults(condition, resp)
	return evaluateExecutionResult(converted, now)
}
// QueriesAndExpressionsEval executes queries and expressions and returns the result.

View File

@ -25,11 +25,11 @@ func (_m *FakeEvaluator) EXPECT() *FakeEvaluator_Expecter {
}
// ConditionEval provides a mock function with given fields: condition, now
func (_m *FakeEvaluator) ConditionEval(condition *models.Condition, now time.Time) (Results, error) {
func (_m *FakeEvaluator) ConditionEval(condition models.Condition, now time.Time) Results {
ret := _m.Called(condition, now)
var r0 Results
if rf, ok := ret.Get(0).(func(*models.Condition, time.Time) Results); ok {
if rf, ok := ret.Get(0).(func(models.Condition, time.Time) Results); ok {
r0 = rf(condition, now)
} else {
if ret.Get(0) != nil {
@ -37,14 +37,7 @@ func (_m *FakeEvaluator) ConditionEval(condition *models.Condition, now time.Tim
}
}
var r1 error
if rf, ok := ret.Get(1).(func(*models.Condition, time.Time) error); ok {
r1 = rf(condition, now)
} else {
r1 = ret.Error(1)
}
return r0, r1
return r0
}
// FakeEvaluator_ConditionEval_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'ConditionEval'
@ -53,21 +46,21 @@ type FakeEvaluator_ConditionEval_Call struct {
}
// ConditionEval is a helper method to define mock.On call
// - condition *models.Condition
// - condition models.Condition
// - now time.Time
func (_e *FakeEvaluator_Expecter) ConditionEval(condition interface{}, now interface{}) *FakeEvaluator_ConditionEval_Call {
return &FakeEvaluator_ConditionEval_Call{Call: _e.mock.On("ConditionEval", condition, now)}
}
func (_c *FakeEvaluator_ConditionEval_Call) Run(run func(condition *models.Condition, now time.Time)) *FakeEvaluator_ConditionEval_Call {
func (_c *FakeEvaluator_ConditionEval_Call) Run(run func(condition models.Condition, now time.Time)) *FakeEvaluator_ConditionEval_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(*models.Condition), args[1].(time.Time))
run(args[0].(models.Condition), args[1].(time.Time))
})
return _c
}
func (_c *FakeEvaluator_ConditionEval_Call) Return(_a0 Results, _a1 error) *FakeEvaluator_ConditionEval_Call {
_c.Call.Return(_a0, _a1)
func (_c *FakeEvaluator_ConditionEval_Call) Return(_a0 Results) *FakeEvaluator_ConditionEval_Call {
_c.Call.Return(_a0)
return _c
}

View File

@ -170,6 +170,14 @@ func (alertRule *AlertRule) GetLabels(opts ...LabelOption) map[string]string {
return labels
}
// GetEvalCondition returns a Condition populated from the rule's Condition,
// OrgID and Data fields.
func (alertRule *AlertRule) GetEvalCondition() Condition {
	var cond Condition
	cond.Condition = alertRule.Condition
	cond.OrgID = alertRule.OrgID
	cond.Data = alertRule.Data
	return cond
}
// Diff calculates diff between two alert rules. Returns nil if two rules are equal. Otherwise, returns cmputil.DiffReport
func (alertRule *AlertRule) Diff(rule *AlertRule, ignore ...string) cmputil.DiffReport {
var reporter cmputil.DiffReporter

View File

@ -388,32 +388,25 @@ func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key ngmodels.AlertR
return q.Result, nil
}
evaluate := func(ctx context.Context, r *ngmodels.AlertRule, attempt int64, e *evaluation) error {
evaluate := func(ctx context.Context, r *ngmodels.AlertRule, attempt int64, e *evaluation) {
logger := logger.New("version", r.Version, "attempt", attempt, "now", e.scheduledAt)
start := sch.clock.Now()
condition := ngmodels.Condition{
Condition: r.Condition,
OrgID: r.OrgID,
Data: r.Data,
}
results, err := sch.evaluator.ConditionEval(&condition, e.scheduledAt)
results := sch.evaluator.ConditionEval(r.GetEvalCondition(), e.scheduledAt)
dur := sch.clock.Now().Sub(start)
evalTotal.Inc()
evalDuration.Observe(dur.Seconds())
if err != nil {
if results.HasErrors() {
evalTotalFailures.Inc()
// consider saving alert instance on error
logger.Error("failed to evaluate alert rule", "duration", dur, "err", err)
return err
logger.Error("failed to evaluate alert rule", "results", results, "duration", dur)
} else {
logger.Debug("alert rule evaluated", "results", results, "duration", dur)
}
logger.Debug("alert rule evaluated", "results", results, "duration", dur)
processedStates := sch.stateManager.ProcessEvalResults(ctx, e.scheduledAt, r, results)
sch.saveAlertStates(ctx, processedStates)
alerts := FromAlertStateToPostableAlerts(processedStates, sch.stateManager, sch.appURL)
sch.alertsSender.Send(key, alerts)
return nil
}
retryIfError := func(f func(attempt int64) error) error {
@ -475,7 +468,8 @@ func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key ngmodels.AlertR
currentRule = newRule
logger.Debug("new alert rule version fetched", "title", newRule.Title, "version", newRule.Version)
}
return evaluate(grafanaCtx, currentRule, attempt, ctx)
evaluate(grafanaCtx, currentRule, attempt, ctx)
return nil
})
if err != nil {
logger.Error("evaluation failed after all retries", "err", err)