mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting: Refactor Evaluator (#51673)
* Add AlertRule.GetEvalCondition to return the rule's evaluation condition
* Update ConditionEval to not return an error, because it was always nil
* Make getExprRequest private
* Refactor executeCondition into a plain converter (queryDataResponseToExecutionResults) and move execution into ConditionEval, as this makes the code more readable
* Log an error if the results contain errors
* Change the signature of the evaluate function to not return an error
This commit is contained in:
@@ -55,10 +55,7 @@ func (srv TestingApiSrv) RouteTestGrafanaRuleConfig(c *models.ReqContext, body a
|
|||||||
now = timeNow()
|
now = timeNow()
|
||||||
}
|
}
|
||||||
|
|
||||||
evalResults, err := srv.evaluator.ConditionEval(&evalCond, now)
|
evalResults := srv.evaluator.ConditionEval(evalCond, now)
|
||||||
if err != nil {
|
|
||||||
return ErrResp(http.StatusBadRequest, err, "Failed to evaluate conditions")
|
|
||||||
}
|
|
||||||
|
|
||||||
frame := evalResults.AsDataFrame()
|
frame := evalResults.AsDataFrame()
|
||||||
return response.JSONStreaming(http.StatusOK, util.DynMap{
|
return response.JSONStreaming(http.StatusOK, util.DynMap{
|
||||||
|
|||||||
@@ -69,7 +69,7 @@ func TestRouteTestGrafanaRuleConfig(t *testing.T) {
|
|||||||
|
|
||||||
evaluator := &eval.FakeEvaluator{}
|
evaluator := &eval.FakeEvaluator{}
|
||||||
var result []eval.Result
|
var result []eval.Result
|
||||||
evaluator.EXPECT().ConditionEval(mock.Anything, mock.Anything).Return(result, nil)
|
evaluator.EXPECT().ConditionEval(mock.Anything, mock.Anything).Return(result)
|
||||||
|
|
||||||
srv := createTestingApiSrv(ds, ac, evaluator)
|
srv := createTestingApiSrv(ds, ac, evaluator)
|
||||||
|
|
||||||
@@ -109,7 +109,7 @@ func TestRouteTestGrafanaRuleConfig(t *testing.T) {
|
|||||||
|
|
||||||
evaluator := &eval.FakeEvaluator{}
|
evaluator := &eval.FakeEvaluator{}
|
||||||
var result []eval.Result
|
var result []eval.Result
|
||||||
evaluator.EXPECT().ConditionEval(mock.Anything, mock.Anything).Return(result, nil)
|
evaluator.EXPECT().ConditionEval(mock.Anything, mock.Anything).Return(result)
|
||||||
|
|
||||||
srv := createTestingApiSrv(ds, ac, evaluator)
|
srv := createTestingApiSrv(ds, ac, evaluator)
|
||||||
|
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ import (
|
|||||||
//go:generate mockery --name Evaluator --structname FakeEvaluator --inpackage --filename evaluator_mock.go --with-expecter
|
//go:generate mockery --name Evaluator --structname FakeEvaluator --inpackage --filename evaluator_mock.go --with-expecter
|
||||||
type Evaluator interface {
|
type Evaluator interface {
|
||||||
// ConditionEval executes conditions and evaluates the result.
|
// ConditionEval executes conditions and evaluates the result.
|
||||||
ConditionEval(condition *models.Condition, now time.Time) (Results, error)
|
ConditionEval(condition models.Condition, now time.Time) Results
|
||||||
// QueriesAndExpressionsEval executes queries and expressions and returns the result.
|
// QueriesAndExpressionsEval executes queries and expressions and returns the result.
|
||||||
QueriesAndExpressionsEval(orgID int64, data []models.AlertQuery, now time.Time) (*backend.QueryDataResponse, error)
|
QueriesAndExpressionsEval(orgID int64, data []models.AlertQuery, now time.Time) (*backend.QueryDataResponse, error)
|
||||||
}
|
}
|
||||||
@@ -89,6 +89,15 @@ type ExecutionResults struct {
|
|||||||
// Results is a slice of evaluated alert instances states.
|
// Results is a slice of evaluated alert instances states.
|
||||||
type Results []Result
|
type Results []Result
|
||||||
|
|
||||||
|
// HasErrors reports whether at least one result in evalResults has its State set to Error.
// It returns false for an empty or nil slice, since the loop body never runs.
func (evalResults Results) HasErrors() bool {
|
||||||
|
for _, r := range evalResults {
|
||||||
|
if r.State == Error {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
// Result contains the evaluated State of an alert instance
|
// Result contains the evaluated State of an alert instance
|
||||||
// identified by its labels.
|
// identified by its labels.
|
||||||
type Result struct {
|
type Result struct {
|
||||||
@@ -153,8 +162,8 @@ type AlertExecCtx struct {
|
|||||||
Ctx context.Context
|
Ctx context.Context
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetExprRequest validates the condition, gets the datasource information and creates an expr.Request from it.
|
// getExprRequest validates the condition, gets the datasource information and creates an expr.Request from it.
|
||||||
func GetExprRequest(ctx AlertExecCtx, data []models.AlertQuery, now time.Time, dsCacheService datasources.CacheService, secretsService secrets.Service) (*expr.Request, error) {
|
func getExprRequest(ctx AlertExecCtx, data []models.AlertQuery, now time.Time, dsCacheService datasources.CacheService, secretsService secrets.Service) (*expr.Request, error) {
|
||||||
req := &expr.Request{
|
req := &expr.Request{
|
||||||
OrgId: ctx.OrgID,
|
OrgId: ctx.OrgID,
|
||||||
Headers: map[string]string{
|
Headers: map[string]string{
|
||||||
@@ -166,8 +175,7 @@ func GetExprRequest(ctx AlertExecCtx, data []models.AlertQuery, now time.Time, d
|
|||||||
|
|
||||||
datasources := make(map[string]*datasources.DataSource, len(data))
|
datasources := make(map[string]*datasources.DataSource, len(data))
|
||||||
|
|
||||||
for i := range data {
|
for _, q := range data {
|
||||||
q := data[i]
|
|
||||||
model, err := q.GetModel()
|
model, err := q.GetModel()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to get query model: %w", err)
|
return nil, fmt.Errorf("failed to get query model: %w", err)
|
||||||
@@ -259,12 +267,7 @@ type NumberValueCapture struct {
|
|||||||
Value *float64
|
Value *float64
|
||||||
}
|
}
|
||||||
|
|
||||||
func executeCondition(ctx AlertExecCtx, c *models.Condition, now time.Time, exprService *expr.Service, dsCacheService datasources.CacheService, secretsService secrets.Service) ExecutionResults {
|
func queryDataResponseToExecutionResults(c models.Condition, execResp *backend.QueryDataResponse) ExecutionResults {
|
||||||
execResp, err := executeQueriesAndExpressions(ctx, c.Data, now, exprService, dsCacheService, secretsService)
|
|
||||||
if err != nil {
|
|
||||||
return ExecutionResults{Error: err}
|
|
||||||
}
|
|
||||||
|
|
||||||
// eval captures for the '__value_string__' annotation and the Value property of the API response.
|
// eval captures for the '__value_string__' annotation and the Value property of the API response.
|
||||||
captures := make([]NumberValueCapture, 0, len(execResp.Responses))
|
captures := make([]NumberValueCapture, 0, len(execResp.Responses))
|
||||||
captureVal := func(refID string, labels data.Labels, value *float64) {
|
captureVal := func(refID string, labels data.Labels, value *float64) {
|
||||||
@@ -356,7 +359,7 @@ func executeQueriesAndExpressions(ctx AlertExecCtx, data []models.AlertQuery, no
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
queryDataReq, err := GetExprRequest(ctx, data, now, dsCacheService, secretsService)
|
queryDataReq, err := getExprRequest(ctx, data, now, dsCacheService, secretsService)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -564,8 +567,6 @@ func (evalResults Results) AsDataFrame() data.Frame {
|
|||||||
labelColumns = append(labelColumns, k)
|
labelColumns = append(labelColumns, k)
|
||||||
}
|
}
|
||||||
|
|
||||||
labelColumns = sort.StringSlice(labelColumns)
|
|
||||||
|
|
||||||
frame := data.NewFrame("evaluation results")
|
frame := data.NewFrame("evaluation results")
|
||||||
for _, lKey := range labelColumns {
|
for _, lKey := range labelColumns {
|
||||||
frame.Fields = append(frame.Fields, data.NewField(lKey, nil, make([]string, fieldLen)))
|
frame.Fields = append(frame.Fields, data.NewField(lKey, nil, make([]string, fieldLen)))
|
||||||
@@ -591,16 +592,15 @@ func (evalResults Results) AsDataFrame() data.Frame {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ConditionEval executes conditions and evaluates the result.
|
// ConditionEval executes conditions and evaluates the result.
|
||||||
func (e *evaluatorImpl) ConditionEval(condition *models.Condition, now time.Time) (Results, error) {
|
func (e *evaluatorImpl) ConditionEval(condition models.Condition, now time.Time) Results {
|
||||||
alertCtx, cancelFn := context.WithTimeout(context.Background(), e.cfg.UnifiedAlerting.EvaluationTimeout)
|
execResp, err := e.QueriesAndExpressionsEval(condition.OrgID, condition.Data, now)
|
||||||
defer cancelFn()
|
var execResults ExecutionResults
|
||||||
|
if err != nil {
|
||||||
alertExecCtx := AlertExecCtx{OrgID: condition.OrgID, Ctx: alertCtx, ExpressionsEnabled: e.cfg.ExpressionsEnabled, Log: e.log}
|
execResults = ExecutionResults{Error: err}
|
||||||
|
} else {
|
||||||
execResult := executeCondition(alertExecCtx, condition, now, e.expressionService, e.dataSourceCache, e.secretsService)
|
execResults = queryDataResponseToExecutionResults(condition, execResp)
|
||||||
|
}
|
||||||
evalResults := evaluateExecutionResult(execResult, now)
|
return evaluateExecutionResult(execResults, now)
|
||||||
return evalResults, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// QueriesAndExpressionsEval executes queries and expressions and returns the result.
|
// QueriesAndExpressionsEval executes queries and expressions and returns the result.
|
||||||
|
|||||||
@@ -25,11 +25,11 @@ func (_m *FakeEvaluator) EXPECT() *FakeEvaluator_Expecter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ConditionEval provides a mock function with given fields: condition, now
|
// ConditionEval provides a mock function with given fields: condition, now
|
||||||
func (_m *FakeEvaluator) ConditionEval(condition *models.Condition, now time.Time) (Results, error) {
|
func (_m *FakeEvaluator) ConditionEval(condition models.Condition, now time.Time) Results {
|
||||||
ret := _m.Called(condition, now)
|
ret := _m.Called(condition, now)
|
||||||
|
|
||||||
var r0 Results
|
var r0 Results
|
||||||
if rf, ok := ret.Get(0).(func(*models.Condition, time.Time) Results); ok {
|
if rf, ok := ret.Get(0).(func(models.Condition, time.Time) Results); ok {
|
||||||
r0 = rf(condition, now)
|
r0 = rf(condition, now)
|
||||||
} else {
|
} else {
|
||||||
if ret.Get(0) != nil {
|
if ret.Get(0) != nil {
|
||||||
@@ -37,14 +37,7 @@ func (_m *FakeEvaluator) ConditionEval(condition *models.Condition, now time.Tim
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var r1 error
|
return r0
|
||||||
if rf, ok := ret.Get(1).(func(*models.Condition, time.Time) error); ok {
|
|
||||||
r1 = rf(condition, now)
|
|
||||||
} else {
|
|
||||||
r1 = ret.Error(1)
|
|
||||||
}
|
|
||||||
|
|
||||||
return r0, r1
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// FakeEvaluator_ConditionEval_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'ConditionEval'
|
// FakeEvaluator_ConditionEval_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'ConditionEval'
|
||||||
@@ -53,21 +46,21 @@ type FakeEvaluator_ConditionEval_Call struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ConditionEval is a helper method to define mock.On call
|
// ConditionEval is a helper method to define mock.On call
|
||||||
// - condition *models.Condition
|
// - condition models.Condition
|
||||||
// - now time.Time
|
// - now time.Time
|
||||||
func (_e *FakeEvaluator_Expecter) ConditionEval(condition interface{}, now interface{}) *FakeEvaluator_ConditionEval_Call {
|
func (_e *FakeEvaluator_Expecter) ConditionEval(condition interface{}, now interface{}) *FakeEvaluator_ConditionEval_Call {
|
||||||
return &FakeEvaluator_ConditionEval_Call{Call: _e.mock.On("ConditionEval", condition, now)}
|
return &FakeEvaluator_ConditionEval_Call{Call: _e.mock.On("ConditionEval", condition, now)}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (_c *FakeEvaluator_ConditionEval_Call) Run(run func(condition *models.Condition, now time.Time)) *FakeEvaluator_ConditionEval_Call {
|
func (_c *FakeEvaluator_ConditionEval_Call) Run(run func(condition models.Condition, now time.Time)) *FakeEvaluator_ConditionEval_Call {
|
||||||
_c.Call.Run(func(args mock.Arguments) {
|
_c.Call.Run(func(args mock.Arguments) {
|
||||||
run(args[0].(*models.Condition), args[1].(time.Time))
|
run(args[0].(models.Condition), args[1].(time.Time))
|
||||||
})
|
})
|
||||||
return _c
|
return _c
|
||||||
}
|
}
|
||||||
|
|
||||||
func (_c *FakeEvaluator_ConditionEval_Call) Return(_a0 Results, _a1 error) *FakeEvaluator_ConditionEval_Call {
|
func (_c *FakeEvaluator_ConditionEval_Call) Return(_a0 Results) *FakeEvaluator_ConditionEval_Call {
|
||||||
_c.Call.Return(_a0, _a1)
|
_c.Call.Return(_a0)
|
||||||
return _c
|
return _c
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -170,6 +170,14 @@ func (alertRule *AlertRule) GetLabels(opts ...LabelOption) map[string]string {
|
|||||||
return labels
|
return labels
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetEvalCondition returns a Condition populated from the rule's Condition, OrgID and Data fields.
func (alertRule *AlertRule) GetEvalCondition() Condition {
|
||||||
|
return Condition{
|
||||||
|
Condition: alertRule.Condition,
|
||||||
|
OrgID: alertRule.OrgID,
|
||||||
|
Data: alertRule.Data,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Diff calculates diff between two alert rules. Returns nil if two rules are equal. Otherwise, returns cmputil.DiffReport
|
// Diff calculates diff between two alert rules. Returns nil if two rules are equal. Otherwise, returns cmputil.DiffReport
|
||||||
func (alertRule *AlertRule) Diff(rule *AlertRule, ignore ...string) cmputil.DiffReport {
|
func (alertRule *AlertRule) Diff(rule *AlertRule, ignore ...string) cmputil.DiffReport {
|
||||||
var reporter cmputil.DiffReporter
|
var reporter cmputil.DiffReporter
|
||||||
|
|||||||
@@ -388,32 +388,25 @@ func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key ngmodels.AlertR
|
|||||||
return q.Result, nil
|
return q.Result, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
evaluate := func(ctx context.Context, r *ngmodels.AlertRule, attempt int64, e *evaluation) error {
|
evaluate := func(ctx context.Context, r *ngmodels.AlertRule, attempt int64, e *evaluation) {
|
||||||
logger := logger.New("version", r.Version, "attempt", attempt, "now", e.scheduledAt)
|
logger := logger.New("version", r.Version, "attempt", attempt, "now", e.scheduledAt)
|
||||||
start := sch.clock.Now()
|
start := sch.clock.Now()
|
||||||
|
|
||||||
condition := ngmodels.Condition{
|
results := sch.evaluator.ConditionEval(r.GetEvalCondition(), e.scheduledAt)
|
||||||
Condition: r.Condition,
|
|
||||||
OrgID: r.OrgID,
|
|
||||||
Data: r.Data,
|
|
||||||
}
|
|
||||||
results, err := sch.evaluator.ConditionEval(&condition, e.scheduledAt)
|
|
||||||
dur := sch.clock.Now().Sub(start)
|
dur := sch.clock.Now().Sub(start)
|
||||||
evalTotal.Inc()
|
evalTotal.Inc()
|
||||||
evalDuration.Observe(dur.Seconds())
|
evalDuration.Observe(dur.Seconds())
|
||||||
if err != nil {
|
if results.HasErrors() {
|
||||||
evalTotalFailures.Inc()
|
evalTotalFailures.Inc()
|
||||||
// consider saving alert instance on error
|
logger.Error("failed to evaluate alert rule", "results", results, "duration", dur)
|
||||||
logger.Error("failed to evaluate alert rule", "duration", dur, "err", err)
|
} else {
|
||||||
return err
|
logger.Debug("alert rule evaluated", "results", results, "duration", dur)
|
||||||
}
|
}
|
||||||
logger.Debug("alert rule evaluated", "results", results, "duration", dur)
|
|
||||||
|
|
||||||
processedStates := sch.stateManager.ProcessEvalResults(ctx, e.scheduledAt, r, results)
|
processedStates := sch.stateManager.ProcessEvalResults(ctx, e.scheduledAt, r, results)
|
||||||
sch.saveAlertStates(ctx, processedStates)
|
sch.saveAlertStates(ctx, processedStates)
|
||||||
alerts := FromAlertStateToPostableAlerts(processedStates, sch.stateManager, sch.appURL)
|
alerts := FromAlertStateToPostableAlerts(processedStates, sch.stateManager, sch.appURL)
|
||||||
sch.alertsSender.Send(key, alerts)
|
sch.alertsSender.Send(key, alerts)
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
retryIfError := func(f func(attempt int64) error) error {
|
retryIfError := func(f func(attempt int64) error) error {
|
||||||
@@ -475,7 +468,8 @@ func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key ngmodels.AlertR
|
|||||||
currentRule = newRule
|
currentRule = newRule
|
||||||
logger.Debug("new alert rule version fetched", "title", newRule.Title, "version", newRule.Version)
|
logger.Debug("new alert rule version fetched", "title", newRule.Title, "version", newRule.Version)
|
||||||
}
|
}
|
||||||
return evaluate(grafanaCtx, currentRule, attempt, ctx)
|
evaluate(grafanaCtx, currentRule, attempt, ctx)
|
||||||
|
return nil
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error("evaluation failed after all retries", "err", err)
|
logger.Error("evaluation failed after all retries", "err", err)
|
||||||
|
|||||||
Reference in New Issue
Block a user