mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting: Refactor Evaluator (#51673)
* AlertRule to return condition
* update ConditionEval to not return an error because it's always nil
* make getExprRequest private
* refactor executeCondition into just a converter and move execution to ConditionEval, as this makes the code more readable
* log error if results have errors
* change signature of evaluate function to not return an error
This commit is contained in:
parent
2d8a91a846
commit
554ebd647b
@ -55,10 +55,7 @@ func (srv TestingApiSrv) RouteTestGrafanaRuleConfig(c *models.ReqContext, body a
|
||||
now = timeNow()
|
||||
}
|
||||
|
||||
evalResults, err := srv.evaluator.ConditionEval(&evalCond, now)
|
||||
if err != nil {
|
||||
return ErrResp(http.StatusBadRequest, err, "Failed to evaluate conditions")
|
||||
}
|
||||
evalResults := srv.evaluator.ConditionEval(evalCond, now)
|
||||
|
||||
frame := evalResults.AsDataFrame()
|
||||
return response.JSONStreaming(http.StatusOK, util.DynMap{
|
||||
|
@ -69,7 +69,7 @@ func TestRouteTestGrafanaRuleConfig(t *testing.T) {
|
||||
|
||||
evaluator := &eval.FakeEvaluator{}
|
||||
var result []eval.Result
|
||||
evaluator.EXPECT().ConditionEval(mock.Anything, mock.Anything).Return(result, nil)
|
||||
evaluator.EXPECT().ConditionEval(mock.Anything, mock.Anything).Return(result)
|
||||
|
||||
srv := createTestingApiSrv(ds, ac, evaluator)
|
||||
|
||||
@ -109,7 +109,7 @@ func TestRouteTestGrafanaRuleConfig(t *testing.T) {
|
||||
|
||||
evaluator := &eval.FakeEvaluator{}
|
||||
var result []eval.Result
|
||||
evaluator.EXPECT().ConditionEval(mock.Anything, mock.Anything).Return(result, nil)
|
||||
evaluator.EXPECT().ConditionEval(mock.Anything, mock.Anything).Return(result)
|
||||
|
||||
srv := createTestingApiSrv(ds, ac, evaluator)
|
||||
|
||||
|
@ -28,7 +28,7 @@ import (
|
||||
//go:generate mockery --name Evaluator --structname FakeEvaluator --inpackage --filename evaluator_mock.go --with-expecter
|
||||
type Evaluator interface {
|
||||
// ConditionEval executes conditions and evaluates the result.
|
||||
ConditionEval(condition *models.Condition, now time.Time) (Results, error)
|
||||
ConditionEval(condition models.Condition, now time.Time) Results
|
||||
// QueriesAndExpressionsEval executes queries and expressions and returns the result.
|
||||
QueriesAndExpressionsEval(orgID int64, data []models.AlertQuery, now time.Time) (*backend.QueryDataResponse, error)
|
||||
}
|
||||
@ -89,6 +89,15 @@ type ExecutionResults struct {
|
||||
// Results is a slice of evaluated alert instances states.
|
||||
type Results []Result
|
||||
|
||||
func (evalResults Results) HasErrors() bool {
|
||||
for _, r := range evalResults {
|
||||
if r.State == Error {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Result contains the evaluated State of an alert instance
|
||||
// identified by its labels.
|
||||
type Result struct {
|
||||
@ -153,8 +162,8 @@ type AlertExecCtx struct {
|
||||
Ctx context.Context
|
||||
}
|
||||
|
||||
// GetExprRequest validates the condition, gets the datasource information and creates an expr.Request from it.
|
||||
func GetExprRequest(ctx AlertExecCtx, data []models.AlertQuery, now time.Time, dsCacheService datasources.CacheService, secretsService secrets.Service) (*expr.Request, error) {
|
||||
// getExprRequest validates the condition, gets the datasource information and creates an expr.Request from it.
|
||||
func getExprRequest(ctx AlertExecCtx, data []models.AlertQuery, now time.Time, dsCacheService datasources.CacheService, secretsService secrets.Service) (*expr.Request, error) {
|
||||
req := &expr.Request{
|
||||
OrgId: ctx.OrgID,
|
||||
Headers: map[string]string{
|
||||
@ -166,8 +175,7 @@ func GetExprRequest(ctx AlertExecCtx, data []models.AlertQuery, now time.Time, d
|
||||
|
||||
datasources := make(map[string]*datasources.DataSource, len(data))
|
||||
|
||||
for i := range data {
|
||||
q := data[i]
|
||||
for _, q := range data {
|
||||
model, err := q.GetModel()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get query model: %w", err)
|
||||
@ -259,12 +267,7 @@ type NumberValueCapture struct {
|
||||
Value *float64
|
||||
}
|
||||
|
||||
func executeCondition(ctx AlertExecCtx, c *models.Condition, now time.Time, exprService *expr.Service, dsCacheService datasources.CacheService, secretsService secrets.Service) ExecutionResults {
|
||||
execResp, err := executeQueriesAndExpressions(ctx, c.Data, now, exprService, dsCacheService, secretsService)
|
||||
if err != nil {
|
||||
return ExecutionResults{Error: err}
|
||||
}
|
||||
|
||||
func queryDataResponseToExecutionResults(c models.Condition, execResp *backend.QueryDataResponse) ExecutionResults {
|
||||
// eval captures for the '__value_string__' annotation and the Value property of the API response.
|
||||
captures := make([]NumberValueCapture, 0, len(execResp.Responses))
|
||||
captureVal := func(refID string, labels data.Labels, value *float64) {
|
||||
@ -356,7 +359,7 @@ func executeQueriesAndExpressions(ctx AlertExecCtx, data []models.AlertQuery, no
|
||||
}
|
||||
}()
|
||||
|
||||
queryDataReq, err := GetExprRequest(ctx, data, now, dsCacheService, secretsService)
|
||||
queryDataReq, err := getExprRequest(ctx, data, now, dsCacheService, secretsService)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -564,8 +567,6 @@ func (evalResults Results) AsDataFrame() data.Frame {
|
||||
labelColumns = append(labelColumns, k)
|
||||
}
|
||||
|
||||
labelColumns = sort.StringSlice(labelColumns)
|
||||
|
||||
frame := data.NewFrame("evaluation results")
|
||||
for _, lKey := range labelColumns {
|
||||
frame.Fields = append(frame.Fields, data.NewField(lKey, nil, make([]string, fieldLen)))
|
||||
@ -591,16 +592,15 @@ func (evalResults Results) AsDataFrame() data.Frame {
|
||||
}
|
||||
|
||||
// ConditionEval executes conditions and evaluates the result.
|
||||
func (e *evaluatorImpl) ConditionEval(condition *models.Condition, now time.Time) (Results, error) {
|
||||
alertCtx, cancelFn := context.WithTimeout(context.Background(), e.cfg.UnifiedAlerting.EvaluationTimeout)
|
||||
defer cancelFn()
|
||||
|
||||
alertExecCtx := AlertExecCtx{OrgID: condition.OrgID, Ctx: alertCtx, ExpressionsEnabled: e.cfg.ExpressionsEnabled, Log: e.log}
|
||||
|
||||
execResult := executeCondition(alertExecCtx, condition, now, e.expressionService, e.dataSourceCache, e.secretsService)
|
||||
|
||||
evalResults := evaluateExecutionResult(execResult, now)
|
||||
return evalResults, nil
|
||||
func (e *evaluatorImpl) ConditionEval(condition models.Condition, now time.Time) Results {
|
||||
execResp, err := e.QueriesAndExpressionsEval(condition.OrgID, condition.Data, now)
|
||||
var execResults ExecutionResults
|
||||
if err != nil {
|
||||
execResults = ExecutionResults{Error: err}
|
||||
} else {
|
||||
execResults = queryDataResponseToExecutionResults(condition, execResp)
|
||||
}
|
||||
return evaluateExecutionResult(execResults, now)
|
||||
}
|
||||
|
||||
// QueriesAndExpressionsEval executes queries and expressions and returns the result.
|
||||
|
@ -25,11 +25,11 @@ func (_m *FakeEvaluator) EXPECT() *FakeEvaluator_Expecter {
|
||||
}
|
||||
|
||||
// ConditionEval provides a mock function with given fields: condition, now
|
||||
func (_m *FakeEvaluator) ConditionEval(condition *models.Condition, now time.Time) (Results, error) {
|
||||
func (_m *FakeEvaluator) ConditionEval(condition models.Condition, now time.Time) Results {
|
||||
ret := _m.Called(condition, now)
|
||||
|
||||
var r0 Results
|
||||
if rf, ok := ret.Get(0).(func(*models.Condition, time.Time) Results); ok {
|
||||
if rf, ok := ret.Get(0).(func(models.Condition, time.Time) Results); ok {
|
||||
r0 = rf(condition, now)
|
||||
} else {
|
||||
if ret.Get(0) != nil {
|
||||
@ -37,14 +37,7 @@ func (_m *FakeEvaluator) ConditionEval(condition *models.Condition, now time.Tim
|
||||
}
|
||||
}
|
||||
|
||||
var r1 error
|
||||
if rf, ok := ret.Get(1).(func(*models.Condition, time.Time) error); ok {
|
||||
r1 = rf(condition, now)
|
||||
} else {
|
||||
r1 = ret.Error(1)
|
||||
}
|
||||
|
||||
return r0, r1
|
||||
return r0
|
||||
}
|
||||
|
||||
// FakeEvaluator_ConditionEval_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'ConditionEval'
|
||||
@ -53,21 +46,21 @@ type FakeEvaluator_ConditionEval_Call struct {
|
||||
}
|
||||
|
||||
// ConditionEval is a helper method to define mock.On call
|
||||
// - condition *models.Condition
|
||||
// - condition models.Condition
|
||||
// - now time.Time
|
||||
func (_e *FakeEvaluator_Expecter) ConditionEval(condition interface{}, now interface{}) *FakeEvaluator_ConditionEval_Call {
|
||||
return &FakeEvaluator_ConditionEval_Call{Call: _e.mock.On("ConditionEval", condition, now)}
|
||||
}
|
||||
|
||||
func (_c *FakeEvaluator_ConditionEval_Call) Run(run func(condition *models.Condition, now time.Time)) *FakeEvaluator_ConditionEval_Call {
|
||||
func (_c *FakeEvaluator_ConditionEval_Call) Run(run func(condition models.Condition, now time.Time)) *FakeEvaluator_ConditionEval_Call {
|
||||
_c.Call.Run(func(args mock.Arguments) {
|
||||
run(args[0].(*models.Condition), args[1].(time.Time))
|
||||
run(args[0].(models.Condition), args[1].(time.Time))
|
||||
})
|
||||
return _c
|
||||
}
|
||||
|
||||
func (_c *FakeEvaluator_ConditionEval_Call) Return(_a0 Results, _a1 error) *FakeEvaluator_ConditionEval_Call {
|
||||
_c.Call.Return(_a0, _a1)
|
||||
func (_c *FakeEvaluator_ConditionEval_Call) Return(_a0 Results) *FakeEvaluator_ConditionEval_Call {
|
||||
_c.Call.Return(_a0)
|
||||
return _c
|
||||
}
|
||||
|
||||
|
@ -170,6 +170,14 @@ func (alertRule *AlertRule) GetLabels(opts ...LabelOption) map[string]string {
|
||||
return labels
|
||||
}
|
||||
|
||||
func (alertRule *AlertRule) GetEvalCondition() Condition {
|
||||
return Condition{
|
||||
Condition: alertRule.Condition,
|
||||
OrgID: alertRule.OrgID,
|
||||
Data: alertRule.Data,
|
||||
}
|
||||
}
|
||||
|
||||
// Diff calculates diff between two alert rules. Returns nil if two rules are equal. Otherwise, returns cmputil.DiffReport
|
||||
func (alertRule *AlertRule) Diff(rule *AlertRule, ignore ...string) cmputil.DiffReport {
|
||||
var reporter cmputil.DiffReporter
|
||||
|
@ -388,32 +388,25 @@ func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key ngmodels.AlertR
|
||||
return q.Result, nil
|
||||
}
|
||||
|
||||
evaluate := func(ctx context.Context, r *ngmodels.AlertRule, attempt int64, e *evaluation) error {
|
||||
evaluate := func(ctx context.Context, r *ngmodels.AlertRule, attempt int64, e *evaluation) {
|
||||
logger := logger.New("version", r.Version, "attempt", attempt, "now", e.scheduledAt)
|
||||
start := sch.clock.Now()
|
||||
|
||||
condition := ngmodels.Condition{
|
||||
Condition: r.Condition,
|
||||
OrgID: r.OrgID,
|
||||
Data: r.Data,
|
||||
}
|
||||
results, err := sch.evaluator.ConditionEval(&condition, e.scheduledAt)
|
||||
results := sch.evaluator.ConditionEval(r.GetEvalCondition(), e.scheduledAt)
|
||||
dur := sch.clock.Now().Sub(start)
|
||||
evalTotal.Inc()
|
||||
evalDuration.Observe(dur.Seconds())
|
||||
if err != nil {
|
||||
if results.HasErrors() {
|
||||
evalTotalFailures.Inc()
|
||||
// consider saving alert instance on error
|
||||
logger.Error("failed to evaluate alert rule", "duration", dur, "err", err)
|
||||
return err
|
||||
logger.Error("failed to evaluate alert rule", "results", results, "duration", dur)
|
||||
} else {
|
||||
logger.Debug("alert rule evaluated", "results", results, "duration", dur)
|
||||
}
|
||||
logger.Debug("alert rule evaluated", "results", results, "duration", dur)
|
||||
|
||||
processedStates := sch.stateManager.ProcessEvalResults(ctx, e.scheduledAt, r, results)
|
||||
sch.saveAlertStates(ctx, processedStates)
|
||||
alerts := FromAlertStateToPostableAlerts(processedStates, sch.stateManager, sch.appURL)
|
||||
sch.alertsSender.Send(key, alerts)
|
||||
return nil
|
||||
}
|
||||
|
||||
retryIfError := func(f func(attempt int64) error) error {
|
||||
@ -475,7 +468,8 @@ func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key ngmodels.AlertR
|
||||
currentRule = newRule
|
||||
logger.Debug("new alert rule version fetched", "title", newRule.Title, "version", newRule.Version)
|
||||
}
|
||||
return evaluate(grafanaCtx, currentRule, attempt, ctx)
|
||||
evaluate(grafanaCtx, currentRule, attempt, ctx)
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
logger.Error("evaluation failed after all retries", "err", err)
|
||||
|
Loading…
Reference in New Issue
Block a user