From ad09feed837737c188e37fc6adfead7b8a45715e Mon Sep 17 00:00:00 2001 From: Yuri Tseretyan Date: Wed, 14 Dec 2022 09:44:14 -0500 Subject: [PATCH] Alerting: rule backtesting API (#57318) * Implement backtesting engine that can process regular rule specification (with queries to datasource) as well as special kind of rules that have data frame instead of query. * declare a new API endpoint and model * add feature toggle `alertingBacktesting` --- .../feature-toggles/index.md | 1 + .../src/types/featureToggles.gen.ts | 1 + pkg/services/featuremgmt/registry.go | 5 + pkg/services/featuremgmt/toggles_gen.go | 4 + pkg/services/ngalert/api/api.go | 8 + pkg/services/ngalert/api/api_testing.go | 76 ++++ pkg/services/ngalert/api/authorization.go | 5 + .../ngalert/api/authorization_test.go | 2 +- .../ngalert/api/generated_base_api_testing.go | 19 + pkg/services/ngalert/api/testing_api.go | 4 + pkg/services/ngalert/api/tooling/api.json | 54 +++ .../api/tooling/definitions/testing.go | 35 ++ pkg/services/ngalert/api/tooling/post.json | 77 ++++ pkg/services/ngalert/api/tooling/spec.json | 77 ++++ pkg/services/ngalert/backtesting/engine.go | 158 ++++++++ .../ngalert/backtesting/engine_test.go | 376 ++++++++++++++++++ pkg/services/ngalert/backtesting/eval_data.go | 96 +++++ .../ngalert/backtesting/eval_data_test.go | 295 ++++++++++++++ .../ngalert/backtesting/eval_query.go | 27 ++ .../ngalert/backtesting/eval_query_test.go | 89 +++++ pkg/services/ngalert/ngalert.go | 2 + .../api/alerting/api_backtesting_data.json | 306 ++++++++++++++ .../api/alerting/api_backtesting_test.go | 136 +++++++ pkg/tests/api/alerting/testing.go | 19 + 24 files changed, 1871 insertions(+), 1 deletion(-) create mode 100644 pkg/services/ngalert/backtesting/engine.go create mode 100644 pkg/services/ngalert/backtesting/engine_test.go create mode 100644 pkg/services/ngalert/backtesting/eval_data.go create mode 100644 pkg/services/ngalert/backtesting/eval_data_test.go create mode 100644 pkg/services/ngalert/backtesting/eval_query.go create mode 100644 pkg/services/ngalert/backtesting/eval_query_test.go create mode 100644 pkg/tests/api/alerting/api_backtesting_data.json create mode 100644 pkg/tests/api/alerting/api_backtesting_test.go diff --git a/docs/sources/setup-grafana/configure-grafana/feature-toggles/index.md b/docs/sources/setup-grafana/configure-grafana/feature-toggles/index.md index 82ac02a9ad4..169f0a6b0a0 100644 --- a/docs/sources/setup-grafana/configure-grafana/feature-toggles/index.md +++ b/docs/sources/setup-grafana/configure-grafana/feature-toggles/index.md @@ -92,6 +92,7 @@ Alpha features might be changed or removed without prior notice. | `secureSocksDatasourceProxy` | Enable secure socks tunneling for supported core datasources | | `authnService` | Use new auth service to perform authentication | | `sessionRemoteCache` | Enable using remote cache for user sessions | +| `alertingBacktesting` | Rule backtesting API for alerting | ## Development feature toggles diff --git a/packages/grafana-data/src/types/featureToggles.gen.ts b/packages/grafana-data/src/types/featureToggles.gen.ts index 82d2144285b..a6af682c83d 100644 --- a/packages/grafana-data/src/types/featureToggles.gen.ts +++ b/packages/grafana-data/src/types/featureToggles.gen.ts @@ -85,4 +85,5 @@ export interface FeatureToggles { secureSocksDatasourceProxy?: boolean; authnService?: boolean; sessionRemoteCache?: boolean; + alertingBacktesting?: boolean; } diff --git a/pkg/services/featuremgmt/registry.go b/pkg/services/featuremgmt/registry.go index 48b39b8e897..4e125ec1aee 100644 --- a/pkg/services/featuremgmt/registry.go +++ b/pkg/services/featuremgmt/registry.go @@ -389,5 +389,10 @@ var ( Description: "Enable using remote cache for user sessions", State: FeatureStateAlpha, }, + { + Name: "alertingBacktesting", + Description: "Rule backtesting API for alerting", + State: FeatureStateAlpha, + }, } ) diff --git a/pkg/services/featuremgmt/toggles_gen.go b/pkg/services/featuremgmt/toggles_gen.go index 6c798b7d135..5eba9115943 100644 --- a/pkg/services/featuremgmt/toggles_gen.go +++ b/pkg/services/featuremgmt/toggles_gen.go @@ -282,4 +282,8 @@ const ( // FlagSessionRemoteCache // Enable using remote cache for user sessions FlagSessionRemoteCache = "sessionRemoteCache" + + // FlagAlertingBacktesting + // Rule backtesting API for alerting + FlagAlertingBacktesting = "alertingBacktesting" ) diff --git a/pkg/services/ngalert/api/api.go b/pkg/services/ngalert/api/api.go index ffca5f0c293..3defbc4c995 100644 --- a/pkg/services/ngalert/api/api.go +++ b/pkg/services/ngalert/api/api.go @@ -10,7 +10,9 @@ import ( "github.com/grafana/grafana/pkg/services/accesscontrol" "github.com/grafana/grafana/pkg/services/datasourceproxy" "github.com/grafana/grafana/pkg/services/datasources" + "github.com/grafana/grafana/pkg/services/featuremgmt" apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" + "github.com/grafana/grafana/pkg/services/ngalert/backtesting" "github.com/grafana/grafana/pkg/services/ngalert/eval" "github.com/grafana/grafana/pkg/services/ngalert/metrics" "github.com/grafana/grafana/pkg/services/ngalert/models" @@ -81,6 +83,9 @@ type API struct { AlertRules *provisioning.AlertRuleService AlertsRouter *sender.AlertsRouter EvaluatorFactory eval.EvaluatorFactory + FeatureManager featuremgmt.FeatureToggles + + AppUrl *url.URL } // RegisterAPIEndpoints registers API handlers @@ -126,6 +131,9 @@ func (api *API) RegisterAPIEndpoints(m *metrics.API) { log: logger, accessControl: api.AccessControl, evaluator: api.EvaluatorFactory, + cfg: &api.Cfg.UnifiedAlerting, + backtesting: backtesting.NewEngine(api.AppUrl, api.EvaluatorFactory), + featureManager: api.FeatureManager, }), m) api.RegisterConfigurationApiEndpoints(NewConfiguration( &ConfigSrv{ diff --git a/pkg/services/ngalert/api/api_testing.go b/pkg/services/ngalert/api/api_testing.go index 77ad87403f6..91a3d09a1dd 100644 --- a/pkg/services/ngalert/api/api_testing.go +++ b/pkg/services/ngalert/api/api_testing.go @@ -1,10 +1,12 @@ package api import ( + "errors" "fmt" "net/http" "net/url" "strconv" + "time" "github.com/grafana/grafana-plugin-sdk-go/data" @@ -13,9 +15,12 @@ import ( "github.com/grafana/grafana/pkg/models" "github.com/grafana/grafana/pkg/services/accesscontrol" "github.com/grafana/grafana/pkg/services/datasources" + "github.com/grafana/grafana/pkg/services/featuremgmt" apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" + "github.com/grafana/grafana/pkg/services/ngalert/backtesting" "github.com/grafana/grafana/pkg/services/ngalert/eval" ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models" + "github.com/grafana/grafana/pkg/setting" "github.com/grafana/grafana/pkg/util" ) @@ -25,6 +30,9 @@ type TestingApiSrv struct { log log.Logger accessControl accesscontrol.AccessControl evaluator eval.EvaluatorFactory + cfg *setting.UnifiedAlertingSettings + backtesting *backtesting.Engine + featureManager featuremgmt.FeatureToggles } func (srv TestingApiSrv) RouteTestGrafanaRuleConfig(c *models.ReqContext, body apimodels.TestRulePayload) response.Response { @@ -136,3 +144,71 @@ func (srv TestingApiSrv) RouteEvalQueries(c *models.ReqContext, cmd apimodels.Ev return response.JSONStreaming(http.StatusOK, evalResults) } + +func (srv TestingApiSrv) BacktestAlertRule(c *models.ReqContext, cmd apimodels.BacktestConfig) response.Response { + if !srv.featureManager.IsEnabled(featuremgmt.FlagAlertingBacktesting) { + return ErrResp(http.StatusNotFound, nil, "Backgtesting API is not enabled") + } + + if cmd.From.After(cmd.To) { + return ErrResp(400, nil, "From cannot be greater than To") + } + + noDataState, err := ngmodels.NoDataStateFromString(string(cmd.NoDataState)) + + if err != nil { + return ErrResp(400, err, "") + } + forInterval := time.Duration(cmd.For) + if forInterval < 0 { + return ErrResp(400, nil, "Bad For interval") + } + + intervalSeconds, err := validateInterval(srv.cfg, time.Duration(cmd.Interval)) + if err != nil { + return ErrResp(400, err, "") + } + + if !authorizeDatasourceAccessForRule(&ngmodels.AlertRule{Data: cmd.Data}, func(evaluator accesscontrol.Evaluator) bool { + return accesscontrol.HasAccess(srv.accessControl, c)(accesscontrol.ReqSignedIn, evaluator) + }) { + return errorToResponse(fmt.Errorf("%w to query one or many data sources used by the rule", ErrAuthorization)) + } + + rule := &ngmodels.AlertRule{ + // ID: 0, + // Updated: time.Time{}, + // Version: 0, + // NamespaceUID: "", + // DashboardUID: nil, + // PanelID: nil, + // RuleGroup: "", + // RuleGroupIndex: 0, + // ExecErrState: "", + Title: cmd.Title, + // prefix backtesting- is to distinguish between executions of regular rule and backtesting in logs (like expression engine, evaluator, state manager etc) + UID: "backtesting-" + util.GenerateShortUID(), + OrgID: c.OrgID, + Condition: cmd.Condition, + Data: cmd.Data, + IntervalSeconds: intervalSeconds, + NoDataState: noDataState, + For: forInterval, + Annotations: cmd.Annotations, + Labels: cmd.Labels, + } + + result, err := srv.backtesting.Test(c.Req.Context(), c.SignedInUser, rule, cmd.From, cmd.To) + if err != nil { + if errors.Is(err, backtesting.ErrInvalidInputData) { + return ErrResp(400, err, "Failed to evaluate") + } + return ErrResp(500, err, "Failed to evaluate") + } + + body, err := data.FrameToJSON(result, data.IncludeAll) + if err != nil { + return ErrResp(500, err, "Failed to convert frame to JSON") + } + return response.JSON(http.StatusOK, body) +} diff --git a/pkg/services/ngalert/api/authorization.go b/pkg/services/ngalert/api/authorization.go index a8315c840d2..4dfc8a85ab3 100644 --- a/pkg/services/ngalert/api/authorization.go +++ b/pkg/services/ngalert/api/authorization.go @@ -68,6 +68,11 @@ func (api *API) authorize(method, path string) web.Handler { fallback = middleware.ReqSignedIn // additional authorization is done in the request handler eval = ac.EvalPermission(ac.ActionAlertingRuleRead) + // Grafana Rules Testing Paths + case http.MethodPost + "/api/v1/rule/backtest": + fallback = middleware.ReqSignedIn + // additional authorization is done in the request handler + eval = ac.EvalPermission(ac.ActionAlertingRuleRead) case http.MethodPost + "/api/v1/eval": fallback = middleware.ReqSignedIn // additional authorization is done in the request handler diff --git a/pkg/services/ngalert/api/authorization_test.go b/pkg/services/ngalert/api/authorization_test.go index d8b67290309..b4da61566fd 100644 --- a/pkg/services/ngalert/api/authorization_test.go +++ b/pkg/services/ngalert/api/authorization_test.go @@ -49,7 +49,7 @@ func TestAuthorize(t *testing.T) { } paths[p] = methods } - require.Len(t, paths, 40) + require.Len(t, paths, 41) ac := acmock.New() api := &API{AccessControl: ac} diff --git a/pkg/services/ngalert/api/generated_base_api_testing.go b/pkg/services/ngalert/api/generated_base_api_testing.go index c25842a1e6f..06304ed4983 100644 --- a/pkg/services/ngalert/api/generated_base_api_testing.go +++ b/pkg/services/ngalert/api/generated_base_api_testing.go @@ -19,11 +19,20 @@ import ( ) type TestingApi interface { + BacktestConfig(*models.ReqContext) response.Response RouteEvalQueries(*models.ReqContext) response.Response RouteTestRuleConfig(*models.ReqContext) response.Response RouteTestRuleGrafanaConfig(*models.ReqContext) response.Response } +func (f *TestingApiHandler) BacktestConfig(ctx *models.ReqContext) response.Response { + // Parse Request Body + conf := apimodels.BacktestConfig{} + if err := web.Bind(ctx.Req, &conf); err != nil { + return response.Error(http.StatusBadRequest, "bad request data", err) + } + return f.handleBacktestingConfig(ctx, conf) +} func (f *TestingApiHandler) RouteEvalQueries(ctx *models.ReqContext) response.Response { // Parse Request Body conf := apimodels.EvalQueriesPayload{} @@ -53,6 +62,16 @@ func (f *TestingApiHandler) RouteTestRuleGrafanaConfig(ctx *models.ReqContext) r func (api *API) RegisterTestingApiEndpoints(srv TestingApi, m *metrics.API) { api.RouteRegister.Group("", func(group routing.RouteRegister) { + group.Post( + toMacaronPath("/api/v1/rule/backtest"), + api.authorize(http.MethodPost, "/api/v1/rule/backtest"), + metrics.Instrument( + http.MethodPost, + "/api/v1/rule/backtest", + srv.BacktestConfig, + m, + ), + ) group.Post( toMacaronPath("/api/v1/eval"), api.authorize(http.MethodPost, "/api/v1/eval"), diff --git a/pkg/services/ngalert/api/testing_api.go b/pkg/services/ngalert/api/testing_api.go index 7a84a1e1bf0..b960717f41f 100644 --- a/pkg/services/ngalert/api/testing_api.go +++ b/pkg/services/ngalert/api/testing_api.go @@ -28,3 +28,7 @@ func (f *TestingApiHandler) handleRouteTestRuleGrafanaConfig(c *models.ReqContex func (f *TestingApiHandler) handleRouteEvalQueries(c *models.ReqContext, body apimodels.EvalQueriesPayload) response.Response { return f.svc.RouteEvalQueries(c, body) } + +func (f *TestingApiHandler) handleBacktestingConfig(ctx *models.ReqContext, conf apimodels.BacktestConfig) response.Response { + return f.svc.BacktestAlertRule(ctx, conf) +} diff --git a/pkg/services/ngalert/api/tooling/api.json b/pkg/services/ngalert/api/tooling/api.json index 33533e36e63..1613d8685b5 100644 --- a/pkg/services/ngalert/api/tooling/api.json +++ b/pkg/services/ngalert/api/tooling/api.json @@ -289,6 +289,60 @@ "title": "Authorization contains HTTP authorization credentials.", "type": "object" }, + "BacktestConfig": { + "properties": { + "annotations": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "condition": { + "type": "string" + }, + "data": { + "items": { + "$ref": "#/definitions/AlertQuery" + }, + "type": "array" + }, + "for": { + "$ref": "#/definitions/Duration" + }, + "from": { + "format": "date-time", + "type": "string" + }, + "interval": { + "$ref": "#/definitions/Duration" + }, + "labels": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "no_data_state": { + "enum": [ + "Alerting", + "NoData", + "OK" + ], + "type": "string" + }, + "title": { + "type": "string" + }, + "to": { + "format": "date-time", + "type": "string" + } + }, + "type": "object" + }, + "BacktestResult": { + "$ref": "#/definitions/Frame" + }, "BasicAuth": { "properties": { "password": { diff --git a/pkg/services/ngalert/api/tooling/definitions/testing.go b/pkg/services/ngalert/api/tooling/definitions/testing.go index 99e1aa27c93..92bad3b8e77 100644 --- a/pkg/services/ngalert/api/tooling/definitions/testing.go +++ b/pkg/services/ngalert/api/tooling/definitions/testing.go @@ -6,6 +6,8 @@ import ( "time" "github.com/grafana/grafana-plugin-sdk-go/backend" + "github.com/grafana/grafana-plugin-sdk-go/data" + "github.com/prometheus/common/model" "github.com/prometheus/alertmanager/config" "github.com/prometheus/prometheus/promql" @@ -53,6 +55,19 @@ import ( // Responses: // 200: EvalQueriesResponse +// swagger:route Post /api/v1/rule/backtest testing BacktestConfig +// +// Test rule +// +// Consumes: +// - application/json +// +// Produces: +// - application/json +// +// Responses: +// 200: BacktestResult + // swagger:parameters RouteTestReceiverConfig type TestReceiverRequest struct { // in:body @@ -160,3 +175,23 @@ type Failure ResponseDetails type ResponseDetails struct { Msg string `json:"msg"` } + +// swagger:model +type BacktestConfig struct { + From time.Time `json:"from"` + To time.Time `json:"to"` + Interval model.Duration `json:"interval,omitempty"` + + Condition string `json:"condition"` + Data []models.AlertQuery `json:"data"` // TODO yuri. Create API model for AlertQuery + For model.Duration `json:"for,omitempty"` + + Title string `json:"title"` + Labels map[string]string `json:"labels,omitempty"` + Annotations map[string]string `json:"annotations,omitempty"` + + NoDataState NoDataState `json:"no_data_state"` +} + +// swagger:model +type BacktestResult data.Frame diff --git a/pkg/services/ngalert/api/tooling/post.json b/pkg/services/ngalert/api/tooling/post.json index 1981634ab72..b049cbad482 100644 --- a/pkg/services/ngalert/api/tooling/post.json +++ b/pkg/services/ngalert/api/tooling/post.json @@ -289,6 +289,60 @@ "title": "Authorization contains HTTP authorization credentials.", "type": "object" }, + "BacktestConfig": { + "properties": { + "annotations": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "condition": { + "type": "string" + }, + "data": { + "items": { + "$ref": "#/definitions/AlertQuery" + }, + "type": "array" + }, + "for": { + "$ref": "#/definitions/Duration" + }, + "from": { + "format": "date-time", + "type": "string" + }, + "interval": { + "$ref": "#/definitions/Duration" + }, + "labels": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "no_data_state": { + "enum": [ + "Alerting", + "NoData", + "OK" + ], + "type": "string" + }, + "title": { + "type": "string" + }, + "to": { + "format": "date-time", + "type": "string" + } + }, + "type": "object" + }, + "BacktestResult": { + "$ref": "#/definitions/Frame" + }, "BasicAuth": { "properties": { "password": { @@ -6162,6 +6216,29 @@ ] } }, + "/api/v1/rule/backtest": { + "post": { + "consumes": [ + "application/json" + ], + "description": "Test rule", + "operationId": "BacktestConfig", + "produces": [ + "application/json" + ], + "responses": { + "200": { + "description": "BacktestResult", + "schema": { + "$ref": "#/definitions/BacktestResult" + } + } + }, + "tags": [ + "testing" + ] + } + }, "/api/v1/rule/test/grafana": { "post": { "consumes": [ diff --git a/pkg/services/ngalert/api/tooling/spec.json b/pkg/services/ngalert/api/tooling/spec.json index d88fa6723ce..7013308329d 100644 --- a/pkg/services/ngalert/api/tooling/spec.json +++ b/pkg/services/ngalert/api/tooling/spec.json @@ -2384,6 +2384,29 @@ } } }, + "/api/v1/rule/backtest": { + "post": { + "description": "Test rule", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "testing" + ], + "operationId": "BacktestConfig", + "responses": { + "200": { + "description": "BacktestResult", + "schema": { + "$ref": "#/definitions/BacktestResult" + } + } + } + } + }, "/api/v1/rule/test/grafana": { "post": { "description": "Test a rule against Grafana ruler", @@ -2748,6 +2771,60 @@ } } }, + "BacktestConfig": { + "type": "object", + "properties": { + "annotations": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "condition": { + "type": "string" + }, + "data": { + "type": "array", + "items": { + "$ref": "#/definitions/AlertQuery" + } + }, + "for": { + "$ref": "#/definitions/Duration" + }, + "from": { + "type": "string", + "format": "date-time" + }, + "interval": { + "$ref": "#/definitions/Duration" + }, + "labels": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "no_data_state": { + "type": "string", + "enum": [ + "Alerting", + "NoData", + "OK" + ] + }, + "title": { + "type": "string" + }, + "to": { + "type": "string", + "format": "date-time" + } + } + }, + "BacktestResult": { + "$ref": "#/definitions/Frame" + }, "BasicAuth": { "type": "object", "title": "BasicAuth contains basic HTTP authentication credentials.", diff --git a/pkg/services/ngalert/backtesting/engine.go b/pkg/services/ngalert/backtesting/engine.go new file mode 100644 index 00000000000..dfcf1d3e220 --- /dev/null +++ b/pkg/services/ngalert/backtesting/engine.go @@ -0,0 +1,158 @@ +package backtesting + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "net/url" + "time" + + "github.com/benbjohnson/clock" + "github.com/grafana/grafana-plugin-sdk-go/data" + "github.com/hashicorp/go-multierror" + + "github.com/grafana/grafana/pkg/infra/log" + "github.com/grafana/grafana/pkg/services/ngalert/eval" + "github.com/grafana/grafana/pkg/services/ngalert/models" + "github.com/grafana/grafana/pkg/services/ngalert/state" + "github.com/grafana/grafana/pkg/services/user" +) + +var ( + ErrInvalidInputData = errors.New("invalid input data") + + logger = log.New("ngalert.backtesting.engine") + backtestingEvaluatorFactory = newBacktestingEvaluator +) + +type callbackFunc = func(now time.Time, results eval.Results) error + +type backtestingEvaluator interface { + Eval(ctx context.Context, from, to time.Time, interval time.Duration, callback callbackFunc) error +} + +type stateManager interface { + ProcessEvalResults(ctx context.Context, evaluatedAt time.Time, alertRule *models.AlertRule, results eval.Results, extraLabels data.Labels) []state.StateTransition +} + +type Engine struct { + evalFactory eval.EvaluatorFactory + createStateManager func() stateManager +} + +func NewEngine(appUrl *url.URL, evalFactory eval.EvaluatorFactory) *Engine { + return &Engine{ + evalFactory: evalFactory, + createStateManager: func() stateManager { + return state.NewManager(nil, appUrl, nil, &NoopImageService{}, clock.New(), nil) + }, + } +} + +func (e *Engine) Test(ctx context.Context, user *user.SignedInUser, rule *models.AlertRule, from, to time.Time) (*data.Frame, error) { + ruleCtx := models.WithRuleKey(ctx, rule.GetKey()) + logger := logger.FromContext(ctx) + + if !from.Before(to) { + return nil, fmt.Errorf("%w: invalid interval of the backtesting [%d,%d]", ErrInvalidInputData, from.Unix(), to.Unix()) + } + if to.Sub(from).Seconds() < float64(rule.IntervalSeconds) { + return nil, fmt.Errorf("%w: interval of the backtesting [%d,%d] is less than evaluation interval [%ds]", ErrInvalidInputData, from.Unix(), to.Unix(), rule.IntervalSeconds) + } + length := int(to.Sub(from).Seconds()) / int(rule.IntervalSeconds) + + evaluator, err := backtestingEvaluatorFactory(ruleCtx, e.evalFactory, user, rule.GetEvalCondition()) + if err != nil { + return nil, multierror.Append(ErrInvalidInputData, err) + } + + stateManager := e.createStateManager() + + logger.Info("Start testing alert rule", "from", from, "to", to, "interval", rule.IntervalSeconds, "evaluations", length) + + start := time.Now() + + tsField := data.NewField("Time", nil, make([]time.Time, length)) + valueFields := make(map[string]*data.Field) + + err = evaluator.Eval(ruleCtx, from, to, time.Duration(rule.IntervalSeconds)*time.Second, func(currentTime time.Time, results eval.Results) error { + idx := int(currentTime.Sub(from).Seconds()) / int(rule.IntervalSeconds) + states := stateManager.ProcessEvalResults(ruleCtx, currentTime, rule, results, nil) + tsField.Set(idx, currentTime) + for _, s := range states { + field, ok := valueFields[s.CacheID] + if !ok { + field = data.NewField("", s.Labels, make([]*string, length)) + valueFields[s.CacheID] = field + } + if s.State.State != eval.NoData { // set nil if NoData + value := s.State.State.String() + if s.StateReason != "" { + value += " (" + s.StateReason + ")" + } + field.Set(idx, &value) + continue + } + } + return nil + }) + fields := make([]*data.Field, 0, len(valueFields)+1) + fields = append(fields, tsField) + for _, f := range valueFields { + fields = append(fields, f) + } + result := data.NewFrame("Backtesting results", fields...) + + if err != nil { + return nil, err + } + logger.Info("Rule testing finished successfully", "duration", time.Since(start)) + return result, nil +} + +func newBacktestingEvaluator(ctx context.Context, evalFactory eval.EvaluatorFactory, user *user.SignedInUser, condition models.Condition) (backtestingEvaluator, error) { + for _, q := range condition.Data { + if q.DatasourceUID == "__data__" || q.QueryType == "__data__" { + if len(condition.Data) != 1 { + return nil, errors.New("data queries are not supported with other expressions or data queries") + } + if condition.Condition == "" { + return nil, fmt.Errorf("condition must not be empty and be set to the data query %s", q.RefID) + } + if condition.Condition != q.RefID { + return nil, fmt.Errorf("condition must be set to the data query %s", q.RefID) + } + model := struct { + DataFrame *data.Frame `json:"data"` + }{} + err := json.Unmarshal(q.Model, &model) + if err != nil { + return nil, fmt.Errorf("failed to parse data frame: %w", err) + } + if model.DataFrame == nil { + return nil, errors.New("the data field must not be empty") + } + return newDataEvaluator(condition.Condition, model.DataFrame) + } + } + + evaluator, err := evalFactory.Create(eval.EvaluationContext{Ctx: ctx, + User: user, + }, condition) + + if err != nil { + return nil, err + } + + return &queryEvaluator{ + eval: evaluator, + }, nil +} + +// NoopImageService is a no-op image service. +type NoopImageService struct{} + +func (s *NoopImageService) NewImage(_ context.Context, _ *models.AlertRule) (*models.Image, error) { + return &models.Image{}, nil +} diff --git a/pkg/services/ngalert/backtesting/engine_test.go b/pkg/services/ngalert/backtesting/engine_test.go new file mode 100644 index 00000000000..c1f0896d79b --- /dev/null +++ b/pkg/services/ngalert/backtesting/engine_test.go @@ -0,0 +1,376 @@ +package backtesting + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "math/rand" + "testing" + "time" + + "github.com/grafana/grafana-plugin-sdk-go/data" + "github.com/stretchr/testify/require" + + "github.com/grafana/grafana/pkg/services/ngalert/eval" + "github.com/grafana/grafana/pkg/services/ngalert/eval/eval_mocks" + "github.com/grafana/grafana/pkg/services/ngalert/models" + "github.com/grafana/grafana/pkg/services/ngalert/state" + "github.com/grafana/grafana/pkg/services/user" + "github.com/grafana/grafana/pkg/util" +) + +func TestNewBacktestingEvaluator(t *testing.T) { + t.Run("creates data evaluator", func(t *testing.T) { + frame := GenerateWideSeriesFrame(10, time.Second) + d := struct { + Data *data.Frame `json:"data"` + }{ + Data: frame, + } + validData, err := json.Marshal(d) + require.NoError(t, err) + refID := util.GenerateShortUID() + + evalFactory := eval_mocks.NewEvaluatorFactory(&eval_mocks.ConditionEvaluatorMock{}) + + testCases := []struct { + name string + condition models.Condition + error bool + expectedEval backtestingEvaluator + }{ + { + name: "creates data evaluator when there is one query with type __data__", + condition: models.Condition{ + Condition: refID, + Data: []models.AlertQuery{ + { + RefID: refID, + QueryType: "__data__", + RelativeTimeRange: models.RelativeTimeRange{}, + DatasourceUID: "", + Model: json.RawMessage(validData), + }, + }, + }, + expectedEval: &dataEvaluator{}, + }, + { + name: "creates data evaluator when there is one query with datasource UID __data__", + condition: models.Condition{ + Condition: refID, + Data: []models.AlertQuery{ + { + RefID: refID, + QueryType: "", + RelativeTimeRange: models.RelativeTimeRange{}, + DatasourceUID: "__data__", + Model: json.RawMessage(validData), + }, + }, + }, + expectedEval: &dataEvaluator{}, + }, { + name: "fails if queries contain data and other queries", + condition: models.Condition{ + Condition: refID, + Data: []models.AlertQuery{ + { + RefID: refID, + QueryType: "__data__", + RelativeTimeRange: models.RelativeTimeRange{}, + DatasourceUID: "", + Model: json.RawMessage(validData), + }, + { + RefID: "D", + QueryType: "", + RelativeTimeRange: models.RelativeTimeRange{}, + DatasourceUID: util.GenerateShortUID(), + }, + }, + }, + error: true, + }, + { + name: "fails if data query does not contain data", + condition: models.Condition{ + Condition: refID, + Data: []models.AlertQuery{ + { + RefID: refID, + QueryType: "__data__", + RelativeTimeRange: models.RelativeTimeRange{}, + DatasourceUID: "", + Model: json.RawMessage(nil), + }, + }, + }, + error: true, + }, + { + name: "fails if data query does not contain frame in data", + condition: models.Condition{ + Condition: refID, + Data: []models.AlertQuery{ + { + RefID: refID, + QueryType: "__data__", + RelativeTimeRange: models.RelativeTimeRange{}, + DatasourceUID: "", + Model: json.RawMessage(`{ "data": "test"}`), + }, + }, + }, + error: true, + }, { + name: "fails if condition refID and data refID does not match", + condition: models.Condition{ + Condition: refID, + Data: []models.AlertQuery{ + { + RefID: "B", + QueryType: "__data__", + RelativeTimeRange: models.RelativeTimeRange{}, + DatasourceUID: "", + Model: json.RawMessage(validData), + }, + }, + }, + error: true, + }, + } + + for _, testCase := range testCases { + t.Run(testCase.name, func(t *testing.T) { + e, err := newBacktestingEvaluator(context.Background(), evalFactory, nil, testCase.condition) + if testCase.error { + require.Error(t, err) + return + } + require.NoError(t, err) + require.IsType(t, &dataEvaluator{}, e) + }) + } + }) +} + +func TestEvaluatorTest(t *testing.T) { + states := []eval.State{eval.Normal, eval.Alerting, eval.Pending} + generateState := func(prefix string) *state.State { + return &state.State{ + CacheID: "state-" + prefix, + Labels: models.GenerateAlertLabels(rand.Intn(5)+1, prefix+"-"), + State: states[rand.Intn(len(states))], + } + } + + randomResultCallback := func(now time.Time) (eval.Results, error) { + return eval.GenerateResults(rand.Intn(5)+1, eval.ResultGen()), nil + } + evaluator := &fakeBacktestingEvaluator{ + evalCallback: randomResultCallback, + } + manager := &fakeStateManager{} + + backtestingEvaluatorFactory = func(ctx context.Context, evalFactory eval.EvaluatorFactory, user *user.SignedInUser, condition models.Condition) (backtestingEvaluator, error) { + return evaluator, nil + } + + t.Cleanup(func() { + backtestingEvaluatorFactory = newBacktestingEvaluator + }) + + engine := &Engine{ + evalFactory: nil, + createStateManager: func() stateManager { + return manager + }, + } + rule := models.AlertRuleGen(models.WithInterval(time.Second))() + ruleInterval := time.Duration(rule.IntervalSeconds) * time.Second + + t.Run("should return data frame in specific format", func(t *testing.T) { + from := time.Unix(0, 0) + to := from.Add(5 * ruleInterval) + allStates := [...]eval.State{eval.Normal, eval.Alerting, eval.Pending, eval.NoData, eval.Error} + + var states []state.StateTransition + + for _, s := range allStates { + states = append(states, state.StateTransition{ + State: &state.State{ + CacheID: "state-" + s.String(), + Labels: models.GenerateAlertLabels(rand.Intn(5)+1, s.String()+"-"), + State: s, + StateReason: util.GenerateShortUID(), + }, + }) + } + + manager.stateCallback = func(now time.Time) []state.StateTransition { + return states + } + + frame, err := engine.Test(context.Background(), nil, rule, from, to) + + require.NoError(t, err) + require.Len(t, frame.Fields, len(states)+1) // +1 - timestamp + + t.Run("should contain field Time", func(t *testing.T) { + timestampField, _ := frame.FieldByName("Time") + require.NotNil(t, timestampField, "frame does not contain field 'Time'") + require.Equal(t, data.FieldTypeTime, timestampField.Type()) + }) + + fieldByState := make(map[string]*data.Field, len(states)) + + t.Run("should contain a field per state", func(t *testing.T) { + for _, s := range states { + var f *data.Field + for _, field := range frame.Fields { + if field.Labels.String() == s.Labels.String() { + f = field + break + } + } + require.NotNilf(t, f, "Cannot find a field by state labels") + fieldByState[s.CacheID] = f + } + }) + + t.Run("should be populated with correct values", func(t *testing.T) { + timestampField, _ := frame.FieldByName("Time") + expectedLength := timestampField.Len() + for _, field := range frame.Fields { + require.Equalf(t, expectedLength, field.Len(), "Field %s should have the size %d", field.Name, expectedLength) + } + for i := 0; i < expectedLength; i++ { + expectedTime := from.Add(time.Duration(int64(i)*rule.IntervalSeconds) * time.Second) + require.Equal(t, expectedTime, timestampField.At(i).(time.Time)) + for _, s := range states { + f := fieldByState[s.CacheID] + if s.State.State == eval.NoData { + require.Nil(t, f.At(i)) + } else { + v := f.At(i).(*string) + require.NotNilf(t, v, "Field [%s] value at index %d should not be nil", s.CacheID, i) + require.Equal(t, fmt.Sprintf("%s (%s)", s.State.State, s.StateReason), *v) + } + } + } + }) + }) + + t.Run("should backfill field with nulls if a new dimension created in the middle", func(t *testing.T) { + from := time.Unix(0, 0) + + state1 := state.StateTransition{ + State: generateState("1"), + } + state2 := state.StateTransition{ + State: generateState("2"), + } + state3 := state.StateTransition{ + State: generateState("3"), + } + stateByTime := map[time.Time][]state.StateTransition{ + from: {state1, state2}, + from.Add(1 * ruleInterval): {state1, state2}, + from.Add(2 * ruleInterval): {state1, state2}, + from.Add(3 * ruleInterval): {state1, state2, state3}, + from.Add(4 * ruleInterval): {state1, state2, state3}, + } + to := from.Add(time.Duration(len(stateByTime)) * ruleInterval) + + manager.stateCallback = func(now time.Time) []state.StateTransition { + return stateByTime[now] + } + + frame, err := engine.Test(context.Background(), nil, rule, from, to) + require.NoError(t, err) + + var field3 *data.Field + for _, field := range frame.Fields { + if field.Labels.String() == state3.Labels.String() { + field3 = field + break + } + } + require.NotNilf(t, field3, "Result for state 3 was not found") + require.Equalf(t, len(stateByTime), field3.Len(), "State3 result has unexpected number of values") + + idx := 0 + for curTime, states := range stateByTime { + value := field3.At(idx).(*string) + if len(states) == 2 { + require.Nilf(t, value, "The result should be nil if state3 was not available for time %v", curTime) + } + } + }) + + t.Run("should fail", func(t *testing.T) { + manager.stateCallback = func(now time.Time) []state.StateTransition { + return nil + } + + t.Run("when interval is not correct", func(t *testing.T) { + from := time.Now() + t.Run("when from=to", func(t *testing.T) { + to := from + _, err := engine.Test(context.Background(), nil, rule, from, to) + require.ErrorIs(t, err, ErrInvalidInputData) + }) + t.Run("when from > to", func(t *testing.T) { + to := from.Add(-ruleInterval) + _, err := engine.Test(context.Background(), nil, rule, from, to) + require.ErrorIs(t, err, ErrInvalidInputData) + }) + t.Run("when to-from < interval", func(t *testing.T) { + to := from.Add(ruleInterval).Add(-time.Millisecond) + _, err := engine.Test(context.Background(), nil, rule, from, to) + require.ErrorIs(t, err, ErrInvalidInputData) + }) + }) + + t.Run("when evalution fails", func(t *testing.T) { + expectedError := errors.New("test-error") + evaluator.evalCallback = func(now time.Time) (eval.Results, error) { + return nil, expectedError + } + from := time.Now() + to := from.Add(ruleInterval) + _, err := engine.Test(context.Background(), nil, rule, from, to) + require.ErrorIs(t, err, expectedError) + }) + }) +} + +type fakeStateManager struct { + stateCallback func(now time.Time) []state.StateTransition +} + +func (f *fakeStateManager) ProcessEvalResults(_ context.Context, evaluatedAt time.Time, _ *models.AlertRule, _ eval.Results, _ data.Labels) []state.StateTransition { + return f.stateCallback(evaluatedAt) +} + +type fakeBacktestingEvaluator struct { + evalCallback func(now time.Time) (eval.Results, error) +} + +func (f *fakeBacktestingEvaluator) Eval(_ context.Context, from, to time.Time, interval time.Duration, callback callbackFunc) error { + idx := 0 + for now := from; now.Before(to); now = now.Add(interval) { + results, err := f.evalCallback(now) + if err != nil { + return err + } + err = callback(now, results) + if err != nil { + return err + } + idx++ + } + return nil +} diff --git a/pkg/services/ngalert/backtesting/eval_data.go b/pkg/services/ngalert/backtesting/eval_data.go new file mode 100644 index 00000000000..061a7ca2c55 --- /dev/null +++ b/pkg/services/ngalert/backtesting/eval_data.go @@ -0,0 +1,96 @@ +package backtesting + +import ( + "context" + "errors" + "time" + + "github.com/grafana/grafana-plugin-sdk-go/data" + + "github.com/grafana/grafana/pkg/expr" + "github.com/grafana/grafana/pkg/expr/mathexp" + "github.com/grafana/grafana/pkg/services/ngalert/eval" +) + +// DataEvaluator is evaluator that evaluates data +type dataEvaluator struct { + refID string + data []mathexp.Series + downsampleFunction string + upsampleFunction string +} + +func newDataEvaluator(refID string, frame *data.Frame) (*dataEvaluator, error) { + series, err := expr.WideToMany(frame) + if err != nil { + return nil, err + } + for _, s := range series { + s.SortByTime(false) + } + + return &dataEvaluator{ + refID: refID, + data: series, + downsampleFunction: "last", + upsampleFunction: "pad", + }, nil +} + +func (d *dataEvaluator) Eval(_ context.Context, from, to time.Time, interval time.Duration, callback callbackFunc) error { + var resampled = make([]mathexp.Series, 0, len(d.data)) + + iterations := 0 + for _, s := range d.data { + // making sure the input data frame is aligned with the interval + r, err := s.Resample(d.refID, interval, d.downsampleFunction, d.upsampleFunction, from, to.Add(-interval)) // we want to query [from,to) + if err != nil { + return err + } + resampled = append(resampled, r) + iterations = r.Len() + } + + for i := 0; i < iterations; i++ { + result := make([]eval.Result, 0, len(resampled)) + var now time.Time + for _, series := range resampled { + snow := series.GetTime(i) + if !now.IsZero() && now != snow { // this should not happen because all series' belong to a single data frame + return errors.New("failed to resample input data. timestamps are not aligned") + } + now = snow + value := series.GetValue(i) + var state = eval.Normal + if value == nil { + continue + } else if *value != 0 { + state = eval.Alerting + } + result = append(result, eval.Result{ + Instance: series.GetLabels(), + State: state, + Results: nil, + Values: map[string]eval.NumberValueCapture{ + d.refID: { + Var: d.refID, + Labels: series.GetLabels(), + Value: value, + }, + }, + EvaluatedAt: now, + }) + } + if len(result) == 0 { + result = append(result, eval.Result{ + State: eval.NoData, + EvaluatedAt: now, + }) + } + err := callback(now, result) + if err != nil { + return err + } + } + return nil +} diff --git a/pkg/services/ngalert/backtesting/eval_data_test.go b/pkg/services/ngalert/backtesting/eval_data_test.go new file mode 100644 index 00000000000..bae7611497c --- /dev/null +++ b/pkg/services/ngalert/backtesting/eval_data_test.go @@ -0,0 +1,295 @@ +package backtesting + +import ( + "context" + "errors" + "fmt" + "math/rand" + "testing" + "time" + + "github.com/grafana/grafana-plugin-sdk-go/data" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/grafana/grafana/pkg/services/ngalert/eval" + "github.com/grafana/grafana/pkg/services/ngalert/models" + "github.com/grafana/grafana/pkg/util" +) + +func GenerateWideSeriesFrame(size int, resolution time.Duration) *data.Frame { + fields := make(data.Fields, 0, rand.Intn(4)+2) + fields = append(fields, data.NewField("time", nil, make([]time.Time, size))) + for i := 1; i < cap(fields); i++ { + name := fmt.Sprintf("values-%d", i) + fields = append(fields, data.NewField(name, models.GenerateAlertLabels(rand.Intn(4)+1, name), make([]int64, size))) + } + frame := data.NewFrame("test", fields...) + + tmili := time.Now().UnixMilli() + tmili = tmili - tmili%resolution.Milliseconds() + current := time.UnixMilli(tmili).Add(-resolution * time.Duration(size)) + for i := 0; i < size; i++ { + vals := make([]interface{}, 0, len(frame.Fields)) + vals = append(vals, current) + for i := 1; i < cap(vals); i++ { + vals = append(vals, rand.Int63n(2)-1) // random value [-1,1] + } + frame.SetRow(i, vals...) + current = current.Add(resolution) + } + return frame +} + +func TestDataEvaluator_New(t *testing.T) { + t.Run("should fail if frame is not TimeSeriesTypeWide", func(t *testing.T) { + t.Run("but TimeSeriesTypeNot", func(t *testing.T) { + frameTimeSeriesTypeNot := data.NewFrame("test") + require.Equal(t, data.TimeSeriesTypeNot, frameTimeSeriesTypeNot.TimeSeriesSchema().Type) + _, err := newDataEvaluator(util.GenerateShortUID(), frameTimeSeriesTypeNot) + require.Error(t, err) + }) + t.Run("but TimeSeriesTypeLong", func(t *testing.T) { + frameTimeSeriesTypeLong := data.NewFrame("test", data.NewField("time", nil, make([]time.Time, 0)), data.NewField("data", nil, make([]string, 0)), data.NewField("value", nil, make([]int64, 0))) + require.Equal(t, data.TimeSeriesTypeLong, frameTimeSeriesTypeLong.TimeSeriesSchema().Type) + _, err := newDataEvaluator(util.GenerateShortUID(), frameTimeSeriesTypeLong) + require.Error(t, err) + }) + }) + + t.Run("should convert fame to series and sort it", func(t *testing.T) { + refID := util.GenerateShortUID() + frameSize := rand.Intn(100) + 100 + frame := GenerateWideSeriesFrame(frameSize, time.Second) + rand.Shuffle(frameSize, func(i, j int) { + rowi := frame.RowCopy(i) + rowj := frame.RowCopy(j) + frame.SetRow(i, rowj...) + frame.SetRow(j, rowi...) + }) + e, err := newDataEvaluator(refID, frame) + require.NoError(t, err) + require.Equal(t, refID, e.refID) + require.Len(t, e.data, len(frame.Fields)-1) // timestamp is not counting + for idx, series := range e.data { + assert.Equalf(t, series.Len(), frameSize, "Length of the series %d is %d but expected to be %d", idx, series.Len(), frameSize) + assert.Equalf(t, frame.Fields[idx+1].Labels, series.GetLabels(), "Labels of series %d does not match with original field labels", idx) + assert.Lessf(t, series.GetTime(0), series.GetTime(1), "Series %d is expected to be sorted in ascending order", idx) + } + }) +} + +func TestDataEvaluator_Eval(t *testing.T) { + type results struct { + time time.Time + results eval.Results + } + + refID := util.GenerateShortUID() + frameSize := rand.Intn(100) + 100 + frame := GenerateWideSeriesFrame(frameSize, time.Second) + from := frame.At(0, 0).(time.Time) + to := frame.At(0, frame.Rows()-1).(time.Time) + evaluator, err := newDataEvaluator(refID, frame) + require.NoErrorf(t, err, "Frame %v", frame) + + t.Run("should use data points when frame resolution matches evaluation interval", func(t *testing.T) { + r := make([]results, 0, frame.Rows()) + + invterval := time.Second + + resultsCount := int(to.Sub(from).Seconds() / invterval.Seconds()) + + err = evaluator.Eval(context.Background(), from, to, time.Second, func(now time.Time, res eval.Results) error { + r = append(r, results{ + now, res, + }) + return nil + }) + require.NoError(t, err) + + require.Len(t, r, resultsCount) + + t.Run("results should be in the same refID", func(t *testing.T) { + for _, res := range r { + for _, result := range res.results { + require.Contains(t, result.Values, refID) + } + } + }) + + t.Run("should be Alerting if value is not 0", func(t *testing.T) { + for _, res := range r { + for _, result := range res.results { + v := result.Values[refID].Value + require.NotNil(t, v) + if *v == 0 { + require.Equalf(t, eval.Normal, result.State, "Result value is %d", *v) + } else { + require.Equalf(t, eval.Alerting, result.State, "Result value is %d", *v) + } + } + } + }) + + t.Run("results should be in ascending order", func(t *testing.T) { + var prev = results{} + for i := 0; i < len(r); i++ { + current := r[i] + if i > 0 { + require.Less(t, prev.time, current.time) + } else { + require.Equal(t, from, current.time) + } + prev = current + } + }) + + t.Run("results should be in the same order as fields in frame", func(t *testing.T) { + for i := 0; i < len(r); i++ { + current := r[i] + for idx, result := range current.results { + field := frame.Fields[idx+1] + require.Equal(t, field.Labels, result.Instance) + expected, err := field.FloatAt(i) + require.NoError(t, err) + require.EqualValues(t, expected, *result.Values[refID].Value) + } + } + }) + }) + t.Run("when frame resolution does not match evaluation interval", func(t *testing.T) { + t.Run("should closest timestamp if interval is smaller than frame resolution", func(t *testing.T) { + interval := 300 * time.Millisecond + size := to.Sub(from).Milliseconds() / interval.Milliseconds() + r := make([]results, 0, size) + + err = evaluator.Eval(context.Background(), from, to, interval, func(now time.Time, res eval.Results) error { + r = append(r, results{ + now, res, + }) + return nil + }) + + currentRowIdx := 0 + nextTime := frame.At(0, currentRowIdx+1).(time.Time) + for id, current := range r { + if !current.time.Before(nextTime) { + currentRowIdx++ + if frame.Rows() > currentRowIdx+1 { + nextTime = frame.At(0, currentRowIdx+1).(time.Time) + } + } + for idx, result := range current.results { + field := frame.Fields[idx+1] + require.Equal(t, field.Labels, result.Instance) + expected, err := field.FloatAt(currentRowIdx) + require.NoError(t, err) + require.EqualValuesf(t, expected, *result.Values[refID].Value, "Time %d", id) + } + } + }) + + t.Run("should downscale series if interval is smaller using previous value", func(t *testing.T) { + interval := 5 * time.Second + size := int(to.Sub(from).Seconds() / interval.Seconds()) + r := make([]results, 0, size) + + err = evaluator.Eval(context.Background(), from, to, interval, func(now time.Time, res eval.Results) error { + r = append(r, results{ + now, res, + }) + return nil + }) + + currentRowIdx := 0 + var frameDate time.Time + for resultNum, current := range r { + for i := currentRowIdx; i < frame.Rows(); i++ { + d := frame.At(0, i).(time.Time) + if d.Equal(current.time) { + currentRowIdx = i + frameDate = d + break + } + if d.After(current.time) { + require.Fail(t, "Interval is not aligned") + } + } + for idx, result := range current.results { + field := frame.Fields[idx+1] + require.Equal(t, field.Labels, result.Instance) + expected, err := field.FloatAt(currentRowIdx) + require.NoError(t, err) + require.EqualValuesf(t, expected, *result.Values[refID].Value, "Current time [%v] frame time [%v]. Result #%d", current.time, frameDate, resultNum) + } + } + }) + }) + t.Run("when eval interval is larger than data", func(t *testing.T) { + t.Run("should be noData until the frame interval", func(t *testing.T) { + newFrom := from.Add(-10 * time.Second) + r := make([]results, 0, int(to.Sub(newFrom).Seconds())) + err = evaluator.Eval(context.Background(), newFrom, to, time.Second, func(now time.Time, res eval.Results) error { + r = append(r, results{ + now, res, + }) + return nil + }) + + rowIdx := 0 + for _, current := range r { + if current.time.Before(from) { + require.Len(t, current.results, 1) + require.Equal(t, eval.NoData, current.results[0].State) + } else { + for idx, result := range current.results { + field := frame.Fields[idx+1] + require.Equal(t, field.Labels, result.Instance) + expected, err := field.FloatAt(rowIdx) + require.NoError(t, err) + require.EqualValues(t, expected, *result.Values[refID].Value) + } + rowIdx++ + } + } + }) + + t.Run("should be the last value after the frame interval", func(t *testing.T) { + newTo := to.Add(10 * time.Second) + r := make([]results, 0, int(newTo.Sub(from).Seconds())) + err = evaluator.Eval(context.Background(), from, newTo, time.Second, func(now time.Time, res eval.Results) error { + r = append(r, results{ + now, res, + }) + return nil + }) + + rowIdx := 0 + for _, current := range r { + for idx, result := range current.results { + field := frame.Fields[idx+1] + require.Equal(t, field.Labels, result.Instance) + expected, err := field.FloatAt(rowIdx) + require.NoError(t, err) + require.EqualValues(t, expected, *result.Values[refID].Value) + } + if current.time.Before(to) { + rowIdx++ + } + } + }) + }) + t.Run("should stop if callback error", func(t *testing.T) { + expectedError := errors.New("error") + evals := 0 + err = evaluator.Eval(context.Background(), from, to, time.Second, func(now time.Time, res eval.Results) error { + if evals > 5 { + return expectedError + } + evals++ + return nil + }) + require.ErrorIs(t, err, expectedError) + }) +} diff --git a/pkg/services/ngalert/backtesting/eval_query.go b/pkg/services/ngalert/backtesting/eval_query.go new file mode 100644 index 00000000000..06c71c5b415 --- /dev/null +++ b/pkg/services/ngalert/backtesting/eval_query.go @@ -0,0 +1,27 @@ +package backtesting + +import ( + "context" + "time" + + "github.com/grafana/grafana/pkg/services/ngalert/eval" +) + +// QueryEvaluator is evaluator of regular alert rule queries +type queryEvaluator struct { + eval eval.ConditionEvaluator +} + +func (d *queryEvaluator) Eval(ctx context.Context, from, to time.Time, interval time.Duration, callback callbackFunc) error { + for now := from; now.Before(to); now = now.Add(interval) { + results, err := d.eval.Evaluate(ctx, now) + if err != nil { + return err + } + err = callback(now, results) + if err != nil { + return err + } + } + return nil +} diff --git a/pkg/services/ngalert/backtesting/eval_query_test.go b/pkg/services/ngalert/backtesting/eval_query_test.go new file mode 100644 index 00000000000..92369b8e4e6 --- /dev/null +++ b/pkg/services/ngalert/backtesting/eval_query_test.go @@ -0,0 +1,89 @@ +package backtesting + +import ( + "context" + "errors" + "math/rand" + "testing" + "time" + + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + + "github.com/grafana/grafana/pkg/services/ngalert/eval" + "github.com/grafana/grafana/pkg/services/ngalert/eval/eval_mocks" +) + +func TestQueryEvaluator_Eval(t *testing.T) { + ctx := context.Background() + interval := time.Duration(rand.Int63n(9)+1) * time.Second + times := rand.Intn(11) + 5 + to := time.Now() + from := to.Add(-time.Duration(times) * interval) + + t.Run("should evaluate query", func(t *testing.T) { + m := &eval_mocks.ConditionEvaluatorMock{} + expectedResults := eval.Results{} + m.EXPECT().Evaluate(mock.Anything, mock.Anything).Return(expectedResults, nil) + evaluator := queryEvaluator{ + eval: m, + } + + intervals := make([]time.Time, 0, times) + + err := evaluator.Eval(ctx, from, to, interval, func(now time.Time, results eval.Results) error { + intervals = append(intervals, now) + return nil + }) + require.NoError(t, err) + require.Len(t, intervals, times) + + m.AssertNumberOfCalls(t, "Evaluate", times) + for _, now := range intervals { + m.AssertCalled(t, "Evaluate", ctx, now) + } + }) + + t.Run("should stop evaluation if error", func(t *testing.T) { + t.Run("when evaluation fails", func(t *testing.T) { + m := &eval_mocks.ConditionEvaluatorMock{} + expectedResults := eval.Results{} + expectedError := errors.New("test") + m.EXPECT().Evaluate(mock.Anything, mock.Anything).Return(expectedResults, nil).Times(3) + m.EXPECT().Evaluate(mock.Anything, mock.Anything).Return(nil, expectedError).Once() + evaluator := queryEvaluator{ + eval: m, + } + + intervals := make([]time.Time, 0, times) + + err := evaluator.Eval(ctx, from, to, interval, func(now time.Time, results eval.Results) error { + intervals = append(intervals, now) + return nil + }) + require.ErrorIs(t, err, expectedError) + require.Len(t, intervals, 3) + }) + + t.Run("when callback fails", func(t *testing.T) { + m := &eval_mocks.ConditionEvaluatorMock{} + expectedResults := eval.Results{} + expectedError := errors.New("test") + m.EXPECT().Evaluate(mock.Anything, mock.Anything).Return(expectedResults, nil) + evaluator := queryEvaluator{ + eval: m, + } + + intervals := make([]time.Time, 0, times) + + err := evaluator.Eval(ctx, from, to, interval, func(now time.Time, results eval.Results) error { + if len(intervals) > 3 { + return expectedError + } + intervals = append(intervals, now) + return nil + }) + require.ErrorIs(t, err, expectedError) + }) + }) +} diff --git a/pkg/services/ngalert/ngalert.go b/pkg/services/ngalert/ngalert.go index 12d4cc33f84..b3f6f783afc 100644 --- a/pkg/services/ngalert/ngalert.go +++ b/pkg/services/ngalert/ngalert.go @@ -244,6 +244,8 @@ func (ng *AlertNG) init() error { AlertRules: alertRuleService, AlertsRouter: alertsRouter, EvaluatorFactory: evalFactory, + FeatureManager: ng.FeatureToggles, + AppUrl: appUrl, } api.RegisterAPIEndpoints(ng.Metrics.GetAPIMetrics()) diff --git a/pkg/tests/api/alerting/api_backtesting_data.json b/pkg/tests/api/alerting/api_backtesting_data.json new file mode 100644 index 00000000000..f3a6cef0964 --- /dev/null +++ b/pkg/tests/api/alerting/api_backtesting_data.json @@ -0,0 +1,306 @@ +{ + "data": { + "from": "2022-10-19T18:44:00Z", + "to": "2022-10-19T19:44:00Z", + "interval": "1m", + "for": "0", + "labels": { + "templatable-label": "test" + }, + "annotations": { + "anno-test": "test" + }, + "condition": "A", + "no_data_state": "Alerting", + "data": [ + { + "refId": "A", + "queryType": "", + "datasourceUid": "__data__", + "model": { + "data": { + "schema": { + "name": "A-series", + "refId": "A", + "fields": [ + { + "name": "Time", + "type": "time", + "typeInfo": { + "frame": "time.Time", + "nullable": true + } + }, + { + "name": "A-series", + "type": "number", + "typeInfo": { + "frame": "float64", + "nullable": true + }, + "labels": { + "label": "2", + "test": "1" + } + } + ] + }, + "data": { + "values": [ + [ + 1666205040000, + 1666205100000, + 1666205160000, + 1666205220000, + 1666205280000, + 1666205340000, + 1666205400000, + 1666205460000, + 1666205520000, + 1666205580000, + 1666205640000, + 1666205700000, + 1666205760000, + 1666205820000, + 1666205880000, + 1666205940000, + 1666206000000, + 1666206060000, + 1666206120000, + 1666206180000, + 1666206240000, + 1666206300000, + 1666206360000, + 1666206420000, + 1666206480000, + 1666206540000, + 1666206600000, + 1666206660000, + 1666206720000, + 1666206780000, + 1666206840000, + 1666206900000, + 1666206960000, + 1666207020000, + 1666207080000, + 1666207140000, + 1666207200000, + 1666207260000, + 1666207320000, + 1666207380000, + 1666207440000, + 1666207500000, + 1666207560000, + 1666207620000, + 1666207680000, + 1666207740000, + 1666207800000, + 1666207860000, + 1666207920000, + 1666207980000, + 1666208040000, + 1666208100000, + 1666208160000, + 1666208220000, + 1666208280000, + 1666208340000, + 1666208400000, + 1666208460000, + 1666208520000, + 1666208580000, + 1666208640000 + ], + [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ] + ] + } + } + } + } + ] + }, + "query": { + "from": "2022-10-19T18:44:00Z", + "to": "2022-10-19T19:44:00Z", + "interval": "1m", + "for": "5m", + "labels": { + "templatable-label": "TMPL-{{.Labels.state}}" + }, + "annotations": { + "anno-test": "TMPL-ANNO-{{.Labels.state}}" + }, + "condition": "C", + "no_data_state": "Alerting", + "data": [ + { + "refId": "A", + "datasourceUid": "testdata", + "queryType": "", + "relativeTimeRange": { + "from": 600, + "to": 0 + }, + "model": { + "refId": "A", + "hide": false, + "scenarioId": "usa", + "usa": { + "mode": "timeseries", + "period": "1m", + "states": [ + "GA", "FL", "AL", "AZ" + ], + "fields": [ + "baz" + ] + } + } + }, + { + "refId": "B", + "datasourceUid": "-100", + "queryType": "", + "model": { + "refId": "B", + "hide": false, + "type": "reduce", + "datasource": { + "uid": "-100", + "type": "__expr__" + }, + "conditions": [ + { + "type": "query", + "evaluator": { + "params": [], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "B" + ] + }, + "reducer": { + "params": [], + "type": "last" + } + } + ], + "reducer": "last", + "expression": "A" + }, + "relativeTimeRange": { + "from": 600, + "to": 0 + } + }, + { + "refId": "C", + "datasourceUid": "-100", + "queryType": "", + "model": { + "refId": "C", + "hide": false, + "type": "threshold", + "datasource": { + "uid": "-100", + "type": "__expr__" + }, + "conditions": [ + { + "type": "query", + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "C" + ] + }, + "reducer": { + "params": [], + "type": "last" + } + } + ], + "expression": "B" + }, + "relativeTimeRange": { + "from": 600, + "to": 0 + } + } + ] + } +} \ No newline at end of file diff --git a/pkg/tests/api/alerting/api_backtesting_test.go b/pkg/tests/api/alerting/api_backtesting_test.go new file mode 100644 index 00000000000..114b14f9dd8 --- /dev/null +++ b/pkg/tests/api/alerting/api_backtesting_test.go @@ -0,0 +1,136 @@ +package alerting + +import ( + "context" + "encoding/json" + "net/http" + "os" + "path/filepath" + "testing" + + "github.com/grafana/grafana-plugin-sdk-go/data" + "github.com/stretchr/testify/require" + + "github.com/grafana/grafana/pkg/services/accesscontrol" + "github.com/grafana/grafana/pkg/services/accesscontrol/resourcepermissions" + "github.com/grafana/grafana/pkg/services/datasources" + "github.com/grafana/grafana/pkg/services/featuremgmt" + apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" + "github.com/grafana/grafana/pkg/services/org" + "github.com/grafana/grafana/pkg/services/user" + "github.com/grafana/grafana/pkg/setting" + "github.com/grafana/grafana/pkg/tests/testinfra" +) + +func TestBacktesting(t *testing.T) { + dir, path := testinfra.CreateGrafDir(t, testinfra.GrafanaOpts{ + DisableLegacyAlerting: true, + EnableUnifiedAlerting: true, + DisableAnonymous: true, + AppModeProduction: true, + EnableFeatureToggles: []string{ + featuremgmt.FlagAlertingBacktesting, + }, + EnableLog: false, + }) + + grafanaListedAddr, env := testinfra.StartGrafanaEnv(t, dir, path) + + userId := createUser(t, env.SQLStore, user.CreateUserCommand{ + DefaultOrgRole: string(org.RoleAdmin), + Password: "admin", + Login: "admin", + }) + + apiCli := newAlertingApiClient(grafanaListedAddr, "admin", "admin") + + input, err := os.ReadFile(filepath.Join("api_backtesting_data.json")) + require.NoError(t, err) + var testData map[string]apimodels.BacktestConfig + require.NoError(t, json.Unmarshal(input, &testData)) + + queryRequest, ok := testData["query"] + require.Truef(t, ok, "The data file does not contain a field `query`") + + for _, query := range queryRequest.Data { + isExpr, _ := query.IsExpression() + if isExpr { + continue + } + t.Logf("Creating a new test data source with UID %s", query.DatasourceUID) + dsCmd := &datasources.AddDataSourceCommand{ + Name: "Backtesting-TestDatasource", + Type: "testdata", + Access: datasources.DS_ACCESS_PROXY, + Uid: query.DatasourceUID, + UserId: userId, + OrgId: 1, + } + err := env.Server.HTTPServer.DataSourcesService.AddDataSource(context.Background(), dsCmd) + require.NoError(t, err) + break + } + + t.Run("and request contains data", func(t *testing.T) { + t.Run("should accept request", func(t *testing.T) { + request, ok := testData["data"] + require.Truef(t, ok, "The data file does not contain a field `data`") + + status, body := apiCli.SubmitRuleForBacktesting(t, request) + require.Equal(t, http.StatusOK, status) + var result data.Frame + require.NoErrorf(t, json.Unmarshal([]byte(body), &result), "cannot parse response to data frame") + }) + }) + + t.Run("and request contains query", func(t *testing.T) { + t.Run("should accept request with query", func(t *testing.T) { + status, body := apiCli.SubmitRuleForBacktesting(t, queryRequest) + require.Equalf(t, http.StatusOK, status, "Response: %s", body) + var result data.Frame + require.NoErrorf(t, json.Unmarshal([]byte(body), &result), "cannot parse response to data frame") + }) + }) + + t.Run("if user does not have permissions", func(t *testing.T) { + if !setting.IsEnterprise { + t.Skip("Enterprise-only test") + } + + testUserId := createUser(t, env.SQLStore, user.CreateUserCommand{ + DefaultOrgRole: "", + Password: "test", + Login: "test", + }) + + testUserApiCli := newAlertingApiClient(grafanaListedAddr, "test", "test") + + t.Run("fail if can't read rules", func(t *testing.T) { + status, body := testUserApiCli.SubmitRuleForBacktesting(t, queryRequest) + require.Contains(t, body, accesscontrol.ActionAlertingRuleRead) + require.Equalf(t, http.StatusForbidden, status, "Response: %s", body) + }) + + // access control permissions store + permissionsStore := resourcepermissions.NewStore(env.SQLStore) + _, err := permissionsStore.SetUserResourcePermission(context.Background(), + accesscontrol.GlobalOrgID, + accesscontrol.User{ID: testUserId}, + resourcepermissions.SetResourcePermissionCommand{ + Actions: []string{ + accesscontrol.ActionAlertingRuleRead, + }, + Resource: "folders", + ResourceID: "*", + ResourceAttribute: "uid", + }, nil) + require.NoError(t, err) + testUserApiCli.ReloadCachedPermissions(t) + + t.Run("fail if can't query data sources", func(t *testing.T) { + status, body := testUserApiCli.SubmitRuleForBacktesting(t, queryRequest) + require.Contains(t, body, "user is not authorized to query one or many data sources used by the rule") + require.Equalf(t, http.StatusUnauthorized, status, "Response: %s", body) + }) + }) +} diff --git a/pkg/tests/api/alerting/testing.go b/pkg/tests/api/alerting/testing.go index f13443b5d2d..2e54b57b929 100644 --- a/pkg/tests/api/alerting/testing.go +++ b/pkg/tests/api/alerting/testing.go @@ -311,3 +311,22 @@ func (a apiClient) GetAllRulesGroupInFolder(t *testing.T, folder string) apimode require.NoError(t, json.Unmarshal(b, &result)) return result } + +func (a apiClient) SubmitRuleForBacktesting(t *testing.T, config apimodels.BacktestConfig) (int, string) { + t.Helper() + buf := bytes.Buffer{} + enc := json.NewEncoder(&buf) + err := enc.Encode(config) + require.NoError(t, err) + + u := fmt.Sprintf("%s/api/v1/rule/backtest", a.url) + // nolint:gosec + resp, err := http.Post(u, "application/json", &buf) + require.NoError(t, err) + defer func() { + _ = resp.Body.Close() + }() + b, err := io.ReadAll(resp.Body) + require.NoError(t, err) + return resp.StatusCode, string(b) +}