From 4acbcd7053d0549aaa53f982199dc31490478a94 Mon Sep 17 00:00:00 2001 From: Sofia Papagiannaki Date: Mon, 12 Oct 2020 21:51:39 +0300 Subject: [PATCH] AlertingNG: POC of evaluator under feature flag. (#27922) * New feature toggle for enabling alerting NG * Initial commit * Modify evaluate alert API request * Check for unique labels in alert execution result dataframes * Remove print statement * Additional minor fixes/comments * Fix lint issues * Add API endpoint for evaluating panel queries * Push missing renaming * add refId for condition to API * add refId for condition to API * switch dashboard based eval to get method * add from/to params to dashboard based eval * add from/to params to eval endpoint Co-authored-by: kyle --- docs/sources/administration/configuration.md | 2 +- packages/grafana-data/src/types/config.ts | 1 + packages/grafana-runtime/src/config.ts | 1 + pkg/api/api.go | 7 + pkg/api/dtos/ngalert.go | 12 + pkg/api/http_server.go | 2 + pkg/api/ngalert.go | 101 +++++++ pkg/models/datasource.go | 42 +-- pkg/services/ngalert/eval.go | 284 +++++++++++++++++++ pkg/setting/setting.go | 5 + pkg/tsdb/models.go | 12 +- 11 files changed, 441 insertions(+), 28 deletions(-) create mode 100644 pkg/api/dtos/ngalert.go create mode 100644 pkg/api/ngalert.go create mode 100644 pkg/services/ngalert/eval.go diff --git a/docs/sources/administration/configuration.md b/docs/sources/administration/configuration.md index 04643e76f9e..815a62ff026 100644 --- a/docs/sources/administration/configuration.md +++ b/docs/sources/administration/configuration.md @@ -1417,7 +1417,7 @@ For more information about Grafana Enterprise, refer to [Grafana Enterprise]({{< ### enable -Keys of alpha features to enable, separated by space. Available alpha features are: `transformations` +Keys of alpha features to enable, separated by space. 
Available alpha features are: `transformations`, `ngalert` ## [date_formats] diff --git a/packages/grafana-data/src/types/config.ts b/packages/grafana-data/src/types/config.ts index 215f364f59f..a283f702386 100644 --- a/packages/grafana-data/src/types/config.ts +++ b/packages/grafana-data/src/types/config.ts @@ -34,6 +34,7 @@ export interface BuildInfo { export interface FeatureToggles { live: boolean; expressions: boolean; + ngalert: boolean; /** * @remarks diff --git a/packages/grafana-runtime/src/config.ts b/packages/grafana-runtime/src/config.ts index 0636af0219a..3f5660ff8b3 100644 --- a/packages/grafana-runtime/src/config.ts +++ b/packages/grafana-runtime/src/config.ts @@ -57,6 +57,7 @@ export class GrafanaBootConfig implements GrafanaConfig { meta: false, datasourceInsights: false, reportGrid: false, + ngalert: false, }; licenseInfo: LicenseInfo = {} as LicenseInfo; rendererAvailable = false; diff --git a/pkg/api/api.go b/pkg/api/api.go index e312a59dac2..1cfce02cb92 100644 --- a/pkg/api/api.go +++ b/pkg/api/api.go @@ -351,6 +351,13 @@ func (hs *HTTPServer) registerRoutes() { alertsRoute.Get("/states-for-dashboard", Wrap(GetAlertStatesForDashboard)) }) + if hs.Cfg.IsNgAlertEnabled() { + apiRoute.Group("/alert-definitions", func(alertDefinitions routing.RouteRegister) { + alertDefinitions.Get("/eval/:dashboardID/:panelID/:refID", reqEditorRole, Wrap(hs.AlertDefinitionEval)) + alertDefinitions.Post("/eval", reqEditorRole, bind(dtos.EvalAlertConditionsCommand{}), Wrap(hs.ConditionsEval)) + }) + } + apiRoute.Get("/alert-notifiers", reqEditorRole, Wrap(GetAlertNotifiers)) apiRoute.Group("/alert-notifications", func(alertNotifications routing.RouteRegister) { diff --git a/pkg/api/dtos/ngalert.go b/pkg/api/dtos/ngalert.go new file mode 100644 index 00000000000..4fa5712e96d --- /dev/null +++ b/pkg/api/dtos/ngalert.go @@ -0,0 +1,12 @@ +package dtos + +import ( + "time" + + eval "github.com/grafana/grafana/pkg/services/ngalert" +) + +type EvalAlertConditionsCommand 
struct { + Conditions eval.Conditions `json:"conditions"` + Now time.Time `json:"now"` +} diff --git a/pkg/api/http_server.go b/pkg/api/http_server.go index 1ae0aeee82b..385d0df697a 100644 --- a/pkg/api/http_server.go +++ b/pkg/api/http_server.go @@ -29,6 +29,7 @@ import ( "github.com/grafana/grafana/pkg/services/datasources" "github.com/grafana/grafana/pkg/services/hooks" "github.com/grafana/grafana/pkg/services/login" + eval "github.com/grafana/grafana/pkg/services/ngalert" "github.com/grafana/grafana/pkg/services/provisioning" "github.com/grafana/grafana/pkg/services/quota" "github.com/grafana/grafana/pkg/services/rendering" @@ -70,6 +71,7 @@ type HTTPServer struct { BackendPluginManager backendplugin.Manager `inject:""` PluginManager *plugins.PluginManager `inject:""` SearchService *search.SearchService `inject:""` + AlertNG *eval.AlertNG `inject:""` Live *live.GrafanaLive Listener net.Listener } diff --git a/pkg/api/ngalert.go b/pkg/api/ngalert.go new file mode 100644 index 00000000000..b9ab6f38cca --- /dev/null +++ b/pkg/api/ngalert.go @@ -0,0 +1,101 @@ +package api + +import ( + "context" + + "github.com/grafana/grafana-plugin-sdk-go/data" + "github.com/grafana/grafana/pkg/api/dtos" + "github.com/grafana/grafana/pkg/models" + eval "github.com/grafana/grafana/pkg/services/ngalert" + "github.com/grafana/grafana/pkg/setting" + "github.com/grafana/grafana/pkg/tsdb" + "github.com/grafana/grafana/pkg/util" +) + +// POST /api/alert-definitions/eval +func (hs *HTTPServer) ConditionsEval(c *models.ReqContext, dto dtos.EvalAlertConditionsCommand) Response { + alertCtx, cancelFn := context.WithTimeout(context.Background(), setting.AlertingEvaluationTimeout) + defer cancelFn() + + alertExecCtx := eval.AlertExecCtx{Ctx: alertCtx, SignedInUser: c.SignedInUser} + + fromStr := c.Query("from") + if fromStr == "" { + fromStr = "now-3h" + } + + toStr := c.Query("to") + if toStr == "" { + toStr = "now" + } + + execResult, err := dto.Conditions.Execute(alertExecCtx, fromStr, 
toStr) + if err != nil { + return Error(400, "Failed to execute conditions", err) + } + + evalResults, err := eval.EvaluateExecutionResult(execResult) + if err != nil { + return Error(400, "Failed to evaluate results", err) + } + + frame := evalResults.AsDataFrame() + df := tsdb.NewDecodedDataFrames([]*data.Frame{&frame}) + instances, err := df.Encoded() + if err != nil { + return Error(400, "Failed to encode result dataframes", err) + } + + return JSON(200, util.DynMap{ + "instances": instances, + }) +} + +// GET /api/alert-definitions/eval/:dashboardID/:panelID/:refID +func (hs *HTTPServer) AlertDefinitionEval(c *models.ReqContext) Response { + dashboardID := c.ParamsInt64(":dashboardID") + panelID := c.ParamsInt64(":panelID") + conditionRefID := c.Params(":refID") + + fromStr := c.Query("from") + if fromStr == "" { + fromStr = "now-3h" + } + + toStr := c.Query("to") + if toStr == "" { + toStr = "now" + } + + conditions, err := hs.AlertNG.LoadAlertConditions(dashboardID, panelID, conditionRefID, c.SignedInUser, c.SkipCache) + if err != nil { + return Error(400, "Failed to load conditions", err) + } + + alertCtx, cancelFn := context.WithTimeout(context.Background(), setting.AlertingEvaluationTimeout) + defer cancelFn() + + alertExecCtx := eval.AlertExecCtx{Ctx: alertCtx, SignedInUser: c.SignedInUser} + + execResult, err := conditions.Execute(alertExecCtx, fromStr, toStr) + if err != nil { + return Error(400, "Failed to execute conditions", err) + } + + evalResults, err := eval.EvaluateExecutionResult(execResult) + if err != nil { + return Error(400, "Failed to evaluate results", err) + } + + frame := evalResults.AsDataFrame() + + df := tsdb.NewDecodedDataFrames([]*data.Frame{&frame}) + instances, err := df.Encoded() + if err != nil { + return Error(400, "Failed to encode result dataframes", err) + } + + return JSON(200, util.DynMap{ + "instances": instances, + }) +} diff --git a/pkg/models/datasource.go b/pkg/models/datasource.go index 1e0a2190bba..4af1ece71de 
100644 --- a/pkg/models/datasource.go +++ b/pkg/models/datasource.go @@ -42,29 +42,29 @@ var ( type DsAccess string type DataSource struct { - Id int64 - OrgId int64 - Version int + Id int64 `json:"id"` + OrgId int64 `json:"orgId"` + Version int `json:"version"` - Name string - Type string - Access DsAccess - Url string - Password string - User string - Database string - BasicAuth bool - BasicAuthUser string - BasicAuthPassword string - WithCredentials bool - IsDefault bool - JsonData *simplejson.Json - SecureJsonData securejsondata.SecureJsonData - ReadOnly bool - Uid string + Name string `json:"name"` + Type string `json:"type"` + Access DsAccess `json:"access"` + Url string `json:"url"` + Password string `json:"password"` + User string `json:"user"` + Database string `json:"database"` + BasicAuth bool `json:"basicAuth"` + BasicAuthUser string `json:"basicAuthUser"` + BasicAuthPassword string `json:"basicAuthPassword"` + WithCredentials bool `json:"withCredentials"` + IsDefault bool `json:"isDefault"` + JsonData *simplejson.Json `json:"jsonData"` + SecureJsonData securejsondata.SecureJsonData `json:"secureJsonData"` + ReadOnly bool `json:"readOnly"` + Uid string `json:"uid"` - Created time.Time - Updated time.Time + Created time.Time `json:"created"` + Updated time.Time `json:"updated"` } // DecryptedBasicAuthPassword returns data source basic auth password in plain text. 
It uses either deprecated diff --git a/pkg/services/ngalert/eval.go b/pkg/services/ngalert/eval.go new file mode 100644 index 00000000000..b654381e7f5 --- /dev/null +++ b/pkg/services/ngalert/eval.go @@ -0,0 +1,284 @@ +package eval + +import ( + "context" + "encoding/json" + "errors" + "fmt" + + "github.com/grafana/grafana-plugin-sdk-go/data" + "github.com/grafana/grafana/pkg/bus" + "github.com/grafana/grafana/pkg/components/simplejson" + "github.com/grafana/grafana/pkg/models" + "github.com/grafana/grafana/pkg/plugins" + "github.com/grafana/grafana/pkg/registry" + "github.com/grafana/grafana/pkg/services/datasources" + "github.com/grafana/grafana/pkg/tsdb" +) + +type minimalDashboard struct { + Panels []struct { + ID int64 `json:"id"` + Datasource string `json:"datasource"` + Targets []*simplejson.Json `json:"targets"` + } `json:"panels"` +} + +type AlertNG struct { + DatasourceCache datasources.CacheService `inject:""` +} + +func init() { + registry.RegisterService(&AlertNG{}) +} + +// Init initializes the AlertNG service. 
+func (e *AlertNG) Init() error { + return nil +} + +type AlertExecCtx struct { + AlertDefitionID int64 + SignedInUser *models.SignedInUser + + Ctx context.Context +} + +// Conditions contains the queries and expressions to run and the refID (Condition) of the one whose result is evaluated. +type Conditions struct { + Condition string `json:"condition"` + + QueriesAndExpressions []tsdb.Query `json:"queriesAndExpressions"` +} + +type ExecutionResult struct { + AlertDefinitionId int64 + + Error error + + Results data.Frames +} + +type EvalResults []EvalResult + +type EvalResult struct { + Instance data.Labels + State State // Enum +} + +type State int + +const ( + Normal State = iota + Warning + Critical + Error +) + +func (s State) String() string { + return [...]string{"Normal", "Warning", "Critical", "Error"}[s] +} + +// IsValid checks the conditions validity +func (c Conditions) IsValid() bool { + /* + if c.WarnCondition == "" && c.CritCondition == "" { + return false + } + */ + + // TODO search for refIDs in QueriesAndExpressions + return len(c.QueriesAndExpressions) != 0 +} + +// LoadAlertConditions returns a Conditions object built from the queries of the given dashboard panel; conditionRefID is stored as its Condition. 
+func (ng *AlertNG) LoadAlertConditions(dashboardID int64, panelID int64, conditionRefID string, signedInUser *models.SignedInUser, skipCache bool) (*Conditions, error) { + //func (ng *AlertNG) LoadAlertConditions(alertDefinitionID int64, signedInUser *models.SignedInUser, skipCache bool) (*Conditions, error) { + /* + getAlertByIDQuery := models.GetAlertByIdQuery{Id: alertDefinitionID} + if err := bus.Dispatch(&getAlertByIDQuery); err != nil { + return nil, err + } + dashboardID := getAlertByIDQuery.Result.DashboardId + panelID := getAlertByIDQuery.Result.PanelId + */ + + // get queries from the dashboard (because GEL expressions cannot be stored in alerts so far) + getDashboardQuery := models.GetDashboardQuery{Id: dashboardID} + if err := bus.Dispatch(&getDashboardQuery); err != nil { + return nil, err + } + + blob, err := getDashboardQuery.Result.Data.MarshalJSON() + if err != nil { + return nil, errors.New("Failed to marshal dashboard JSON") + } + var dash minimalDashboard + err = json.Unmarshal(blob, &dash) + if err != nil { + return nil, errors.New("Failed to unmarshal dashboard JSON") + } + + conditions := Conditions{} + for _, p := range dash.Panels { + if p.ID == panelID { + panelDatasource := p.Datasource + var ds *models.DataSource + for i, query := range p.Targets { + refID := query.Get("refId").MustString("A") + queryDatasource := query.Get("datasource").MustString() + + if i == 0 && queryDatasource != "__expr__" { + dsName := panelDatasource + if queryDatasource != "" { + dsName = queryDatasource + } + + getDataSourceByNameQuery := models.GetDataSourceByNameQuery{Name: dsName, OrgId: getDashboardQuery.Result.OrgId} + if err := bus.Dispatch(&getDataSourceByNameQuery); err != nil { + return nil, err + } + + ds, err = ng.DatasourceCache.GetDatasource(getDataSourceByNameQuery.Result.Id, signedInUser, skipCache) + if err != nil { + return nil, err + } + } + + if ds == nil { + return nil, errors.New("No datasource reference found") + } + + if queryDatasource 
== "" { + query.Set("datasource", ds.Name) + } + + if query.Get("datasourceId").MustString() == "" { + query.Set("datasourceId", ds.Id) + } + + if query.Get("orgId").MustString() == "" { // GEL requires orgID inside the query JSON + // need to decide which organisation id is expected there + // Grafana queries pass the signed-in user's organisation id: + // https://github.com/grafana/grafana/blob/34a355fe542b511ed02976523aa6716aeb00bde6/packages/grafana-runtime/src/utils/DataSourceWithBackend.ts#L60 + // but I think that it should be datasource org id instead + query.Set("orgId", 0) + } + + if query.Get("maxDataPoints").MustString() == "" { // GEL requires maxDataPoints inside the query JSON + query.Set("maxDataPoints", 100) + } + + // intervalMS is calculated by the frontend + // should we do something similar? + if query.Get("intervalMs").MustString() == "" { // GEL requires intervalMs inside the query JSON + query.Set("intervalMs", 1000) + } + + conditions.QueriesAndExpressions = append(conditions.QueriesAndExpressions, tsdb.Query{ + RefId: refID, + MaxDataPoints: query.Get("maxDataPoints").MustInt64(100), + IntervalMs: query.Get("intervalMs").MustInt64(1000), + QueryType: query.Get("queryType").MustString(""), + Model: query, + DataSource: ds, + }) + } + } + } + conditions.Condition = conditionRefID + return &conditions, nil +} + +// Execute runs the queries and expressions and returns the frames produced for the Condition refID. 
+func (conditions *Conditions) Execute(ctx AlertExecCtx, fromStr, toStr string) (*ExecutionResult, error) { + result := ExecutionResult{} + if !conditions.IsValid() { + return nil, fmt.Errorf("Invalid conditions") + } + + request := &tsdb.TsdbQuery{ + TimeRange: tsdb.NewTimeRange(fromStr, toStr), + Debug: true, + User: ctx.SignedInUser, + } + for i := range conditions.QueriesAndExpressions { + request.Queries = append(request.Queries, &conditions.QueriesAndExpressions[i]) + } + + resp, err := plugins.Transform.Transform(ctx.Ctx, request) + if err != nil { + result.Error = err + return &result, err + } + + conditionResult := resp.Results[conditions.Condition] + if conditionResult == nil { + err = fmt.Errorf("No GEL results") + result.Error = err + return &result, err + } + + result.Results, err = conditionResult.Dataframes.Decoded() + if err != nil { + result.Error = err + return &result, err + } + + return &result, nil +} + +// EvaluateExecutionResult takes the ExecutionResult and returns one EvalResult per frame, +// holding the labels of the frame's single field and the state derived from its value. 
+func EvaluateExecutionResult(results *ExecutionResult) (EvalResults, error) { + evalResults := make([]EvalResult, 0) + labels := make(map[string]bool) + for _, f := range results.Results { + rowLen, err := f.RowLen() + if err != nil { + return nil, fmt.Errorf("Unable to get frame row length") + } + if rowLen > 1 { + return nil, fmt.Errorf("Invalid frame %v: row length %v", f.Name, rowLen) + } + + if len(f.Fields) > 1 { + return nil, fmt.Errorf("Invalid frame %v: field length %v", f.Name, len(f.Fields)) + } + + if f.Fields[0].Type() != data.FieldTypeNullableFloat64 { + return nil, fmt.Errorf("Invalid frame %v: field type %v", f.Name, f.Fields[0].Type()) + } + + labelsStr := f.Fields[0].Labels.String() + _, ok := labels[labelsStr] + if ok { + return nil, fmt.Errorf("Invalid frame %v: frames cannot uniquely be identified by its labels: %q", f.Name, labelsStr) + } + labels[labelsStr] = true + + state := Normal + val, err := f.Fields[0].FloatAt(0) + if err != nil || val != 0 { + state = Critical + } + + evalResults = append(evalResults, EvalResult{ + Instance: f.Fields[0].Labels, + State: state, + }) + } + return evalResults, nil +} + +// AsDataFrame forms the EvalResults in Frame suitable for displaying in the table panel of the front end. +// This may be temporary, as there might be a fair amount we want to display in the frontend, and it might not make sense to store that in data.Frame. +// For the first pass, I would expect a Frame with a single row, and a column for each instance with a boolean value. +func (evalResults EvalResults) AsDataFrame() data.Frame { + fields := make([]*data.Field, 0) + for _, evalResult := range evalResults { + fields = append(fields, data.NewField("", evalResult.Instance, []bool{evalResult.State != Normal})) + } + f := data.NewFrame("", fields...) 
+ return *f +} diff --git a/pkg/setting/setting.go b/pkg/setting/setting.go index fcc6a2c625b..488a21db291 100644 --- a/pkg/setting/setting.go +++ b/pkg/setting/setting.go @@ -327,6 +327,11 @@ func (c Cfg) IsLiveEnabled() bool { return c.FeatureToggles["live"] } +// IsNgAlertEnabled returns whether the standalone alerts feature is enabled. +func (c Cfg) IsNgAlertEnabled() bool { + return c.FeatureToggles["ngalert"] +} + type CommandLineArgs struct { Config string HomePath string diff --git a/pkg/tsdb/models.go b/pkg/tsdb/models.go index 78c3cb7718c..4d4fb50aeb4 100644 --- a/pkg/tsdb/models.go +++ b/pkg/tsdb/models.go @@ -19,12 +19,12 @@ type TsdbQuery struct { } type Query struct { - RefId string - Model *simplejson.Json - DataSource *models.DataSource - MaxDataPoints int64 - IntervalMs int64 - QueryType string + RefId string `json:"refID"` + Model *simplejson.Json `json:"model,omitempty"` + DataSource *models.DataSource `json:"datasource"` + MaxDataPoints int64 `json:"maxDataPoints"` + IntervalMs int64 `json:"intervalMs"` + QueryType string `json:"queryType"` } type Response struct {