From bb7acf9e47e82e5a78d6addfc6ce78b478078199 Mon Sep 17 00:00:00 2001
From: Ludovic Viaud
Date: Wed, 7 Jun 2023 13:36:03 +0200
Subject: [PATCH] Add prometheus querydata instrumentation (#69447)

---
 .../instrumentation/instrumentation.go        | 112 ++++++++++++++++
 .../instrumentation/instrumentation_test.go   | 122 ++++++++++++++++++
 pkg/tsdb/prometheus/prometheus.go             |  11 +-
 3 files changed, 243 insertions(+), 2 deletions(-)
 create mode 100644 pkg/tsdb/prometheus/instrumentation/instrumentation.go
 create mode 100644 pkg/tsdb/prometheus/instrumentation/instrumentation_test.go

diff --git a/pkg/tsdb/prometheus/instrumentation/instrumentation.go b/pkg/tsdb/prometheus/instrumentation/instrumentation.go
new file mode 100644
index 00000000000..1cab64aef18
--- /dev/null
+++ b/pkg/tsdb/prometheus/instrumentation/instrumentation.go
@@ -0,0 +1,112 @@
+// Package instrumentation records Prometheus metrics about the requests
+// served by the prometheus backend plugin.
+package instrumentation
+
+import (
+	"github.com/grafana/grafana-plugin-sdk-go/backend"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promauto"
+)
+
+var (
+	// pluginRequestCounter counts backend plugin requests, partitioned by the
+	// "endpoint", "status" and "errorSource" labels.
+	pluginRequestCounter = promauto.NewCounterVec(prometheus.CounterOpts{
+		Namespace: "grafana",
+		Name:      "prometheus_plugin_backend_request_count",
+		Help:      "The total amount of prometheus backend plugin requests",
+	}, []string{"endpoint", "status", "errorSource"})
+)
+
+const (
+	// Values of the "status" label.
+	StatusOK    = "ok"
+	StatusError = "error"
+
+	// Values of the "endpoint" label.
+	EndpointCallResource = "callResource"
+	EndpointQueryData    = "queryData"
+
+	// Values of the "errorSource" label.
+	PluginSource   = "plugin"
+	ExternalSource = "external"
+	DatabaseSource = "database"
+	NoneSource     = "none"
+)
+
+// UpdateQueryDataMetrics increments the request counter for the queryData
+// endpoint. The status label is StatusError when err is non-nil and StatusOK
+// otherwise; the errorSource label is derived from err and resp. A nil resp
+// is accepted.
+func UpdateQueryDataMetrics(err error, resp *backend.QueryDataResponse) {
+	status := StatusOK
+	if err != nil {
+		status = StatusError
+	}
+
+	errorSource := getErrorSource(err, resp)
+
+	pluginRequestCounter.WithLabelValues(EndpointQueryData, status, errorSource).Inc()
+}
+
+// getErrorSource returns the errorSource label value for a whole query data
+// request: PluginSource when err is non-nil, NoneSource when there is neither
+// an error nor a response, otherwise the most severe source found among the
+// individual responses.
+func getErrorSource(err error, resp *backend.QueryDataResponse) string {
+	if err != nil {
+		return PluginSource
+	}
+
+	// Recording a metric must never panic on the request path. A nil response
+	// holds no per-query result, so there is no failure to attribute.
+	if resp == nil {
+		return NoneSource
+	}
+
+	// When the responses carry different error sources, report the most severe one.
+	// The priority order is: pluginSource > databaseSource > externalSource > noneSource
+	var errorSource = NoneSource
+	for _, res := range resp.Responses {
+		responseErrorSource := getErrorSourceForResponse(res)
+
+		if responseErrorSource == PluginSource {
+			return PluginSource
+		}
+
+		if responseErrorSource == DatabaseSource {
+			errorSource = DatabaseSource
+		}
+
+		if responseErrorSource == ExternalSource && errorSource == NoneSource {
+			errorSource = ExternalSource
+		}
+	}
+
+	return errorSource
+}
+
+// getErrorSourceForResponse classifies a single response: a non-nil Error or
+// a non-auth 4xx status maps to PluginSource, a status of 500 or above to
+// DatabaseSource, an auth-related 4xx status to ExternalSource and anything
+// else to NoneSource.
+func getErrorSourceForResponse(res backend.DataResponse) string {
+	if res.Error != nil {
+		return PluginSource
+	}
+
+	if res.Status >= 500 {
+		return DatabaseSource
+	}
+
+	if res.Status >= 400 {
+		// Those error codes are related to authentication and authorization.
+		if res.Status == 401 || res.Status == 402 || res.Status == 403 || res.Status == 407 {
+			return ExternalSource
+		}
+
+		return PluginSource
+	}
+
+	return NoneSource
+}
diff --git a/pkg/tsdb/prometheus/instrumentation/instrumentation_test.go b/pkg/tsdb/prometheus/instrumentation/instrumentation_test.go
new file mode 100644
index 00000000000..7298f39908d
--- /dev/null
+++ b/pkg/tsdb/prometheus/instrumentation/instrumentation_test.go
@@ -0,0 +1,122 @@
+package instrumentation
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/grafana/grafana-plugin-sdk-go/backend"
+)
+
+func checkErrorSource(t *testing.T, expected, actual string) {
+	t.Helper()
+	if expected != actual {
+		t.Errorf("expected errorSource to be %v, but got %v", expected, actual)
+	}
+}
+
+func TestGetErrorSourceForResponse(t *testing.T) {
+	t.Run("A response that return an error should return pluginSource", func(t *testing.T) {
+		errorSource := getErrorSourceForResponse(backend.DataResponse{Error: fmt.Errorf("error")})
+		checkErrorSource(t, PluginSource, errorSource)
+	})
+
+	t.Run("A response with an http satus code > 500 should return databaseSource", func(t *testing.T) {
+		errorSource := getErrorSourceForResponse(backend.DataResponse{Error: nil, Status: 500})
+		checkErrorSource(t, DatabaseSource, errorSource)
+
+		errorSource = getErrorSourceForResponse(backend.DataResponse{Error: nil, Status: 503})
+		checkErrorSource(t, DatabaseSource, errorSource)
+
+		errorSource = getErrorSourceForResponse(backend.DataResponse{Error: nil, Status: 507})
+		checkErrorSource(t, DatabaseSource, errorSource)
+	})
+
+	t.Run("A response with an http satus related to auth (401, 402, 403, 407), should return externalSource", func(t *testing.T) {
+		errorSource := getErrorSourceForResponse(backend.DataResponse{Error: nil, Status: 401})
+		checkErrorSource(t, ExternalSource, errorSource)
+
+		errorSource = getErrorSourceForResponse(backend.DataResponse{Error: nil, Status: 402})
+		checkErrorSource(t, ExternalSource, errorSource)
+
+		errorSource = getErrorSourceForResponse(backend.DataResponse{Error: nil, Status: 403})
+		checkErrorSource(t, ExternalSource, errorSource)
+
+		errorSource = getErrorSourceForResponse(backend.DataResponse{Error: nil, Status: 407})
+		checkErrorSource(t, ExternalSource, errorSource)
+	})
+
+	t.Run("A response with an http satus of 4xx but not related to auth (401, 402, 403, 407), should return pluginSource", func(t *testing.T) {
+		errorSource := getErrorSourceForResponse(backend.DataResponse{Error: nil, Status: 400})
+		checkErrorSource(t, PluginSource, errorSource)
+
+		errorSource = getErrorSourceForResponse(backend.DataResponse{Error: nil, Status: 404})
+		checkErrorSource(t, PluginSource, errorSource)
+
+		errorSource = getErrorSourceForResponse(backend.DataResponse{Error: nil, Status: 405})
+		checkErrorSource(t, PluginSource, errorSource)
+	})
+
+	t.Run("A response without error and with an http status of 2xx, should return noneSource", func(t *testing.T) {
+		errorSource := getErrorSourceForResponse(backend.DataResponse{Error: nil, Status: 200})
+		checkErrorSource(t, NoneSource, errorSource)
+
+		errorSource = getErrorSourceForResponse(backend.DataResponse{Error: nil, Status: 201})
+		checkErrorSource(t, NoneSource, errorSource)
+	})
+}
+
+func TestGetErrorSource(t *testing.T) {
+	t.Run("If status of backend.QueryDataResponse is statusError, then errorSource is pluginSource ", func(t *testing.T) {
+		errorSource := getErrorSource(fmt.Errorf("a random error"), nil)
+		checkErrorSource(t, PluginSource, errorSource)
+	})
+
+	t.Run("If there is no error and the backend.QueryDataResponse is nil, then errorSource is noneSource", func(t *testing.T) {
+		errorSource := getErrorSource(nil, nil)
+		checkErrorSource(t, NoneSource, errorSource)
+	})
+
+	t.Run("If status of backend.QueryDataResponse is statusOK, then errorSource is the most severe response's errorSource: pluginSource > databaseSource > externalSource > noneSource", func(t *testing.T) {
+		errorSource := getErrorSource(nil, &backend.QueryDataResponse{
+			Responses: map[string]backend.DataResponse{
+				"A": {Error: fmt.Errorf("error")},
+				"B": {Error: nil, Status: 200},
+			},
+		})
+		checkErrorSource(t, PluginSource, errorSource)
+
+		errorSource = getErrorSource(nil, &backend.QueryDataResponse{
+			Responses: map[string]backend.DataResponse{
+				"A": {Error: nil, Status: 400},
+				"B": {Error: nil, Status: 500},
+				"C": {Error: nil, Status: 401},
+				"D": {Error: nil, Status: 200},
+			},
+		})
+		checkErrorSource(t, PluginSource, errorSource)
+
+		errorSource = getErrorSource(nil, &backend.QueryDataResponse{
+			Responses: map[string]backend.DataResponse{
+				"B": {Error: nil, Status: 500},
+				"C": {Error: nil, Status: 401},
+				"D": {Error: nil, Status: 200},
+			},
+		})
+		checkErrorSource(t, DatabaseSource, errorSource)
+
+		errorSource = getErrorSource(nil, &backend.QueryDataResponse{
+			Responses: map[string]backend.DataResponse{
+				"C": {Error: nil, Status: 401},
+				"D": {Error: nil, Status: 200},
+			},
+		})
+		checkErrorSource(t, ExternalSource, errorSource)
+
+		errorSource = getErrorSource(nil, &backend.QueryDataResponse{
+			Responses: map[string]backend.DataResponse{
+				"D": {Error: nil, Status: 200},
+			},
+		})
+		checkErrorSource(t, NoneSource, errorSource)
+	})
+}
diff --git a/pkg/tsdb/prometheus/prometheus.go b/pkg/tsdb/prometheus/prometheus.go
index ee44cfef844..3ace8cb5e73 100644
--- a/pkg/tsdb/prometheus/prometheus.go
+++ b/pkg/tsdb/prometheus/prometheus.go
@@ -19,6 +19,7 @@ import (
 	"github.com/grafana/grafana/pkg/services/featuremgmt"
 	"github.com/grafana/grafana/pkg/setting"
 	"github.com/grafana/grafana/pkg/tsdb/prometheus/client"
+	"github.com/grafana/grafana/pkg/tsdb/prometheus/instrumentation"
 	"github.com/grafana/grafana/pkg/tsdb/prometheus/querydata"
 	"github.com/grafana/grafana/pkg/tsdb/prometheus/resource"
 )
@@ -78,15 +79,21 @@ func newInstanceSettings(httpClientProvider httpclient.Provider, cfg *setting.Cf
 
 func (s *Service) QueryData(ctx context.Context, req *backend.QueryDataRequest) (*backend.QueryDataResponse, error) {
 	if len(req.Queries) == 0 {
-		return &backend.QueryDataResponse{}, fmt.Errorf("query contains no queries")
+		err := fmt.Errorf("query contains no queries")
+		instrumentation.UpdateQueryDataMetrics(err, nil)
+		return &backend.QueryDataResponse{}, err
 	}
 
 	i, err := s.getInstance(ctx, req.PluginContext)
 	if err != nil {
+		instrumentation.UpdateQueryDataMetrics(err, nil)
 		return nil, err
 	}
 
-	return i.queryData.Execute(ctx, req)
+	qd, err := i.queryData.Execute(ctx, req)
+	instrumentation.UpdateQueryDataMetrics(err, qd)
+
+	return qd, err
 }
 
 func (s *Service) CallResource(ctx context.Context, req *backend.CallResourceRequest, sender backend.CallResourceResponseSender) error {