From f80463a8a926fa800e9ee52c02455280a8c04fb5 Mon Sep 17 00:00:00 2001 From: George Robinson Date: Mon, 5 Jun 2023 10:35:18 +0100 Subject: [PATCH] Alerting: Add heuristics back to datasource healthchecks (#69329) This commit adds heuristics back to datasource healthchecks, as they were removed in #66198. The healthcheck for Prometheus datasources also returns the kind (Prometheus or Mimir) and a boolean indicating whether the ruler API is enabled. --- pkg/tsdb/prometheus/healthcheck.go | 35 ++++++-- pkg/tsdb/prometheus/heuristics.go | 112 +++++++++++++++++++++++++ pkg/tsdb/prometheus/heuristics_test.go | 98 ++++++++++++++++++++++ 3 files changed, 236 insertions(+), 9 deletions(-) create mode 100644 pkg/tsdb/prometheus/heuristics.go create mode 100644 pkg/tsdb/prometheus/heuristics_test.go diff --git a/pkg/tsdb/prometheus/healthcheck.go b/pkg/tsdb/prometheus/healthcheck.go index a12aaec3daf..4d75df134c6 100644 --- a/pkg/tsdb/prometheus/healthcheck.go +++ b/pkg/tsdb/prometheus/healthcheck.go @@ -7,9 +7,9 @@ import ( "fmt" "time" - "github.com/grafana/grafana-plugin-sdk-go/backend" "github.com/grafana/kindsys" + "github.com/grafana/grafana-plugin-sdk-go/backend" "github.com/grafana/grafana/pkg/infra/log" "github.com/grafana/grafana/pkg/tsdb/prometheus/kinds/dataquery" "github.com/grafana/grafana/pkg/tsdb/prometheus/models" @@ -28,14 +28,32 @@ func (s *Service) CheckHealth(ctx context.Context, req *backend.CheckHealthReque // check that the datasource exists if err != nil { - return getHealthCheckMessage(logger, "error getting datasource info", err) + return getHealthCheckMessage("error getting datasource info", err) } if ds == nil { - return getHealthCheckMessage(logger, "", errors.New("invalid datasource info received")) + return getHealthCheckMessage("", errors.New("invalid datasource info received")) } - return healthcheck(ctx, req, ds) + hc, err := healthcheck(ctx, req, ds) + if err != nil { + logger.Warn("error performing prometheus healthcheck", "err", err.Error()) + return
nil, err + } + + heuristics, err := getHeuristics(ctx, ds) + if err != nil { + logger.Warn("failed to get prometheus heuristics", "err", err.Error()) + } else { + jsonDetails, err := json.Marshal(heuristics) + if err != nil { + logger.Warn("failed to marshal heuristics", "err", err) + } else { + hc.JSONDetails = jsonDetails + } + } + + return hc, nil } func healthcheck(ctx context.Context, req *backend.CheckHealthRequest, i *instance) (*backend.CheckHealthResult, error) { @@ -64,18 +82,18 @@ func healthcheck(ctx context.Context, req *backend.CheckHealthRequest, i *instan }) if err != nil { - return getHealthCheckMessage(logger, "There was an error returned querying the Prometheus API.", err) + return getHealthCheckMessage("There was an error returned querying the Prometheus API.", err) } if resp.Responses[refID].Error != nil { - return getHealthCheckMessage(logger, "There was an error returned querying the Prometheus API.", + return getHealthCheckMessage("There was an error returned querying the Prometheus API.", errors.New(resp.Responses[refID].Error.Error())) } - return getHealthCheckMessage(logger, "Successfully queried the Prometheus API.", nil) + return getHealthCheckMessage("Successfully queried the Prometheus API.", nil) } -func getHealthCheckMessage(logger log.Logger, message string, err error) (*backend.CheckHealthResult, error) { +func getHealthCheckMessage(message string, err error) (*backend.CheckHealthResult, error) { if err == nil { return &backend.CheckHealthResult{ Status: backend.HealthStatusOk, @@ -83,7 +101,6 @@ func getHealthCheckMessage(logger log.Logger, message string, err error) (*backe }, nil } - logger.Warn("error performing prometheus healthcheck", "err", err.Error()) errorMessage := fmt.Sprintf("%s - %s", err.Error(), message) return &backend.CheckHealthResult{ diff --git a/pkg/tsdb/prometheus/heuristics.go b/pkg/tsdb/prometheus/heuristics.go new file mode 100644 index 00000000000..97dfaac5539 --- /dev/null +++ 
b/pkg/tsdb/prometheus/heuristics.go @@ -0,0 +1,112 @@ +package prometheus + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "net/http" + + "github.com/grafana/grafana-plugin-sdk-go/backend" +) + +const ( + KindPrometheus = "Prometheus" + KindMimir = "Mimir" +) + +var ( + ErrNoBuildInfo = errors.New("no build info") +) + +type BuildInfoRequest struct { + PluginContext backend.PluginContext +} + +type BuildInfoResponse struct { + Status string `json:"status"` + Data BuildInfoResponseData `json:"data"` +} + +type BuildInfoResponseData struct { + Version string `json:"version"` + Revision string `json:"revision"` + Branch string `json:"branch"` + Features map[string]string `json:"features"` + BuildUser string `json:"buildUser"` + BuildDate string `json:"buildDate"` + GoVersion string `json:"goVersion"` +} + +func (s *Service) GetBuildInfo(ctx context.Context, req BuildInfoRequest) (*BuildInfoResponse, error) { + ds, err := s.getInstance(ctx, req.PluginContext) + if err != nil { + return nil, err + } + return getBuildInfo(ctx, ds) +} + +// getBuildInfo queries /api/v1/status/buildinfo +func getBuildInfo(ctx context.Context, i *instance) (*BuildInfoResponse, error) { + resp, err := i.resource.Execute(ctx, &backend.CallResourceRequest{ + Path: "api/v1/status/buildinfo", + }) + if err != nil { + return nil, err + } + if resp.Status == http.StatusNotFound { + return nil, ErrNoBuildInfo + } + if resp.Status != http.StatusOK { + return nil, fmt.Errorf("unexpected response %d", resp.Status) + } + res := BuildInfoResponse{} + if err := json.Unmarshal(resp.Body, &res); err != nil { + return nil, fmt.Errorf("failed to unmarshal JSON: %w", err) + } + return &res, nil +} + +type HeuristicsRequest struct { + PluginContext backend.PluginContext +} + +type Heuristics struct { + Application string `json:"application"` + Features Features `json:"features"` +} + +type Features struct { + RulerApiEnabled bool `json:"rulerApiEnabled"` +} + +func (s *Service) GetHeuristics(ctx 
context.Context, req HeuristicsRequest) (*Heuristics, error) { + ds, err := s.getInstance(ctx, req.PluginContext) + if err != nil { + return nil, err + } + return getHeuristics(ctx, ds) +} + +func getHeuristics(ctx context.Context, i *instance) (*Heuristics, error) { + heuristics := Heuristics{ + Application: "unknown", + Features: Features{ + RulerApiEnabled: false, + }, + } + buildInfo, err := getBuildInfo(ctx, i) + if err != nil { + logger.Warn("failed to get prometheus buildinfo", "err", err.Error()) + return nil, fmt.Errorf("failed to get buildinfo: %w", err) + } + if len(buildInfo.Data.Features) == 0 { + // If there are no features then this is a Prometheus datasource + heuristics.Application = KindPrometheus + heuristics.Features.RulerApiEnabled = false + } else { + heuristics.Application = KindMimir + heuristics.Features.RulerApiEnabled = true + } + return &heuristics, nil +} diff --git a/pkg/tsdb/prometheus/heuristics_test.go b/pkg/tsdb/prometheus/heuristics_test.go new file mode 100644 index 00000000000..10e7181f511 --- /dev/null +++ b/pkg/tsdb/prometheus/heuristics_test.go @@ -0,0 +1,98 @@ +package prometheus + +import ( + "context" + "io" + "net/http" + "strconv" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/grafana/grafana-plugin-sdk-go/backend/datasource" + sdkHttpClient "github.com/grafana/grafana-plugin-sdk-go/backend/httpclient" + "github.com/grafana/grafana/pkg/infra/httpclient" + "github.com/grafana/grafana/pkg/services/featuremgmt" + "github.com/grafana/grafana/pkg/setting" +) + +type heuristicsProvider struct { + httpclient.Provider + http.RoundTripper +} + +type heuristicsSuccessRoundTripper struct { + res io.ReadCloser + status int +} + +func (rt *heuristicsSuccessRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { + return &http.Response{ + Status: strconv.Itoa(rt.status), + StatusCode: rt.status, + Header: nil, + Body: rt.res, + ContentLength: 0, + 
Request: req, + }, nil +} + +func (provider *heuristicsProvider) New(opts ...sdkHttpClient.Options) (*http.Client, error) { + client := &http.Client{} + client.Transport = provider.RoundTripper + return client, nil +} + +func (provider *heuristicsProvider) GetTransport(opts ...sdkHttpClient.Options) (http.RoundTripper, error) { + return provider.RoundTripper, nil +} + +func getHeuristicsMockProvider(rt http.RoundTripper) *heuristicsProvider { + return &heuristicsProvider{ + RoundTripper: rt, + } +} + +func Test_GetHeuristics(t *testing.T) { + t.Run("should return Prometheus", func(t *testing.T) { + rt := heuristicsSuccessRoundTripper{ + res: io.NopCloser(strings.NewReader("{\"status\":\"success\",\"data\":{\"version\":\"1.0\"}}")), + status: http.StatusOK, + } + httpProvider := getHeuristicsMockProvider(&rt) + s := &Service{ + im: datasource.NewInstanceManager(newInstanceSettings(httpProvider, &setting.Cfg{}, &featuremgmt.FeatureManager{}, nil)), + } + + req := HeuristicsRequest{ + PluginContext: getPluginContext(), + } + res, err := s.GetHeuristics(context.Background(), req) + assert.NoError(t, err) + require.NotNil(t, res) + assert.Equal(t, KindPrometheus, res.Application) + assert.Equal(t, Features{RulerApiEnabled: false}, res.Features) + }) + + t.Run("should return Mimir", func(t *testing.T) { + rt := heuristicsSuccessRoundTripper{ + res: io.NopCloser(strings.NewReader("{\"status\":\"success\",\"data\":{\"features\":{\"foo\":\"bar\"},\"version\":\"1.0\"}}")), + status: http.StatusOK, + } + httpProvider := getHeuristicsMockProvider(&rt) + s := &Service{ + im: datasource.NewInstanceManager(newInstanceSettings(httpProvider, &setting.Cfg{}, &featuremgmt.FeatureManager{}, nil)), + } + + req := HeuristicsRequest{ + PluginContext: getPluginContext(), + } + res, err := s.GetHeuristics(context.Background(), req) + assert.NoError(t, err) + require.NotNil(t, res) + assert.Equal(t, KindMimir, res.Application) + assert.Equal(t, Features{RulerApiEnabled: true}, res.Features) 
+ }) +}