Loki: Add backend healthcheck (#74330)

* add loki healthcheck

* remove `testDatasource` call

* remove unused error check

* change success message

* improve error messages
This commit is contained in:
Sven Grossmann
2023-09-05 08:59:13 +02:00
committed by GitHub
parent ed46e3444c
commit a403027608
7 changed files with 253 additions and 93 deletions

View File

@@ -4,8 +4,7 @@ const dataSourceName = 'LokiEditor';
const addDataSource = () => { const addDataSource = () => {
e2e.flows.addDataSource({ e2e.flows.addDataSource({
type: 'Loki', type: 'Loki',
expectedAlertMessage: expectedAlertMessage: 'Unable to connect with Loki. Please check the server logs for more details.',
'Unable to connect with Loki (Failed to call resource). Please check the server logs for more details.',
name: dataSourceName, name: dataSourceName,
form: () => { form: () => {
e2e().get('#connection-url').type('http://loki-url:3100'); e2e().get('#connection-url').type('http://loki-url:3100');

View File

@@ -5,8 +5,7 @@ const dataSourceName = 'LokiBuilder';
const addDataSource = () => { const addDataSource = () => {
e2e.flows.addDataSource({ e2e.flows.addDataSource({
type: 'Loki', type: 'Loki',
expectedAlertMessage: expectedAlertMessage: 'Unable to connect with Loki. Please check the server logs for more details.',
'Unable to connect with Loki (Failed to call resource). Please check the server logs for more details.',
name: dataSourceName, name: dataSourceName,
form: () => { form: () => {
e2e().get('#connection-url').type('http://loki-url:3100'); e2e().get('#connection-url').type('http://loki-url:3100');

View File

@@ -0,0 +1,104 @@
package loki
import (
"context"
"encoding/json"
"errors"
"fmt"
"time"
"github.com/grafana/grafana-plugin-sdk-go/backend"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/tsdb/loki/kinds/dataquery"
)
const (
// refID is the RefID given to the synthetic health check query; the same key
// is used to look up that query's entry in the QueryData response map.
refID = "__healthcheck__"
)
// CheckHealth reports whether the Loki data source referenced by
// req.PluginContext can be reached and answers a trivial query.
//
// The datasource instance is resolved through the instance manager first. If
// that lookup fails, an error result is returned together with the lookup
// error. If the lookup yields a nil instance, an error result is returned with
// a nil error. Otherwise the outcome of healthcheck is returned with a nil
// error; failures of the probe query are expressed through the result status.
func (s *Service) CheckHealth(ctx context.Context, req *backend.CheckHealthRequest) (*backend.CheckHealthResult, error) {
	logger := s.logger.New("endpoint", "CheckHealth")

	// Make sure the datasource exists before running the probe query.
	instance, lookupErr := s.im.Get(ctx, req.PluginContext)
	switch {
	case lookupErr != nil:
		wrapped := fmt.Errorf("failed to get datasource information: %w", lookupErr)
		return getHealthCheckMessage(wrapped, logger), lookupErr
	case instance == nil:
		// The lookup itself succeeded, so there is no error to propagate.
		return getHealthCheckMessage(errors.New("invalid datasource info received"), logger), nil
	}

	return healthcheck(ctx, req, s, logger), nil
}
// healthcheck sends the constant instant query "vector(1)+vector(1)" through
// s.QueryData and validates the answer: exactly one frame, exactly two fields,
// exactly one value per field, and a float64 value of 2 in the second field.
//
// Every failure (marshalling, transport, error reported in the response, or an
// unexpected response shape) is turned into an error health result through
// getHealthCheckMessage, which also logs the underlying cause. The function
// never returns nil.
func healthcheck(ctx context.Context, req *backend.CheckHealthRequest, s *Service, logger *log.ConcreteLogger) *backend.CheckHealthResult {
	step := "1s"
	qt := "instant"
	qm := dataquery.LokiDataQuery{
		Expr:      "vector(1)+vector(1)",
		Step:      &step,
		QueryType: &qt,
	}
	b, err := json.Marshal(&qm)
	if err != nil {
		return getHealthCheckMessage(fmt.Errorf("failed to marshal health check query: %w", err), logger)
	}

	query := backend.DataQuery{
		RefID: refID,
		// Fixed time range; the expected result (2) is checked independently of
		// the returned timestamp.
		TimeRange: backend.TimeRange{
			From: time.Unix(1, 0).UTC(),
			To:   time.Unix(4, 0).UTC(),
		},
		JSON: b,
	}
	resp, err := s.QueryData(ctx, &backend.QueryDataRequest{
		PluginContext: req.PluginContext,
		Queries:       []backend.DataQuery{query},
	})
	if err != nil {
		return getHealthCheckMessage(fmt.Errorf("error received while querying loki: %w", err), logger)
	}
	if resp == nil {
		return getHealthCheckMessage(fmt.Errorf("no response received while querying loki"), logger)
	}

	// A missing map entry yields the zero value, which is reported below as an
	// invalid dataframe length.
	dr := resp.Responses[refID]
	if dr.Error != nil {
		return getHealthCheckMessage(fmt.Errorf("error from loki: %w", dr.Error), logger)
	}

	frameLen := len(dr.Frames)
	if frameLen != 1 {
		return getHealthCheckMessage(fmt.Errorf("invalid dataframe length, expected %d got %d", 1, frameLen), logger)
	}
	frame := dr.Frames[0]
	if frame == nil {
		return getHealthCheckMessage(fmt.Errorf("invalid dataframe, got a nil frame"), logger)
	}

	fieldLen := len(frame.Fields)
	if fieldLen != 2 {
		return getHealthCheckMessage(fmt.Errorf("invalid dataframe field length, expected %d got %d", 2, fieldLen), logger)
	}

	// Both fields must hold exactly one value; the value is read from the
	// second field below, so checking only the first one is not sufficient.
	for _, field := range frame.Fields {
		if fieldValueLen := field.Len(); fieldValueLen != 1 {
			return getHealthCheckMessage(fmt.Errorf("invalid dataframe field value length, expected %d got %d", 1, fieldValueLen), logger)
		}
	}

	// Use the comma-ok form so an unexpected field type fails the health check
	// instead of panicking.
	rawValue := frame.Fields[1].At(0)
	rspValue, ok := rawValue.(float64)
	if !ok {
		return getHealthCheckMessage(fmt.Errorf("invalid response value type, expected float64 got %T", rawValue), logger)
	}
	if rspValue != 2 {
		return getHealthCheckMessage(fmt.Errorf("invalid response value, expected %d got %f", 2, rspValue), logger)
	}

	return getHealthCheckMessage(nil, logger)
}
// getHealthCheckMessage converts the outcome of a health check into a
// CheckHealthResult.
//
// A nil err produces an OK result. A non-nil err is logged through logger and
// produces an error result with a fixed, generic message; the error text
// itself is only written to the log, not placed in the result.
func getHealthCheckMessage(err error, logger *log.ConcreteLogger) *backend.CheckHealthResult {
	result := &backend.CheckHealthResult{
		Status:  backend.HealthStatusOk,
		Message: "Data source successfully connected.",
	}

	if err != nil {
		logger.Error("Loki health check failed", "error", err)
		result.Status = backend.HealthStatusError
		result.Message = "Unable to connect with Loki. Please check the server logs for more details."
	}

	return result
}

View File

@@ -0,0 +1,145 @@
package loki
import (
"context"
"io"
"net/http"
"strings"
"testing"
"time"
"github.com/grafana/grafana-plugin-sdk-go/backend"
"github.com/grafana/grafana-plugin-sdk-go/backend/datasource"
sdkHttpClient "github.com/grafana/grafana-plugin-sdk-go/backend/httpclient"
"github.com/grafana/grafana/pkg/infra/httpclient"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/infra/tracing"
"github.com/grafana/grafana/pkg/services/featuremgmt"
"github.com/stretchr/testify/assert"
)
// healthCheckProvider is a test double for httpclient.Provider. It embeds the
// interface (left nil by getMockProvider) and overrides New and GetTransport
// so that HTTP traffic is served by a round tripper of type T.
type healthCheckProvider[T http.RoundTripper] struct {
httpclient.Provider
// RoundTripper points at the T value most recently installed by
// getMockProvider or New.
RoundTripper *T
}
// healthCheckSuccessRoundTripper is a stateless fake transport whose RoundTrip
// always answers with a successful canned response.
type healthCheckSuccessRoundTripper struct{}
// healthCheckFailRoundTripper is a stateless fake transport whose RoundTrip
// always answers with a 400 response.
type healthCheckFailRoundTripper struct{}
// RoundTrip ignores the request contents and returns a 200 response whose body
// is a canned vector result with the value "2". The receiver is never
// dereferenced, so the method also works on a nil pointer.
func (rt *healthCheckSuccessRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
	const payload = `{
"data": {
"resultType": "vector",
"result": [
{
"metric": {},
"value": [
4000000000,
"2"
]
}
]
},
"status": "success"
}`

	resp := &http.Response{
		Status:     "200",
		StatusCode: http.StatusOK,
		Body:       io.NopCloser(strings.NewReader(payload)),
		Request:    req,
	}
	return resp, nil
}
// RoundTrip ignores the request contents and returns a 400 response with no
// body and a nil error. The receiver is never dereferenced, so the method also
// works on a nil pointer.
func (rt *healthCheckFailRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
	resp := new(http.Response)
	resp.Status = "400"
	resp.StatusCode = http.StatusBadRequest
	resp.Request = req
	// Header, Body and ContentLength are intentionally left at their zero values.
	return resp, nil
}
// New returns an *http.Client whose Transport is a fresh zero value of T and
// records a pointer to that value in provider.RoundTripper. The options are
// ignored and the returned error is always nil.
func (provider *healthCheckProvider[T]) New(opts ...sdkHttpClient.Options) (*http.Client, error) {
	var transport T
	provider.RoundTripper = &transport
	return &http.Client{Transport: transport}, nil
}
// GetTransport returns the zero value of T as the transport. The options are
// ignored and the returned error is always nil.
func (provider *healthCheckProvider[T]) GetTransport(opts ...sdkHttpClient.Options) (http.RoundTripper, error) {
	var transport T
	return transport, nil
}
// getMockProvider builds a healthCheckProvider for round tripper type T, with
// RoundTripper pointing at a zero value of T and the embedded Provider unset.
func getMockProvider[T http.RoundTripper]() *healthCheckProvider[T] {
	var transport T
	return &healthCheckProvider[T]{RoundTripper: &transport}
}
func Test_healthcheck(t *testing.T) {
t.Run("should do a successful health check", func(t *testing.T) {
httpProvider := getMockProvider[*healthCheckSuccessRoundTripper]()
s := &Service{
im: datasource.NewInstanceManager(newInstanceSettings(httpProvider)),
features: featuremgmt.WithFeatures(featuremgmt.FlagLokiLogsDataplane, featuremgmt.FlagLokiMetricDataplane),
tracer: tracing.NewFakeTracer(),
logger: log.New("loki test"),
}
req := &backend.CheckHealthRequest{
PluginContext: getPluginContext(),
Headers: nil,
}
res, err := s.CheckHealth(context.Background(), req)
assert.NoError(t, err)
assert.Equal(t, backend.HealthStatusOk, res.Status)
})
t.Run("should return an error for an unsuccessful health check", func(t *testing.T) {
httpProvider := getMockProvider[*healthCheckFailRoundTripper]()
s := &Service{
im: datasource.NewInstanceManager(newInstanceSettings(httpProvider)),
features: featuremgmt.WithFeatures(featuremgmt.FlagLokiLogsDataplane, featuremgmt.FlagLokiMetricDataplane),
tracer: tracing.NewFakeTracer(),
logger: log.New("loki test"),
}
req := &backend.CheckHealthRequest{
PluginContext: getPluginContext(),
Headers: nil,
}
res, err := s.CheckHealth(context.Background(), req)
assert.NoError(t, err)
assert.Equal(t, backend.HealthStatusError, res.Status)
})
}
// getPluginContext returns the plugin context used by the health check tests:
// a "loki" plugin with datasource settings pointing at http://loki:3100, basic
// auth enabled for user "admin", empty JSON data and no secure values.
func getPluginContext() backend.PluginContext {
	// Fields not listed here keep their zero values.
	settings := &backend.DataSourceInstanceSettings{
		Type:                    "loki",
		Name:                    "test-loki",
		URL:                     "http://loki:3100",
		BasicAuthEnabled:        true,
		BasicAuthUser:           "admin",
		JSONData:                []byte("{}"),
		DecryptedSecureJSONData: map[string]string{},
		Updated:                 time.Time{},
	}

	return backend.PluginContext{
		PluginID:                   "loki",
		DataSourceInstanceSettings: settings,
	}
}

View File

@@ -30,6 +30,7 @@ type Service struct {
im instancemgmt.InstanceManager im instancemgmt.InstanceManager
features featuremgmt.FeatureToggles features featuremgmt.FeatureToggles
tracer tracing.Tracer tracer tracing.Tracer
logger *log.ConcreteLogger
} }
var ( var (
@@ -43,6 +44,7 @@ func ProvideService(httpClientProvider httpclient.Provider, features featuremgmt
im: datasource.NewInstanceManager(newInstanceSettings(httpClientProvider)), im: datasource.NewInstanceManager(newInstanceSettings(httpClientProvider)),
features: features, features: features,
tracer: tracer, tracer: tracer,
logger: logger,
} }
} }

View File

@@ -323,63 +323,6 @@ describe('LokiDatasource', () => {
}); });
}); });
// Covers LokiDatasource.testDatasource by stubbing ds.metadataRequest:
// labels returned, empty label list, rejection without details, and
// rejection carrying data.message.
describe('when performing testDataSource', () => {
let ds: LokiDatasource;
beforeEach(() => {
ds = createLokiDatasource(templateSrvStub);
});
it('should return successfully when call succeeds with labels', async () => {
ds.metadataRequest = () => Promise.resolve(['avalue']);
const result = await ds.testDatasource();
expect(result).toStrictEqual({
status: 'success',
message: 'Data source successfully connected.',
});
});
it('should return error when call succeeds without labels', async () => {
ds.metadataRequest = () => Promise.resolve([]);
const result = await ds.testDatasource();
expect(result).toStrictEqual({
status: 'error',
message:
'Data source connected, but no labels were received. Verify that Loki and Promtail are correctly configured.',
});
});
it('should return error status with no details when call fails with no details', async () => {
ds.metadataRequest = () => Promise.reject({});
const result = await ds.testDatasource();
expect(result).toStrictEqual({
status: 'error',
message: 'Unable to connect with Loki. Please check the server logs for more details.',
});
});
it('should return error status with details when call fails with details', async () => {
ds.metadataRequest = () =>
Promise.reject({
data: {
message: 'error42',
},
});
const result = await ds.testDatasource();
expect(result).toStrictEqual({
status: 'error',
message: 'Unable to connect with Loki (error42). Please check the server logs for more details.',
});
});
});
describe('when calling annotationQuery', () => { describe('when calling annotationQuery', () => {
const getTestContext = (frame: DataFrame, options = {}) => { const getTestContext = (frame: DataFrame, options = {}) => {
const query = makeAnnotationQueryRequest(options); const query = makeAnnotationQueryRequest(options);

View File

@@ -761,38 +761,6 @@ export class LokiDatasource
return this.logContextProvider.getLogRowContextUi(row, runContextQuery, getLokiQueryFromDataQuery(origQuery)); return this.logContextProvider.getLogRowContextUi(row, runContextQuery, getLokiQueryFromDataQuery(origQuery));
} }
/**
 * Tests the connection by requesting the label names for the last 10 minutes
 * through `metadataRequest('labels', ...)`.
 *
 * Resolves with status 'success' when at least one label is returned, with
 * status 'error' when the label list is empty, and with status 'error' when
 * the request is rejected; in the rejection case `err.data.message`, if
 * present, is embedded in the message in parentheses. Both the fulfilment and
 * the rejection handler return a result object.
 */
testDatasource(): Promise<{ status: string; message: string }> {
// Consider only last 10 minutes otherwise request takes too long
const nowMs = Date.now();
const params = {
start: (nowMs - 10 * 60 * 1000) * NS_IN_MS,
end: nowMs * NS_IN_MS,
};
return this.metadataRequest('labels', params).then(
(values) => {
return values.length > 0
? { status: 'success', message: 'Data source successfully connected.' }
: {
status: 'error',
message:
'Data source connected, but no labels were received. Verify that Loki and Promtail are correctly configured.',
};
},
(err) => {
// we did a resource-call that failed.
// the only info we have, if exists, is err.data.message
// (when in development-mode, err.data.error exists too, but not in production-mode)
// things like err.status & err.statusText does not help,
// because those will only describe how the request between browser<>server failed
const info: string = err?.data?.message ?? '';
const infoInParentheses = info !== '' ? ` (${info})` : '';
const message = `Unable to connect with Loki${infoInParentheses}. Please check the server logs for more details.`;
return { status: 'error', message: message };
}
);
}
async annotationQuery(options: any): Promise<AnnotationEvent[]> { async annotationQuery(options: any): Promise<AnnotationEvent[]> {
const { expr, maxLines, instant, tagKeys = '', titleFormat = '', textFormat = '' } = options.annotation; const { expr, maxLines, instant, tagKeys = '', titleFormat = '', textFormat = '' } = options.annotation;