Loki: Add backend healthcheck (#74330)

* add loki healthcheck

* remove `testDatasource` call

* remove unused error check

* change success message

* improve error messages
This commit is contained in:
Sven Grossmann 2023-09-05 08:59:13 +02:00 committed by GitHub
parent ed46e3444c
commit a403027608
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 253 additions and 93 deletions

View File

@ -4,8 +4,7 @@ const dataSourceName = 'LokiEditor';
const addDataSource = () => {
e2e.flows.addDataSource({
type: 'Loki',
expectedAlertMessage:
'Unable to connect with Loki (Failed to call resource). Please check the server logs for more details.',
expectedAlertMessage: 'Unable to connect with Loki. Please check the server logs for more details.',
name: dataSourceName,
form: () => {
e2e().get('#connection-url').type('http://loki-url:3100');

View File

@ -5,8 +5,7 @@ const dataSourceName = 'LokiBuilder';
const addDataSource = () => {
e2e.flows.addDataSource({
type: 'Loki',
expectedAlertMessage:
'Unable to connect with Loki (Failed to call resource). Please check the server logs for more details.',
expectedAlertMessage: 'Unable to connect with Loki. Please check the server logs for more details.',
name: dataSourceName,
form: () => {
e2e().get('#connection-url').type('http://loki-url:3100');

View File

@ -0,0 +1,104 @@
package loki
import (
"context"
"encoding/json"
"errors"
"fmt"
"time"
"github.com/grafana/grafana-plugin-sdk-go/backend"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/tsdb/loki/kinds/dataquery"
)
// refID is the query reference ID used for the synthetic health check query;
// the response for that query is looked up under the same key.
const refID = "__healthcheck__"
// CheckHealth handles a health check request for the Loki data source.
//
// It first asks the instance manager for the datasource belonging to the
// request's plugin context. When that lookup fails, an error result is returned
// together with the lookup error; when it yields nil, an error result is
// returned with a nil error. Otherwise the outcome of healthcheck is returned.
func (s *Service) CheckHealth(ctx context.Context, req *backend.CheckHealthRequest) (*backend.CheckHealthResult, error) {
	logger := s.logger.New("endpoint", "CheckHealth")

	instance, err := s.im.Get(ctx, req.PluginContext)
	switch {
	case err != nil:
		// The datasource could not be resolved at all.
		wrapped := fmt.Errorf("failed to get datasource information: %w", err)
		return getHealthCheckMessage(wrapped, logger), err
	case instance == nil:
		// err is nil on this path, so nil is returned explicitly.
		return getHealthCheckMessage(errors.New("invalid datasource info received"), logger), nil
	}

	return healthcheck(ctx, req, s, logger), nil
}
// healthcheck sends the instant query `vector(1)+vector(1)` through the
// service's regular QueryData path and verifies the answer.
//
// The check passes only when the response for refID contains exactly one frame
// with two fields, each holding exactly one value, and the value in the second
// field is the float64 2. Every failure is turned into an error result via
// getHealthCheckMessage, which also logs the underlying cause.
//
// Parameters:
//   - ctx:    context forwarded to QueryData.
//   - req:    the health check request; its PluginContext is reused for the query.
//   - s:      the service whose QueryData method is exercised.
//   - logger: logger handed to getHealthCheckMessage for failure reporting.
func healthcheck(ctx context.Context, req *backend.CheckHealthRequest, s *Service, logger *log.ConcreteLogger) *backend.CheckHealthResult {
	step := "1s"
	qt := "instant"
	qm := dataquery.LokiDataQuery{
		Expr:      "vector(1)+vector(1)",
		Step:      &step,
		QueryType: &qt,
	}
	b, err := json.Marshal(&qm)
	if err != nil {
		return getHealthCheckMessage(fmt.Errorf("failed to marshal health check query: %w", err), logger)
	}

	query := backend.DataQuery{
		RefID: refID,
		// Fixed range (1s to 4s after the Unix epoch) so the request is deterministic.
		TimeRange: backend.TimeRange{
			From: time.Unix(1, 0).UTC(),
			To:   time.Unix(4, 0).UTC(),
		},
		JSON: b,
	}
	resp, err := s.QueryData(ctx, &backend.QueryDataRequest{
		PluginContext: req.PluginContext,
		Queries:       []backend.DataQuery{query},
	})
	if err != nil {
		return getHealthCheckMessage(fmt.Errorf("error received while querying loki: %w", err), logger)
	}
	if resp == nil {
		// Guard against dereferencing a nil response below.
		return getHealthCheckMessage(errors.New("error received while querying loki: empty response"), logger)
	}

	queryRes := resp.Responses[refID]
	if queryRes.Error != nil {
		return getHealthCheckMessage(fmt.Errorf("error from loki: %w", queryRes.Error), logger)
	}

	frameLen := len(queryRes.Frames)
	if frameLen != 1 {
		return getHealthCheckMessage(fmt.Errorf("invalid dataframe length, expected %d got %d", 1, frameLen), logger)
	}

	fields := queryRes.Frames[0].Fields
	fieldLen := len(fields)
	if fieldLen != 2 {
		return getHealthCheckMessage(fmt.Errorf("invalid dataframe field length, expected %d got %d", 2, fieldLen), logger)
	}

	// Report the actual number of values (the original reported the field count here).
	fieldValueLen := fields[0].Len()
	if fieldValueLen != 1 {
		return getHealthCheckMessage(fmt.Errorf("invalid dataframe field value length, expected %d got %d", 1, fieldValueLen), logger)
	}

	// The value field is indexed below, so its length must be verified as well.
	valueField := fields[1]
	if valueLen := valueField.Len(); valueLen != 1 {
		return getHealthCheckMessage(fmt.Errorf("invalid dataframe field value length, expected %d got %d", 1, valueLen), logger)
	}

	// Checked assertion: an unexpected value type must fail the check, not panic.
	raw := valueField.At(0)
	rspValue, ok := raw.(float64)
	if !ok {
		return getHealthCheckMessage(fmt.Errorf("invalid response value type, expected float64 got %T", raw), logger)
	}
	if rspValue != 2 {
		return getHealthCheckMessage(fmt.Errorf("invalid response value, expected %d got %f", 2, rspValue), logger)
	}

	return getHealthCheckMessage(nil, logger)
}
// getHealthCheckMessage converts the outcome of a health check into a
// CheckHealthResult.
//
// A nil err yields an OK result. A non-nil err is logged through logger and
// yields an error result with a generic message; the error text itself is not
// included in the returned message.
func getHealthCheckMessage(err error, logger *log.ConcreteLogger) *backend.CheckHealthResult {
	if err != nil {
		logger.Error("Loki health check failed", "error", err)

		failure := backend.CheckHealthResult{
			Status:  backend.HealthStatusError,
			Message: "Unable to connect with Loki. Please check the server logs for more details.",
		}
		return &failure
	}

	success := backend.CheckHealthResult{
		Status:  backend.HealthStatusOk,
		Message: "Data source successfully connected.",
	}
	return &success
}

View File

@ -0,0 +1,145 @@
package loki
import (
"context"
"io"
"net/http"
"strings"
"testing"
"time"
"github.com/grafana/grafana-plugin-sdk-go/backend"
"github.com/grafana/grafana-plugin-sdk-go/backend/datasource"
sdkHttpClient "github.com/grafana/grafana-plugin-sdk-go/backend/httpclient"
"github.com/grafana/grafana/pkg/infra/httpclient"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/infra/tracing"
"github.com/grafana/grafana/pkg/services/featuremgmt"
"github.com/stretchr/testify/assert"
)
// healthCheckProvider is a test HTTP client provider parameterized by the
// RoundTripper type T that its clients use as transport.
// It embeds httpclient.Provider and overrides New and GetTransport.
type healthCheckProvider[T http.RoundTripper] struct {
httpclient.Provider
// RoundTripper points at the transport value most recently created by New
// (or by getMockProvider at construction time).
RoundTripper *T
}
// healthCheckSuccessRoundTripper is a stateless fake transport whose RoundTrip
// always answers with a 200 response carrying a successful vector result.
type healthCheckSuccessRoundTripper struct {
}
// healthCheckFailRoundTripper is a stateless fake transport whose RoundTrip
// always answers with a 400 response and no body.
type healthCheckFailRoundTripper struct {
}
// RoundTrip ignores the outgoing request's contents and returns a canned
// HTTP 200 response whose JSON body is a "vector" result with the value "2".
// It never returns an error.
func (rt *healthCheckSuccessRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
	const payload = `{
"data": {
"resultType": "vector",
"result": [
{
"metric": {},
"value": [
4000000000,
"2"
]
}
]
},
"status": "success"
}`

	// Header and ContentLength are left at their zero values.
	canned := &http.Response{
		Status:     "200",
		StatusCode: http.StatusOK,
		Body:       io.NopCloser(strings.NewReader(payload)),
		Request:    req,
	}
	return canned, nil
}
// RoundTrip ignores the outgoing request's contents and returns a canned
// HTTP 400 response with a nil header, a nil body and a zero content length.
// It never returns an error.
func (rt *healthCheckFailRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
	canned := new(http.Response)
	canned.Status = "400"
	canned.StatusCode = http.StatusBadRequest
	canned.Request = req
	// Header, Body and ContentLength intentionally stay at their zero values.
	return canned, nil
}
// New returns an http.Client whose transport is a freshly created zero value
// of T. The options are ignored. The new transport value is also stored on the
// provider, replacing the previous one.
func (provider *healthCheckProvider[T]) New(opts ...sdkHttpClient.Options) (*http.Client, error) {
	transport := new(T)
	provider.RoundTripper = transport

	return &http.Client{Transport: *transport}, nil
}
// GetTransport returns the zero value of T as the transport. The options are
// ignored and the provider's stored RoundTripper is not touched.
func (provider *healthCheckProvider[T]) GetTransport(opts ...sdkHttpClient.Options) (http.RoundTripper, error) {
	var transport T
	return transport, nil
}
// getMockProvider builds a healthCheckProvider for the RoundTripper type T,
// pre-populated with a pointer to a zero value of T.
func getMockProvider[T http.RoundTripper]() *healthCheckProvider[T] {
	provider := new(healthCheckProvider[T])
	provider.RoundTripper = new(T)
	return provider
}
func Test_healthcheck(t *testing.T) {
t.Run("should do a successful health check", func(t *testing.T) {
httpProvider := getMockProvider[*healthCheckSuccessRoundTripper]()
s := &Service{
im: datasource.NewInstanceManager(newInstanceSettings(httpProvider)),
features: featuremgmt.WithFeatures(featuremgmt.FlagLokiLogsDataplane, featuremgmt.FlagLokiMetricDataplane),
tracer: tracing.NewFakeTracer(),
logger: log.New("loki test"),
}
req := &backend.CheckHealthRequest{
PluginContext: getPluginContext(),
Headers: nil,
}
res, err := s.CheckHealth(context.Background(), req)
assert.NoError(t, err)
assert.Equal(t, backend.HealthStatusOk, res.Status)
})
t.Run("should return an error for an unsuccessful health check", func(t *testing.T) {
httpProvider := getMockProvider[*healthCheckFailRoundTripper]()
s := &Service{
im: datasource.NewInstanceManager(newInstanceSettings(httpProvider)),
features: featuremgmt.WithFeatures(featuremgmt.FlagLokiLogsDataplane, featuremgmt.FlagLokiMetricDataplane),
tracer: tracing.NewFakeTracer(),
logger: log.New("loki test"),
}
req := &backend.CheckHealthRequest{
PluginContext: getPluginContext(),
Headers: nil,
}
res, err := s.CheckHealth(context.Background(), req)
assert.NoError(t, err)
assert.Equal(t, backend.HealthStatusError, res.Status)
})
}
// getPluginContext returns the plugin context used by the health check tests:
// plugin "loki" with datasource settings named "test-loki" pointing at
// http://loki:3100, basic auth enabled for user "admin", empty JSON data and
// no secure values. All other fields carry their zero values.
func getPluginContext() backend.PluginContext {
	settings := &backend.DataSourceInstanceSettings{
		Type:                    "loki",
		Name:                    "test-loki",
		URL:                     "http://loki:3100",
		BasicAuthEnabled:        true,
		BasicAuthUser:           "admin",
		JSONData:                []byte("{}"),
		DecryptedSecureJSONData: map[string]string{},
		Updated:                 time.Time{},
	}

	return backend.PluginContext{
		PluginID:                   "loki",
		DataSourceInstanceSettings: settings,
	}
}

View File

@ -30,6 +30,7 @@ type Service struct {
im instancemgmt.InstanceManager
features featuremgmt.FeatureToggles
tracer tracing.Tracer
logger *log.ConcreteLogger
}
var (
@ -43,6 +44,7 @@ func ProvideService(httpClientProvider httpclient.Provider, features featuremgmt
im: datasource.NewInstanceManager(newInstanceSettings(httpClientProvider)),
features: features,
tracer: tracer,
logger: logger,
}
}

View File

@ -323,63 +323,6 @@ describe('LokiDatasource', () => {
});
});
// Covers LokiDatasource.testDatasource for the four outcomes of the underlying
// metadata request: labels returned, no labels returned, rejection without
// details, and rejection with a `data.message` detail.
describe('when performing testDataSource', () => {
  let ds: LokiDatasource;

  beforeEach(() => {
    ds = createLokiDatasource(templateSrvStub);
  });

  it('should return successfully when call succeeds with labels', async () => {
    ds.metadataRequest = () => Promise.resolve(['avalue']);

    const outcome = await ds.testDatasource();

    const expected = {
      status: 'success',
      message: 'Data source successfully connected.',
    };
    expect(outcome).toStrictEqual(expected);
  });

  it('should return error when call succeeds without labels', async () => {
    ds.metadataRequest = () => Promise.resolve([]);

    const outcome = await ds.testDatasource();

    const expected = {
      status: 'error',
      message:
        'Data source connected, but no labels were received. Verify that Loki and Promtail are correctly configured.',
    };
    expect(outcome).toStrictEqual(expected);
  });

  it('should return error status with no details when call fails with no details', async () => {
    ds.metadataRequest = () => Promise.reject({});

    const outcome = await ds.testDatasource();

    const expected = {
      status: 'error',
      message: 'Unable to connect with Loki. Please check the server logs for more details.',
    };
    expect(outcome).toStrictEqual(expected);
  });

  it('should return error status with details when call fails with details', async () => {
    const rejection = {
      data: {
        message: 'error42',
      },
    };
    ds.metadataRequest = () => Promise.reject(rejection);

    const outcome = await ds.testDatasource();

    const expected = {
      status: 'error',
      message: 'Unable to connect with Loki (error42). Please check the server logs for more details.',
    };
    expect(outcome).toStrictEqual(expected);
  });
});
describe('when calling annotationQuery', () => {
const getTestContext = (frame: DataFrame, options = {}) => {
const query = makeAnnotationQueryRequest(options);

View File

@ -761,38 +761,6 @@ export class LokiDatasource
return this.logContextProvider.getLogRowContextUi(row, runContextQuery, getLokiQueryFromDataQuery(origQuery));
}
/**
 * Checks connectivity by requesting the label list for the last 10 minutes.
 *
 * @returns A promise resolving to `success` when at least one label is
 * received, or to `error` when no labels are received or the request is
 * rejected. For a rejection, `err.data.message` (if present) is included in
 * parentheses in the message.
 */
testDatasource(): Promise<{ status: string; message: string }> {
  // Only look at the last 10 minutes, otherwise the request takes too long.
  const endMs = Date.now();
  const startMs = endMs - 10 * 60 * 1000;
  const range = {
    start: startMs * NS_IN_MS,
    end: endMs * NS_IN_MS,
  };

  return this.metadataRequest('labels', range).then(
    (labels) => {
      if (labels.length > 0) {
        return { status: 'success', message: 'Data source successfully connected.' };
      }
      return {
        status: 'error',
        message:
          'Data source connected, but no labels were received. Verify that Loki and Promtail are correctly configured.',
      };
    },
    (err) => {
      // The resource call failed. The only information available, if any, is
      // err.data.message (err.data.error also exists in development mode, but
      // not in production mode). err.status and err.statusText are not useful:
      // they only describe how the browser<>server request failed.
      const detail: string = err?.data?.message ?? '';
      const suffix = detail === '' ? '' : ` (${detail})`;
      return {
        status: 'error',
        message: `Unable to connect with Loki${suffix}. Please check the server logs for more details.`,
      };
    }
  );
}
async annotationQuery(options: any): Promise<AnnotationEvent[]> {
const { expr, maxLines, instant, tagKeys = '', titleFormat = '', textFormat = '' } = options.annotation;