2022-02-18 07:35:39 -06:00
package loki
import (
"bytes"
"context"
"encoding/json"
2023-09-07 10:09:24 -05:00
"errors"
2022-02-18 07:35:39 -06:00
"fmt"
"io"
"net/http"
"net/url"
2022-06-17 00:47:31 -05:00
"path"
2022-02-18 07:35:39 -06:00
"strconv"
2024-01-08 11:32:16 -06:00
"syscall"
2023-09-01 04:44:18 -05:00
"time"
2022-02-18 07:35:39 -06:00
2023-01-30 02:50:27 -06:00
jsoniter "github.com/json-iterator/go"
2023-09-01 04:44:18 -05:00
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
2023-10-03 07:54:20 -05:00
"go.opentelemetry.io/otel/trace"
2024-04-11 04:51:54 -05:00
"google.golang.org/grpc/metadata"
2023-01-30 02:50:27 -06:00
2024-01-08 11:32:16 -06:00
"github.com/grafana/grafana-plugin-sdk-go/backend"
2024-03-20 04:25:35 -05:00
"github.com/grafana/grafana-plugin-sdk-go/backend/log"
2023-03-09 04:12:33 -06:00
"github.com/grafana/grafana/pkg/infra/tracing"
2024-03-11 11:22:33 -05:00
"github.com/grafana/grafana/pkg/promlib/converter"
2023-09-28 10:25:09 -05:00
"github.com/grafana/grafana/pkg/tsdb/loki/instrumentation"
2022-02-18 07:35:39 -06:00
)
type LokiAPI struct {
2023-11-16 10:06:14 -06:00
client * http . Client
url string
log log . Logger
tracer tracing . Tracer
requestStructuredMetadata bool
2022-02-18 07:35:39 -06:00
}
2022-11-22 07:02:30 -06:00
// RawLokiResponse is the result handed back by LokiAPI.RawQuery.
type RawLokiResponse struct {
	// Body is the response payload. For 2xx responses it is the bytes read
	// from Loki; for other non-5xx responses RawQuery replaces it with a
	// JSON-encoded error object.
	Body []byte
	// Status is the HTTP status code of the Loki response.
	Status int
	// Encoding is the value of the response's Content-Encoding header.
	Encoding string
}
2023-11-16 10:06:14 -06:00
func newLokiAPI ( client * http . Client , url string , log log . Logger , tracer tracing . Tracer , requestStructuredMetadata bool ) * LokiAPI {
return & LokiAPI { client : client , url : url , log : log , tracer : tracer , requestStructuredMetadata : requestStructuredMetadata }
2022-02-18 07:35:39 -06:00
}
2023-11-16 10:06:14 -06:00
func makeDataRequest ( ctx context . Context , lokiDsUrl string , query lokiQuery , categorizeLabels bool ) ( * http . Request , error ) {
2022-02-18 07:35:39 -06:00
qs := url . Values { }
qs . Set ( "query" , query . Expr )
2022-02-25 02:14:17 -06:00
2022-04-20 06:52:15 -05:00
qs . Set ( "direction" , string ( query . Direction ) )
2022-02-25 02:14:17 -06:00
// MaxLines defaults to zero when not received,
// and Loki does not like limit=0, even when it is not needed
// (for example for metric queries), so we
// only send it when it's set
if query . MaxLines > 0 {
qs . Set ( "limit" , fmt . Sprintf ( "%d" , query . MaxLines ) )
}
2022-02-18 07:35:39 -06:00
lokiUrl , err := url . Parse ( lokiDsUrl )
if err != nil {
return nil , err
}
2022-02-25 02:14:17 -06:00
switch query . QueryType {
case QueryTypeRange :
{
qs . Set ( "start" , strconv . FormatInt ( query . Start . UnixNano ( ) , 10 ) )
qs . Set ( "end" , strconv . FormatInt ( query . End . UnixNano ( ) , 10 ) )
// NOTE: technically for streams-producing queries `step`
// is ignored, so it would be nicer to not send it in such cases,
// but we cannot detect that situation, so we always send it.
// it should not break anything.
2022-04-12 05:30:39 -05:00
// NOTE2: we do this at millisecond precision for two reasons:
// a. Loki cannot do steps with better precision anyway,
// so the microsecond & nanosecond part can be ignored.
// b. having it always be number+'ms' makes it more robust and
// precise, as Loki does not support step with float number
// and time-specifier, like "1.5s"
qs . Set ( "step" , fmt . Sprintf ( "%dms" , query . Step . Milliseconds ( ) ) )
2022-06-17 00:47:31 -05:00
lokiUrl . Path = path . Join ( lokiUrl . Path , "/loki/api/v1/query_range" )
2022-02-25 02:14:17 -06:00
}
case QueryTypeInstant :
{
qs . Set ( "time" , strconv . FormatInt ( query . End . UnixNano ( ) , 10 ) )
2022-06-17 00:47:31 -05:00
lokiUrl . Path = path . Join ( lokiUrl . Path , "/loki/api/v1/query" )
2022-02-25 02:14:17 -06:00
}
default :
return nil , fmt . Errorf ( "invalid QueryType: %v" , query . QueryType )
}
2022-02-18 07:35:39 -06:00
lokiUrl . RawQuery = qs . Encode ( )
req , err := http . NewRequestWithContext ( ctx , "GET" , lokiUrl . String ( ) , nil )
if err != nil {
return nil , err
}
2023-01-27 09:41:40 -06:00
if query . SupportingQueryType != SupportingQueryNone {
2024-03-26 06:08:09 -05:00
value := getSupportingQueryHeaderValue ( query . SupportingQueryType )
2023-01-27 09:41:40 -06:00
if value != "" {
req . Header . Set ( "X-Query-Tags" , "Source=" + value )
}
2022-02-25 02:14:17 -06:00
}
2022-02-18 07:35:39 -06:00
2023-11-16 10:06:14 -06:00
if categorizeLabels {
req . Header . Set ( "X-Loki-Response-Encoding-Flags" , "categorize-labels" )
}
2024-04-11 04:51:54 -05:00
setXScopeOrgIDHeader ( req , ctx )
2022-02-18 07:35:39 -06:00
return req , nil
}
2023-03-09 04:12:33 -06:00
// lokiResponseError is the JSON object RawQuery sends back as the response
// body when Loki answers with a non-2xx status that is not a 5xx.
type lokiResponseError struct {
// Message is the error text produced by makeLokiError from the Loki body.
Message string ` json:"message" `
// TraceID is filled in by RawQuery when a trace ID is available in the
// context; it is left out of the JSON when empty.
TraceID string ` json:"traceID,omitempty" `
}
2022-02-18 07:35:39 -06:00
// lokiError is the shape a Loki error body is decoded into; only the
// "message" field is looked at.
type lokiError struct {
	Message string
}

// makeLokiError converts the body of a Loki error response into an error.
//
// If body is a JSON object with a non-empty "message" field, the error text is
// that message. In every other case (body is not valid JSON for lokiError, or
// the message is empty) the error text is the whole body, unchanged.
func makeLokiError(body []byte) error {
	var data lokiError
	if err := json.Unmarshal(body, &data); err != nil || data.Message == "" {
		// either the body is not usable JSON or it carries no message:
		// fall back to the raw text
		return errors.New(string(body))
	}
	return errors.New(data.Message)
}
2022-02-18 07:35:39 -06:00
// we know there is an error,
// based on the http-response-body
// we have to make an informative error-object
2023-03-09 04:12:33 -06:00
func readLokiError ( body io . ReadCloser ) error {
2022-02-18 07:35:39 -06:00
var buf bytes . Buffer
_ , err := buf . ReadFrom ( body )
if err != nil {
return err
}
bytes := buf . Bytes ( )
// the error-message is probably a JSON structure,
// with a string-field named "message". we want the
// value of that field.
// but, the response might be just a simple string,
// this was used in older Loki versions.
// so our approach is this:
// - we try to convert the bytes to JSON
// - we take the value of the field "message"
// - if any of these steps fail, or if "message" is empty, we return the whole text
2023-03-09 04:12:33 -06:00
return makeLokiError ( bytes )
2022-02-18 07:35:39 -06:00
}
2024-01-08 11:32:16 -06:00
func ( api * LokiAPI ) DataQuery ( ctx context . Context , query lokiQuery , responseOpts ResponseOpts ) ( * backend . DataResponse , error ) {
2023-11-16 10:06:14 -06:00
req , err := makeDataRequest ( ctx , api . url , query , api . requestStructuredMetadata )
2022-02-18 07:35:39 -06:00
if err != nil {
2023-11-10 10:03:59 -06:00
return nil , err
2022-02-18 07:35:39 -06:00
}
2023-09-07 10:09:24 -05:00
queryAttrs := [ ] any { "start" , query . Start , "end" , query . End , "step" , query . Step , "query" , query . Expr , "queryType" , query . QueryType , "direction" , query . Direction , "maxLines" , query . MaxLines , "supportingQueryType" , query . SupportingQueryType , "lokiHost" , req . URL . Host , "lokiPath" , req . URL . Path }
api . log . Debug ( "Sending query to loki" , queryAttrs ... )
2023-09-01 04:44:18 -05:00
start := time . Now ( )
2022-02-18 07:35:39 -06:00
resp , err := api . client . Do ( req )
if err != nil {
2023-09-07 10:09:24 -05:00
status := "error"
if errors . Is ( err , context . Canceled ) {
status = "cancelled"
}
lp := [ ] any { "error" , err , "status" , status , "duration" , time . Since ( start ) , "stage" , stageDatabaseRequest }
lp = append ( lp , queryAttrs ... )
if resp != nil {
lp = append ( lp , "statusCode" , resp . StatusCode )
}
api . log . Error ( "Error received from Loki" , lp ... )
2024-01-08 11:32:16 -06:00
res := backend . DataResponse {
Error : err ,
}
if errors . Is ( err , syscall . ECONNREFUSED ) {
res . ErrorSource = backend . ErrorSourceDownstream
}
return & res , nil
2022-02-18 07:35:39 -06:00
}
defer func ( ) {
if err := resp . Body . Close ( ) ; err != nil {
2023-09-07 10:09:24 -05:00
api . log . Warn ( "Failed to close response body" , "error" , err )
2022-02-18 07:35:39 -06:00
}
} ( )
2023-09-19 04:11:27 -05:00
lp := [ ] any { "duration" , time . Since ( start ) , "stage" , stageDatabaseRequest , "statusCode" , resp . StatusCode , "contentLength" , resp . Header . Get ( "Content-Length" ) }
lp = append ( lp , queryAttrs ... )
2022-02-18 07:35:39 -06:00
if resp . StatusCode / 100 != 2 {
2023-09-01 04:44:18 -05:00
err := readLokiError ( resp . Body )
2024-01-08 11:32:16 -06:00
res := backend . DataResponse {
Error : err ,
ErrorSource : backend . ErrorSourceFromHTTPStatus ( resp . StatusCode ) ,
}
2024-01-23 04:16:29 -06:00
lp = append ( lp , "status" , "error" , "error" , err , "statusSource" , res . ErrorSource )
2023-09-19 04:11:27 -05:00
api . log . Error ( "Error received from Loki" , lp ... )
2024-01-08 11:32:16 -06:00
return & res , nil
2023-09-19 04:11:27 -05:00
} else {
lp = append ( lp , "status" , "ok" )
api . log . Info ( "Response received from loki" , lp ... )
2022-02-18 07:35:39 -06:00
}
2023-09-01 04:44:18 -05:00
start = time . Now ( )
2023-10-03 07:54:20 -05:00
_ , span := api . tracer . Start ( ctx , "datasource.loki.parseResponse" , trace . WithAttributes (
attribute . Bool ( "metricDataplane" , responseOpts . metricDataplane ) ,
) )
2023-09-01 04:44:18 -05:00
defer span . End ( )
2022-05-05 06:09:01 -05:00
iter := jsoniter . Parse ( jsoniter . ConfigDefault , resp . Body , 1024 )
2023-08-24 09:47:19 -05:00
res := converter . ReadPrometheusStyleResult ( iter , converter . Options { Dataplane : responseOpts . metricDataplane } )
2022-05-05 06:09:01 -05:00
if res . Error != nil {
2023-09-01 04:44:18 -05:00
span . RecordError ( res . Error )
2023-11-28 04:48:23 -06:00
span . SetStatus ( codes . Error , res . Error . Error ( ) )
2023-09-28 10:25:09 -05:00
instrumentation . UpdatePluginParsingResponseDurationSeconds ( ctx , time . Since ( start ) , "error" )
2023-09-07 10:09:24 -05:00
api . log . Error ( "Error parsing response from loki" , "error" , res . Error , "metricDataplane" , responseOpts . metricDataplane , "duration" , time . Since ( start ) , "stage" , stageParseResponse )
2023-11-10 10:03:59 -06:00
return nil , res . Error
2022-02-18 07:35:39 -06:00
}
2023-09-28 10:25:09 -05:00
instrumentation . UpdatePluginParsingResponseDurationSeconds ( ctx , time . Since ( start ) , "ok" )
2023-09-07 10:09:24 -05:00
api . log . Info ( "Response parsed from loki" , "duration" , time . Since ( start ) , "metricDataplane" , responseOpts . metricDataplane , "framesLength" , len ( res . Frames ) , "stage" , stageParseResponse )
2023-09-01 04:44:18 -05:00
2024-01-08 11:32:16 -06:00
return & res , nil
2022-02-18 07:35:39 -06:00
}
2022-04-25 06:16:14 -05:00
2022-12-21 06:25:58 -06:00
func makeRawRequest ( ctx context . Context , lokiDsUrl string , resourcePath string ) ( * http . Request , error ) {
2022-04-25 06:16:14 -05:00
lokiUrl , err := url . Parse ( lokiDsUrl )
if err != nil {
return nil , err
}
2022-06-17 00:47:31 -05:00
resourceUrl , err := url . Parse ( resourcePath )
2022-04-25 06:16:14 -05:00
if err != nil {
return nil , err
}
2022-06-17 00:47:31 -05:00
// we take the path and the query-string only
lokiUrl . RawQuery = resourceUrl . RawQuery
lokiUrl . Path = path . Join ( lokiUrl . Path , resourceUrl . Path )
req , err := http . NewRequestWithContext ( ctx , "GET" , lokiUrl . String ( ) , nil )
2022-05-05 05:42:50 -05:00
if err != nil {
return nil , err
}
2024-04-11 04:51:54 -05:00
setXScopeOrgIDHeader ( req , ctx )
2022-05-05 05:42:50 -05:00
return req , nil
2022-04-25 06:16:14 -05:00
}
2022-11-22 07:02:30 -06:00
func ( api * LokiAPI ) RawQuery ( ctx context . Context , resourcePath string ) ( RawLokiResponse , error ) {
2023-09-01 08:26:25 -05:00
api . log . Debug ( "Sending raw query to loki" , "resourcePath" , resourcePath )
2022-12-21 06:25:58 -06:00
req , err := makeRawRequest ( ctx , api . url , resourcePath )
2022-04-25 06:16:14 -05:00
if err != nil {
2023-09-07 10:09:24 -05:00
api . log . Error ( "Failed to prepare request to loki" , "error" , err , "resourcePath" , resourcePath )
2022-11-22 07:02:30 -06:00
return RawLokiResponse { } , err
2022-04-25 06:16:14 -05:00
}
2023-09-07 10:09:24 -05:00
start := time . Now ( )
2022-04-25 06:16:14 -05:00
resp , err := api . client . Do ( req )
if err != nil {
2023-09-07 10:09:24 -05:00
status := "error"
if errors . Is ( err , context . Canceled ) {
status = "cancelled"
}
lp := [ ] any { "error" , err , "resourcePath" , resourcePath , "status" , status , "duration" , time . Since ( start ) , "stage" , stageDatabaseRequest }
if resp != nil {
lp = append ( lp , "statusCode" , resp . StatusCode )
}
api . log . Error ( "Error received from Loki" , lp ... )
2022-11-22 07:02:30 -06:00
return RawLokiResponse { } , err
2022-04-25 06:16:14 -05:00
}
defer func ( ) {
if err := resp . Body . Close ( ) ; err != nil {
2023-09-07 10:09:24 -05:00
api . log . Warn ( "Failed to close response body" , "error" , err )
2022-04-25 06:16:14 -05:00
}
} ( )
2023-09-07 10:09:24 -05:00
api . log . Info ( "Response received from loki" , "status" , "ok" , "statusCode" , resp . StatusCode , "contentLength" , resp . Header . Get ( "Content-Length" ) , "duration" , time . Since ( start ) , "contentEncoding" , resp . Header . Get ( "Content-Encoding" ) , "stage" , stageDatabaseRequest )
2023-09-01 08:26:25 -05:00
2023-03-09 04:12:33 -06:00
// server errors are handled by the plugin-proxy to hide the error message
if resp . StatusCode / 100 == 5 {
return RawLokiResponse { } , readLokiError ( resp . Body )
2022-11-22 07:02:30 -06:00
}
body , err := io . ReadAll ( resp . Body )
if err != nil {
2023-09-07 10:09:24 -05:00
api . log . Error ( "Error reading response body bytes" , "error" , err )
2022-11-22 07:02:30 -06:00
return RawLokiResponse { } , err
}
2023-03-09 04:12:33 -06:00
// client errors are passed as a json struct to the client
if resp . StatusCode / 100 != 2 {
lokiResponseErr := lokiResponseError { Message : makeLokiError ( body ) . Error ( ) }
2023-09-07 10:09:24 -05:00
api . log . Warn ( "Non 200 HTTP status received from loki" , "error" , lokiResponseErr . Message , "statusCode" , resp . StatusCode , "resourcePath" , resourcePath )
2023-03-09 04:12:33 -06:00
traceID := tracing . TraceIDFromContext ( ctx , false )
if traceID != "" {
lokiResponseErr . TraceID = traceID
}
body , err = json . Marshal ( lokiResponseErr )
if err != nil {
return RawLokiResponse { } , err
}
}
rawLokiResponse := RawLokiResponse {
2022-11-22 07:02:30 -06:00
Body : body ,
2023-03-09 04:12:33 -06:00
Status : resp . StatusCode ,
2022-11-22 07:02:30 -06:00
Encoding : resp . Header . Get ( "Content-Encoding" ) ,
2022-04-25 06:16:14 -05:00
}
2023-03-09 04:12:33 -06:00
return rawLokiResponse , nil
2022-04-25 06:16:14 -05:00
}
2023-01-27 09:41:40 -06:00
2024-03-26 06:08:09 -05:00
func getSupportingQueryHeaderValue ( supportingQueryType SupportingQueryType ) string {
2023-01-27 09:41:40 -06:00
value := ""
2024-03-26 06:08:09 -05:00
// we need to map the SupportingQueryType to the actual header value. For
// legacy reasons we defined each value, such as "logsVolume" maps to the
// "logvolhist" header value to Loki. With #85123, even the value set in the
// frontend query can be passed as is to Loki.
2023-01-27 09:41:40 -06:00
switch supportingQueryType {
case SupportingQueryLogsVolume :
value = "logvolhist"
case SupportingQueryLogsSample :
value = "logsample"
case SupportingQueryDataSample :
value = "datasample"
2024-01-29 04:24:05 -06:00
case SupportingQueryInfiniteScroll :
value = "infinitescroll"
2024-03-26 06:08:09 -05:00
default :
value = string ( supportingQueryType )
2023-01-27 09:41:40 -06:00
}
return value
}
2024-04-11 04:51:54 -05:00
// setXScopeOrgIDHeader sets the `X-Scope-OrgID` header in the provided HTTP request based on the tenant ID retrieved from the context.
// `X-Scope-OrgID` is needed by the Loki system to work in multi-tenant mode.
// See https://github.com/grafana/loki/blob/main/docs/sources/operations/multi-tenancy.md
func setXScopeOrgIDHeader ( req * http . Request , ctx context . Context ) * http . Request {
logger := backend . NewLoggerWith ( "logger" , "tsdb.loki" )
md , ok := metadata . FromIncomingContext ( ctx )
if ! ok {
2024-05-15 09:08:03 -05:00
// Metadata are currently set and needed only locally for multi-tenancy, while on cloud
// this is set by our stack
logger . Debug ( "Metadata not present in context. Header not set" )
2024-04-11 04:51:54 -05:00
return req
}
tenantids := md . Get ( "tenantid" )
if len ( tenantids ) == 0 {
// We assume we are not using multi-tenant mode, which is fine
logger . Debug ( "Tenant ID not present. Header not set" )
2024-04-22 15:02:56 -05:00
} else if len ( tenantids ) > 1 {
2024-04-11 04:51:54 -05:00
// Loki supports multiple tenant IDs, but we should receive them from different contexts
logger . Error ( strconv . Itoa ( len ( tenantids ) ) + " tenant IDs found. Header not set" )
} else {
req . Header . Add ( "X-Scope-OrgID" , tenantids [ 0 ] )
logger . Debug ( "Tenant ID " + tenantids [ 0 ] + " added to Loki request" )
}
return req
}