Tempo: TraceQL query response streaming (#69212)

* Refactor Tempo datasource backend to support multiple queryData types.
Added traceId query type that is set when performing the request but doesn't map to a tab.

* WIP data is reaching the frontend

* WIP

* Use channels and goroutines

* Some fixes

* Simplify backend code.
Return traces, metrics, state and error in a dataframe.
Shared state type between FE and BE.
Use getStream() instead of getQueryData()

* Handle errors in frontend

* Update Tempo and use same URL for RPC and HTTP

* Cleanup backend code

* Merge main

* Create grpc client only with host and authenticate

* Create grpc client only with host and authenticate

* Cleanup

* Add streaming to TraceQL Search tab

* Fix merge conflicts

* Added tests for processStream

* make gen-cue

* make gen-cue

* goimports

* lint

* Cleanup go.mod

* Comments

* Addressing PR comments

* Fix streaming for TraceQL search tab

* Added streaming kill switch as the disableTraceQLStreaming feature toggle

* Small comment

* Fix conflicts

* Correctly capture and send all errors as a DF to client

* Fix infinite error loop

* Fix merge conflicts

* Fix test

* Update deprecated import

* Fix feature toggles gen

* Fix merge conflicts
This commit is contained in:
Andre Pereira
2023-07-14 15:10:46 +01:00
committed by GitHub
parent fb2a57d3a3
commit c1709c9301
27 changed files with 1802 additions and 401 deletions

View File

@@ -38,13 +38,13 @@ export class TableContainer extends PureComponent<Props> {
return frames?.filter((df) => df.meta?.custom?.parentRowIndex === undefined) || [frames?.[0]];
}
getTableHeight(rowCount: number, isSingleTable = true) {
getTableHeight(rowCount: number, hasSubFrames = true) {
if (rowCount === 0) {
return 200;
}
// tries to estimate table height, with a min of 300 and a max of 600
// if there are multiple tables, there is no min
return Math.min(600, Math.max(rowCount * 36, isSingleTable ? 300 : 0) + 40 + 46);
return Math.min(600, Math.max(rowCount * 36, hasSubFrames ? 300 : 0) + 40 + 46);
}
render() {
@@ -107,7 +107,7 @@ export class TableContainer extends PureComponent<Props> {
key={data.main.refId || `table-${i}`}
title={tableData.length > 1 ? `Table - ${data.main.name || data.main.refId || i}` : 'Table'}
width={width}
height={this.getTableHeight(data.main.length, tableData.length === 1)}
height={this.getTableHeight(data.main.length, (data.sub?.length || 0) > 0)}
loadingState={loading ? LoadingState.Loading : undefined}
>
{(innerWidth, innerHeight) => (

View File

@@ -46,11 +46,16 @@ composableKinds: DataQuery: {
serviceMapQuery?: string
// Defines the maximum number of traces that are returned from Tempo
limit?: int64
// Use the streaming API to get partial results as they are available
streaming?: bool
filters: [...#TraceqlFilter]
} @cuetsy(kind="interface") @grafana(TSVeneer="type")
// search = Loki search, nativeSearch = Tempo search for backwards compatibility
#TempoQueryType: "traceql" | "traceqlSearch" | "search" | "serviceMap" | "upload" | "nativeSearch" | "clear" @cuetsy(kind="type")
#TempoQueryType: "traceql" | "traceqlSearch" | "search" | "serviceMap" | "upload" | "nativeSearch" | "traceId" | "clear" @cuetsy(kind="type")
// The state of the TraceQL streaming search query
#SearchStreamingState: "pending" | "streaming" | "done" | "error" @cuetsy(kind="enum")
// static fields are pre-set in the UI, dynamic fields are added by the user
#TraceqlSearchScope: "unscoped" | "resource" | "span" @cuetsy(kind="enum")

View File

@@ -44,6 +44,10 @@ export interface TempoQuery extends common.DataQuery {
* Query traces by span name
*/
spanName?: string;
/**
* Use the streaming API to get partial results as they are available
*/
streaming?: boolean;
}
export const defaultTempoQuery: Partial<TempoQuery> = {
@@ -53,7 +57,17 @@ export const defaultTempoQuery: Partial<TempoQuery> = {
/**
* search = Loki search, nativeSearch = Tempo search for backwards compatibility
*/
export type TempoQueryType = ('traceql' | 'traceqlSearch' | 'search' | 'serviceMap' | 'upload' | 'nativeSearch' | 'clear');
export type TempoQueryType = ('traceql' | 'traceqlSearch' | 'search' | 'serviceMap' | 'upload' | 'nativeSearch' | 'traceId' | 'clear');
/**
* The state of the TraceQL streaming search query
*
* Every member is a string enum member whose runtime value is the
* lowercase form of its name ('done', 'error', 'pending', 'streaming').
* NOTE(review): consumers import this from a `.gen` module, so this file is
* presumably generated — confirm before editing it by hand.
*/
export enum SearchStreamingState {
Done = 'done',
Error = 'error',
Pending = 'pending',
Streaming = 'streaming',
}
/**
* static fields are pre-set in the UI, dynamic fields are added by the user

View File

@@ -58,6 +58,7 @@ import {
createTableFrameFromSearch,
createTableFrameFromTraceQlQuery,
} from './resultTransformer';
import { doTempoChannelStream } from './streaming';
import { SearchQueryParams, TempoQuery, TempoJsonData } from './types';
import { getErrorMessage } from './utils';
@@ -97,6 +98,7 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
this.lokiSearch = instanceSettings.jsonData.lokiSearch;
this.traceQuery = instanceSettings.jsonData.traceQuery;
this.languageProvider = new TempoLanguageProvider(this);
if (!this.search?.filters) {
this.search = {
...this.search,
@@ -221,7 +223,49 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
app: options.app ?? '',
grafana_version: config.buildInfo.version,
query: queryValue ?? '',
streaming: appliedQuery.streaming,
});
if (appliedQuery.streaming) {
subQueries.push(this.handleStreamingSearch(options, targets.traceql));
} else {
subQueries.push(
this._request('/api/search', {
q: queryValue,
limit: options.targets[0].limit ?? DEFAULT_LIMIT,
start: options.range.from.unix(),
end: options.range.to.unix(),
}).pipe(
map((response) => {
return {
data: createTableFrameFromTraceQlQuery(response.data.traces, this.instanceSettings),
};
}),
catchError((err) => {
return of({ error: { message: getErrorMessage(err.data.message) }, data: [] });
})
)
);
}
}
} catch (error) {
return of({ error: { message: error instanceof Error ? error.message : 'Unknown error occurred' }, data: [] });
}
}
if (targets.traceqlSearch?.length) {
try {
const queryValue = generateQueryFromFilters(targets.traceqlSearch[0].filters);
reportInteraction('grafana_traces_traceql_search_queried', {
datasourceType: 'tempo',
app: options.app ?? '',
grafana_version: config.buildInfo.version,
query: queryValue ?? '',
streaming: targets.traceqlSearch[0].streaming,
});
if (targets.traceqlSearch[0].streaming) {
subQueries.push(this.handleStreamingSearch(options, targets.traceqlSearch, queryValue));
} else {
subQueries.push(
this._request('/api/search', {
q: queryValue,
@@ -244,36 +288,6 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
return of({ error: { message: error instanceof Error ? error.message : 'Unknown error occurred' }, data: [] });
}
}
if (targets.traceqlSearch?.length) {
try {
const queryValue = generateQueryFromFilters(targets.traceqlSearch[0].filters);
reportInteraction('grafana_traces_traceql_search_queried', {
datasourceType: 'tempo',
app: options.app ?? '',
grafana_version: config.buildInfo.version,
query: queryValue ?? '',
});
subQueries.push(
this._request('/api/search', {
q: queryValue,
limit: options.targets[0].limit ?? DEFAULT_LIMIT,
start: options.range.from.unix(),
end: options.range.to.unix(),
}).pipe(
map((response) => {
return {
data: createTableFrameFromTraceQlQuery(response.data.traces, this.instanceSettings),
};
}),
catchError((err) => {
return of({ error: { message: getErrorMessage(err.data.message) }, data: [] });
})
)
);
} catch (error) {
return of({ error: { message: error instanceof Error ? error.message : 'Unknown error occurred' }, data: [] });
}
}
if (targets.upload?.length) {
if (this.uploadedJson) {
@@ -370,7 +384,9 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
* @private
*/
handleTraceIdQuery(options: DataQueryRequest<TempoQuery>, targets: TempoQuery[]): Observable<DataQueryResponse> {
const validTargets = targets.filter((t) => t.query).map((t) => ({ ...t, query: t.query.trim() }));
const validTargets = targets
.filter((t) => t.query)
.map((t): TempoQuery => ({ ...t, query: t.query.trim(), queryType: 'traceId' }));
if (!validTargets.length) {
return EMPTY;
}
@@ -409,6 +425,30 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
return request;
}
/**
 * Opens one streaming search per usable target and merges the resulting responses.
 *
 * A target is usable when either it carries its own `query` or the `query`
 * argument is non-empty. When `query` is given it replaces the target's query;
 * otherwise the target's own query is used after trimming whitespace.
 *
 * @param options - the data query request forwarded to every stream
 * @param targets - candidate Tempo queries
 * @param query - optional query string that overrides each target's own query
 * @returns the merged stream responses, or `EMPTY` when no target is usable
 */
handleStreamingSearch(
  options: DataQueryRequest<TempoQuery>,
  targets: TempoQuery[],
  query?: string
): Observable<DataQueryResponse> {
  // First pass: collect the targets that actually have something to search for.
  const runnable: TempoQuery[] = [];
  for (const target of targets) {
    if (target.query || query) {
      runnable.push({ ...target, query: query || target.query.trim() });
    }
  }

  if (runnable.length === 0) {
    return EMPTY;
  }

  // Second pass: one channel stream per target; `this` is the datasource.
  const streams = runnable.map((target) => doTempoChannelStream(target, this, options, this.instanceSettings));
  return merge(...streams);
}
/**
 * Issues a GET request through `_request`, hidden from the inspector, and
 * resolves with the last value the request observable emits.
 *
 * @param url - endpoint passed through to `_request`
 * @param params - request parameters; defaults to an empty object
 */
async metadataRequest(url: string, params = {}) {
  const request$ = this._request(url, params, { method: 'GET', hideFromInspector: true });
  const lastResponse = await lastValueFrom(request$);
  return lastResponse;
}

View File

@@ -575,6 +575,7 @@ export function createTableFrameFromTraceQlQuery(
instanceSettings: DataSourceInstanceSettings
): DataFrame[] {
const frame = new MutableDataFrame({
name: 'Traces',
fields: [
{
name: 'traceID',
@@ -773,17 +774,17 @@ function transformSpanToTraceData(span: Span, traceID: string): TraceTableData {
};
span.attributes?.forEach((attr) => {
if (attr.value.boolValue) {
data[attr.key] = attr.value.boolValue;
if (attr.value.boolValue || attr.value.Value?.bool_value) {
data[attr.key] = attr.value.boolValue || attr.value.Value?.bool_value;
}
if (attr.value.doubleValue) {
data[attr.key] = attr.value.doubleValue;
if (attr.value.doubleValue || attr.value.Value?.double_value) {
data[attr.key] = attr.value.doubleValue || attr.value.Value?.double_value;
}
if (attr.value.intValue) {
data[attr.key] = attr.value.intValue;
if (attr.value.intValue || attr.value.Value?.int_value) {
data[attr.key] = attr.value.intValue || attr.value.Value?.int_value;
}
if (attr.value.stringValue) {
data[attr.key] = attr.value.stringValue;
if (attr.value.stringValue || attr.value.Value?.string_value) {
data[attr.key] = attr.value.stringValue || attr.value.Value?.string_value;
}
});

View File

@@ -0,0 +1,166 @@
import { capitalize } from 'lodash';
import { map, Observable, defer, mergeMap } from 'rxjs';
import { v4 as uuidv4 } from 'uuid';
import {
DataFrame,
DataQueryRequest,
DataQueryResponse,
DataSourceInstanceSettings,
FieldType,
LiveChannelScope,
LoadingState,
MutableDataFrame,
ThresholdsConfig,
ThresholdsMode,
} from '@grafana/data';
import { getGrafanaLiveSrv } from '@grafana/runtime';
import { SearchStreamingState } from './dataquery.gen';
import { TempoDatasource } from './datasource';
import { createTableFrameFromTraceQlQuery } from './resultTransformer';
import { SearchMetrics, TempoJsonData, TempoQuery } from './types';
/**
 * Generates a key for a live stream.
 *
 * @returns a promise resolving to a newly generated v4 UUID string
 */
export async function getLiveStreamKey(): Promise<string> {
  const streamKey: string = uuidv4();
  return streamKey;
}
/**
 * Runs a Tempo search over a Grafana Live channel and emits a response for
 * every event received on that channel.
 *
 * The channel is addressed as `search/<key>` in the datasource scope, using
 * the datasource uid as namespace and a key from `getLiveStreamKey()`. The
 * query is sent as the channel payload together with the request time range
 * as ISO strings.
 *
 * Each event that carries a `message` is expected to hold one row with the
 * schema `[traces, metrics, state, error]`:
 * - `Done` / `Streaming` states set the emitted `LoadingState` accordingly;
 * - an `Error` state throws, so the returned observable errors with the
 *   message taken from the `error` column;
 * - any other state (e.g. `Pending`) leaves the previous `LoadingState` as is.
 *
 * Events without a `message` re-emit the most recent frames and state.
 *
 * @param query - the Tempo query to run; spread into the channel payload
 * @param ds - the datasource; its `uid` is used as the channel namespace
 * @param options - the originating request; only `range` is read
 * @param instanceSettings - passed through to the traces table transformer
 * @returns an observable of responses containing the progress frame followed
 *   by the traces table frame(s)
 */
export function doTempoChannelStream(
  query: TempoQuery,
  ds: TempoDatasource,
  options: DataQueryRequest<TempoQuery>,
  instanceSettings: DataSourceInstanceSettings<TempoJsonData>
): Observable<DataQueryResponse> {
  const range = options.range;

  return defer(() => getLiveStreamKey()).pipe(
    mergeMap((key) => {
      // Accumulators live inside the deferred pipeline so that every
      // subscription (including re-subscribes/retries) starts from a clean
      // slate instead of inheriting frames and state from a previous run.
      let frames: DataFrame[] | undefined = undefined;
      let state: LoadingState = LoadingState.NotStarted;

      return getGrafanaLiveSrv()
        .getStream<MutableDataFrame>({
          scope: LiveChannelScope.DataSource,
          namespace: ds.uid,
          path: `search/${key}`,
          data: {
            ...query,
            timeRange: {
              from: range.from.toISOString(),
              to: range.to.toISOString(),
            },
          },
        })
        .pipe(
          map((evt) => {
            if ('message' in evt && evt?.message) {
              // Schema should be [traces, metrics, state, error]
              const traces = evt.message.data.values[0][0];
              const metrics = evt.message.data.values[1][0];
              const frameState: SearchStreamingState = evt.message.data.values[2][0];
              const error = evt.message.data.values[3][0];

              switch (frameState) {
                case SearchStreamingState.Done:
                  state = LoadingState.Done;
                  break;
                case SearchStreamingState.Streaming:
                  state = LoadingState.Streaming;
                  break;
                case SearchStreamingState.Error:
                  // Surfaces the backend error through the observable's error channel.
                  throw new Error(error);
              }

              frames = [
                metricsDataFrame(metrics, frameState),
                ...createTableFrameFromTraceQlQuery(traces, instanceSettings),
              ];
            }

            return {
              data: frames || [],
              state,
            };
          })
        );
    })
  );
}
/**
 * Builds the single-row "Streaming Progress" table frame shown alongside
 * streamed search results.
 *
 * Columns: the capitalized streaming state, the `totalBlocks`, `completedJobs`
 * and `totalJobs` metrics as received, and a `progress` percentage rendered as
 * a gradient gauge (blue below 75, green from 75).
 *
 * @param metrics - search metrics reported with the current stream message
 * @param state - the streaming state reported with the current stream message
 * @returns a data frame of length 1 with `refId` `streaming-progress`
 */
function metricsDataFrame(metrics: SearchMetrics, state: SearchStreamingState) {
  const progressThresholds: ThresholdsConfig = {
    steps: [
      {
        color: 'blue',
        value: -Infinity,
      },
      {
        color: 'green',
        value: 75,
      },
    ],
    mode: ThresholdsMode.Absolute,
  };

  // A finished search is always 100%. Otherwise use completed/total jobs;
  // `|| 1` (not `??`) is deliberate so a `totalJobs` of 0 cannot divide by zero.
  const progress =
    state === SearchStreamingState.Done ? 100 : ((metrics.completedJobs || 0) / (metrics.totalJobs || 1)) * 100;

  const frame: DataFrame = {
    refId: 'streaming-progress',
    name: 'Streaming Progress',
    length: 1,
    fields: [
      {
        name: 'state',
        type: FieldType.string,
        values: [capitalize(state.toString())],
        config: {
          displayNameFromDS: 'State',
        },
      },
      {
        name: 'totalBlocks',
        type: FieldType.number,
        values: [metrics.totalBlocks],
        config: {
          displayNameFromDS: 'Total Blocks',
        },
      },
      {
        name: 'completedJobs',
        type: FieldType.number,
        values: [metrics.completedJobs],
        config: {
          displayNameFromDS: 'Completed Jobs',
        },
      },
      {
        name: 'totalJobs',
        type: FieldType.number,
        values: [metrics.totalJobs],
        config: {
          displayNameFromDS: 'Total Jobs',
        },
      },
      {
        name: 'progress',
        type: FieldType.number,
        values: [progress],
        config: {
          // Was 'Total Jobs' (copied from the field above), which produced two
          // identically titled columns in the progress table.
          displayNameFromDS: 'Progress',
          unit: 'percent',
          min: 0,
          max: 100,
          custom: {
            cellOptions: {
              type: 'gauge',
              mode: 'gradient',
            },
          },
          thresholds: progressThresholds,
        },
      },
    ],
    meta: {
      preferredVisualisationType: 'table',
    },
  };
  return frame;
}

View File

@@ -1,7 +1,8 @@
import React from 'react';
import { EditorField, EditorRow } from '@grafana/experimental';
import { AutoSizeInput } from '@grafana/ui';
import { config } from '@grafana/runtime';
import { AutoSizeInput, Switch } from '@grafana/ui';
import { QueryOptionGroup } from 'app/plugins/datasource/prometheus/querybuilder/shared/QueryOptionGroup';
import { DEFAULT_LIMIT } from '../datasource';
@@ -17,14 +18,27 @@ export const TempoQueryBuilderOptions = React.memo<Props>(({ onChange, query })
query.limit = DEFAULT_LIMIT;
}
if (!query.hasOwnProperty('streaming')) {
query.streaming = true;
}
const onLimitChange = (e: React.FormEvent<HTMLInputElement>) => {
onChange({ ...query, limit: parseInt(e.currentTarget.value, 10) });
};
const onStreamingChange = (e: React.FormEvent<HTMLInputElement>) => {
onChange({ ...query, streaming: e.currentTarget.checked });
};
const collapsedInfoList = [`Limit: ${query.limit || DEFAULT_LIMIT}`];
if (!config.featureToggles.disableTraceQLStreaming) {
collapsedInfoList.push(`Streaming: ${query.streaming ? 'Yes' : 'No'}`);
}
return (
<>
<EditorRow>
<QueryOptionGroup title="Options" collapsedInfo={[`Limit: ${query.limit || DEFAULT_LIMIT}`]}>
<QueryOptionGroup title="Options" collapsedInfo={collapsedInfoList}>
<EditorField label="Limit" tooltip="Maximum number of traces to return.">
<AutoSizeInput
className="width-4"
@@ -36,6 +50,11 @@ export const TempoQueryBuilderOptions = React.memo<Props>(({ onChange, query })
value={query.limit}
/>
</EditorField>
{!config.featureToggles.disableTraceQLStreaming && (
<EditorField label="Stream response" tooltip="Stream the query response to receive partial results sooner">
<Switch value={query.streaming || false} onChange={onStreamingChange} />
</EditorField>
)}
</QueryOptionGroup>
</EditorRow>
</>

View File

@@ -61,11 +61,10 @@ export type TraceSearchMetadata = {
export type SearchMetrics = {
inspectedTraces?: number;
inspectedBytes?: number;
inspectedBlocks?: number;
skippedBlocks?: number;
skippedTraces?: number;
totalBlocks?: number;
completedJobs?: number;
totalJobs?: number;
totalBlockBytes?: number;
spanSets?: Spanset[];
};
export enum SpanKind {
@@ -89,7 +88,18 @@ export type Span = {
endTimeUnixNano?: string;
attributes?: Array<{
key: string;
value: { stringValue?: string; intValue?: string; boolValue?: boolean; doubleValue?: string };
value: {
stringValue?: string;
intValue?: string;
boolValue?: boolean;
doubleValue?: string;
Value?: {
string_value?: string;
int_value?: string;
bool_value?: boolean;
double_value?: string;
};
};
}>;
dropped_attributes_count?: number;
};