Tempo: TraceQL metrics streaming (#99037)

* TraceQL metrics streaming POC

* Reduce duplicate frames by using scan() and combineResponses()

* Trying to remove samples outside of time range

* Remove code to clean out of range

* Metrics streaming config toggle

* Sync opening the search and metrics options

* Fix tests

* Fix issues after conflicts

* Fix tests

* Use absolute value when computing minXDelta

* Revert last commit

* Fix frame sorting

* Remove all duplicates

* Use fields from schema to get the frames

* Use FieldCache

* Address PR comments
This commit is contained in:
Andre Pereira
2025-02-11 11:11:01 +00:00
committed by GitHub
parent cbe5741096
commit d48802cdfb
16 changed files with 453 additions and 77 deletions

View File

@@ -65,6 +65,7 @@ describe('TraceQLSearch', () => {
},
} as TempoDatasource;
datasource.isStreamingSearchEnabled = () => false;
datasource.isStreamingMetricsEnabled = () => false;
const lp = new TempoLanguageProvider(datasource);
lp.getIntrinsics = () => ['duration'];
lp.generateQueryFromFilters = () => '{}';
@@ -221,6 +222,8 @@ describe('TraceQLSearch', () => {
},
} as TempoDatasource;
datasource.isStreamingSearchEnabled = () => false;
datasource.isStreamingMetricsEnabled = () => false;
const lp = new TempoLanguageProvider(datasource);
lp.getIntrinsics = () => ['duration'];
lp.generateQueryFromFilters = () => '{}';

View File

@@ -274,7 +274,8 @@ const TraceQLSearch = ({ datasource, query, onChange, onClearResults, app, addVa
<TempoQueryBuilderOptions
onChange={onChange}
query={query}
isStreaming={datasource.isStreamingSearchEnabled() ?? false}
searchStreaming={datasource.isStreamingSearchEnabled() ?? false}
metricsStreaming={datasource.isStreamingMetricsEnabled() ?? false}
/>
</div>
{error ? (

View File

@@ -13,9 +13,11 @@ export interface Props {
collapsedInfo: string[];
queryStats?: QueryStats | null;
children: React.ReactNode;
onToggle?: (isOpen: boolean) => void;
isOpen?: boolean;
}
export function QueryOptionGroup({ title, children, collapsedInfo, queryStats }: Props) {
export function QueryOptionGroup({ title, children, collapsedInfo, queryStats, onToggle, isOpen: propsIsOpen }: Props) {
const [isOpen, toggleOpen] = useToggle(false);
const styles = useStyles2(getStyles);
@@ -24,8 +26,8 @@ export function QueryOptionGroup({ title, children, collapsedInfo, queryStats }:
<Collapse
className={styles.collapse}
collapsible
isOpen={isOpen}
onToggle={toggleOpen}
isOpen={propsIsOpen ?? isOpen}
onToggle={onToggle ?? toggleOpen}
label={
<Stack gap={0}>
<h6 className={styles.title}>{title}</h6>

View File

@@ -15,6 +15,7 @@ import { FeatureName, featuresToTempoVersion } from '../datasource';
interface StreamingOptions extends DataSourceJsonData {
streamingEnabled?: {
search?: boolean;
metrics?: boolean;
};
}
interface Props extends DataSourcePluginOptionsEditorProps<StreamingOptions> {}
@@ -27,8 +28,7 @@ export const StreamingSection = ({ options, onOptionsChange }: Props) => {
isCollapsible={false}
description={
<Stack gap={0.5}>
<div>{`Enable streaming for different Tempo features.
Currently supported only for search queries and from Tempo version ${featuresToTempoVersion[FeatureName.streaming]} onwards.`}</div>
<div>Enable streaming for different Tempo features.</div>
<a
href={'https://grafana.com/docs/tempo/latest/traceql/#stream-query-results'}
target={'_blank'}
@@ -46,8 +46,8 @@ export const StreamingSection = ({ options, onOptionsChange }: Props) => {
</Alert>
<InlineFieldRow>
<InlineField
tooltip={`Enable streaming for search queries. Minimum required version for Tempo: ${featuresToTempoVersion[FeatureName.streaming]}.`}
label="Queries"
tooltip={`Enable streaming for search queries. Minimum required version for Tempo: ${featuresToTempoVersion[FeatureName.searchStreaming]}.`}
label="Search queries"
labelWidth={26}
>
<InlineSwitch
@@ -64,6 +64,26 @@ export const StreamingSection = ({ options, onOptionsChange }: Props) => {
/>
</InlineField>
</InlineFieldRow>
<InlineFieldRow>
<InlineField
tooltip={`Enable streaming for metrics queries. Minimum required version for Tempo: ${featuresToTempoVersion[FeatureName.metricsStreaming]}.`}
label="Metrics queries"
labelWidth={26}
>
<InlineSwitch
id={'streamingEnabled.metrics'}
// TECHDEBT: We should check whether the feature is supported by the Tempo version,
// but we don't have easy access to that information here
value={options.jsonData.streamingEnabled?.metrics || false}
onChange={(event: React.SyntheticEvent<HTMLInputElement>) => {
updateDatasourcePluginJsonDataOption({ onOptionsChange, options }, 'streamingEnabled', {
...options.jsonData.streamingEnabled,
metrics: event.currentTarget.checked,
});
}}
/>
</InlineField>
</InlineFieldRow>
</ConfigSection>
);
};

View File

@@ -58,7 +58,7 @@ import {
transformFromOTLP as transformFromOTEL,
transformTrace,
} from './resultTransformer';
import { doTempoChannelStream } from './streaming';
import { doTempoMetricsStreaming, doTempoSearchStreaming } from './streaming';
import { TempoJsonData, TempoQuery } from './types';
import { getErrorMessage, migrateFromSearchToTraceQLSearch } from './utils';
import { TempoVariableSupport } from './variables';
@@ -67,7 +67,8 @@ export const DEFAULT_LIMIT = 20;
export const DEFAULT_SPSS = 3; // spans per span set
/**
 * Names of the Tempo features that require a minimum Tempo version.
 * Each member is used as a key of `featuresToTempoVersion`.
 */
export enum FeatureName {
streaming = 'streaming',
searchStreaming = 'searchStreaming',
metricsStreaming = 'metricsStreaming',
}
/* Map, for each feature (e.g., streaming), the minimum Tempo version required to have that
@@ -75,7 +76,8 @@ export enum FeatureName {
** target version, the feature is disabled in Grafana (frontend).
*/
export const featuresToTempoVersion = {
[FeatureName.streaming]: '2.2.0',
[FeatureName.searchStreaming]: '2.2.0',
[FeatureName.metricsStreaming]: '2.7.0',
};
// The version that we use as default in case we cannot retrieve it from the backend.
@@ -115,6 +117,7 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
streamingEnabled?: {
search?: boolean;
metrics?: boolean;
};
// The version of Tempo running on the backend. `null` if we cannot retrieve it for whatever reason
@@ -291,6 +294,18 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
isStreamingSearchEnabled() {
return this.streamingEnabled?.search && config.liveEnabled;
}
/**
 * Check if streaming for metrics queries is enabled (and available).
 *
 * We need to check:
 * - the Tempo data source plugin toggle, to disable streaming if the user disabled it in the data source configuration
 * - if Grafana Live is enabled
 *
 * @return true if streaming for metrics queries is enabled, false otherwise
 */
isStreamingMetricsEnabled(): boolean {
  // `streamingEnabled` and its `metrics` flag are both optional, so the bare `&&` expression
  // can evaluate to `undefined`. Coerce it so the method really returns the documented true/false.
  return Boolean(this.streamingEnabled?.metrics && config.liveEnabled);
}
isTraceQlMetricsQuery(query: string): boolean {
// Check whether this is a metrics query by checking if it contains a metrics function
@@ -355,8 +370,13 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
app: options.app ?? '',
grafana_version: config.buildInfo.version,
query: queryValue ?? '',
streaming: this.isStreamingMetricsEnabled(),
});
subQueries.push(this.handleTraceQlMetricsQuery(options, targets.traceql));
if (this.isStreamingMetricsEnabled()) {
subQueries.push(this.handleMetricsStreamingQuery(options, targets.traceql, queryValue));
} else {
subQueries.push(this.handleTraceQlMetricsQuery(options, targets.traceql));
}
} else {
reportInteraction('grafana_traces_traceql_queried', {
datasourceType: 'tempo',
@@ -689,7 +709,7 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
return merge(
...targets.map((target) =>
doTempoChannelStream(
doTempoSearchStreaming(
{ ...target, query },
this, // the datasource
options,
@@ -699,6 +719,28 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
);
}
// `query` is built from `targets` at the current call site, so passing both is redundant;
// this signature could probably be reduced to a single argument.
/**
 * Start one metrics stream per target and merge their responses into a single observable.
 *
 * @param options the query request, forwarded unchanged to every stream
 * @param targets the targets to stream; each one is sent with `query` as its query string
 * @param query the query string shared by all targets
 * @returns `EMPTY` when `query` is an empty string, otherwise the merged streams
 */
handleMetricsStreamingQuery(
  options: DataQueryRequest<TempoQuery>,
  targets: TempoQuery[],
  query: string
): Observable<DataQueryResponse> {
  // Nothing to stream for an empty query
  if (query === '') {
    return EMPTY;
  }

  const streams = targets.map((target) => {
    const streamedQuery = { ...target, query };
    return doTempoMetricsStreaming(streamedQuery, this /* the datasource */, options);
  });
  return merge(...streams);
}
makeTraceIdRequest(options: DataQueryRequest<TempoQuery>, targets: TempoQuery[]): DataQueryRequest<TempoQuery> {
const request = {
...options,

View File

@@ -1,31 +1,36 @@
import { capitalize } from 'lodash';
import { map, Observable, takeWhile } from 'rxjs';
import { map, Observable, scan, takeWhile } from 'rxjs';
import { v4 as uuidv4 } from 'uuid';
import {
DataFrame,
dataFrameFromJSON,
DataQueryRequest,
DataQueryResponse,
DataSourceInstanceSettings,
FieldCache,
FieldType,
LiveChannelScope,
LoadingState,
MutableDataFrame,
sortDataFrame,
ThresholdsConfig,
ThresholdsMode,
} from '@grafana/data';
import { cloneQueryResponse, combineResponses } from '@grafana/o11y-ds-frontend';
import { getGrafanaLiveSrv } from '@grafana/runtime';
import { SearchStreamingState } from './dataquery.gen';
import { DEFAULT_SPSS, TempoDatasource } from './datasource';
import { formatTraceQLResponse } from './resultTransformer';
import { SearchMetrics, TempoJsonData, TempoQuery } from './types';
import { stepToNanos } from './utils';
function getLiveStreamKey(): string {
return uuidv4();
}
export function doTempoChannelStream(
export function doTempoSearchStreaming(
query: TempoQuery,
ds: TempoDatasource,
options: DataQueryRequest<TempoQuery>,
@@ -67,11 +72,14 @@ export function doTempoChannelStream(
if ('message' in evt && evt?.message) {
const currentTime = performance.now();
const elapsedTime = currentTime - requestTime;
// Schema should be [traces, metrics, state, error]
const traces = evt.message.data.values[0][0];
const metrics = evt.message.data.values[1][0];
const frameState: SearchStreamingState = evt.message.data.values[2][0];
const error = evt.message.data.values[3][0];
const messageFrame = dataFrameFromJSON(evt.message);
const fieldCache = new FieldCache(messageFrame);
const traces = fieldCache.getFieldByName('result')?.values[0];
const metrics = fieldCache.getFieldByName('metrics')?.values[0];
const frameState = fieldCache.getFieldByName('state')?.values[0];
const error = fieldCache.getFieldByName('error')?.values[0];
switch (frameState) {
case SearchStreamingState.Done:
@@ -100,6 +108,127 @@ export function doTempoChannelStream(
);
}
/**
 * Run a TraceQL metrics query over a Grafana Live channel and emit the accumulated result as it streams in.
 *
 * A stream is opened on the data source channel `metrics/<random key>`, carrying the query, its step
 * converted to nanoseconds and the request time range. The `result`, `state` and `error` fields of each
 * message frame are looked up by name.
 *
 * @param query the Tempo query to run
 * @param ds the Tempo data source; its `uid` is used as the channel namespace
 * @param options the query request; only `options.range` is read here
 * @returns an observable whose emissions hold the frames received so far, merged by `mergeFrames`
 * @throws an `Error` (surfaced through the observable) when a message reports `SearchStreamingState.Error`
 */
export function doTempoMetricsStreaming(
  query: TempoQuery,
  ds: TempoDatasource,
  options: DataQueryRequest<TempoQuery>
): Observable<DataQueryResponse> {
  const range = options.range;
  const key = getLiveStreamKey();
  // Lives outside the operators so that a message with any other state keeps the previous loading state
  let state: LoadingState = LoadingState.NotStarted;
  const step = stepToNanos(query.step);

  return getGrafanaLiveSrv()
    .getStream<MutableDataFrame>({
      scope: LiveChannelScope.DataSource,
      namespace: ds.uid,
      path: `metrics/${key}`,
      data: {
        ...query,
        step,
        timeRange: {
          from: range.from.toISOString(),
          to: range.to.toISOString(),
        },
      },
    })
    .pipe(
      // Inclusive takeWhile: the terminating (Done/Error) message is still passed on to `map` below
      takeWhile((evt) => {
        if ('message' in evt && evt?.message) {
          // Read the state by field name, exactly like `map` does, instead of relying on the
          // position of the field inside the frame
          const frameState = new FieldCache(dataFrameFromJSON(evt.message)).getFieldByName('state')?.values[0];
          if (frameState === SearchStreamingState.Done || frameState === SearchStreamingState.Error) {
            return false;
          }
        }
        return true;
      }, true),
      map((evt) => {
        // Events without a message produce an empty, not-started response
        let newResult: DataQueryResponse = { data: [], state: LoadingState.NotStarted };
        if ('message' in evt && evt?.message) {
          const messageFrame = dataFrameFromJSON(evt.message);
          const fieldCache = new FieldCache(messageFrame);

          const data = fieldCache.getFieldByName('result')?.values[0];
          const frameState = fieldCache.getFieldByName('state')?.values[0];
          const error = fieldCache.getFieldByName('error')?.values[0];

          switch (frameState) {
            case SearchStreamingState.Done:
              state = LoadingState.Done;
              break;
            case SearchStreamingState.Streaming:
              state = LoadingState.Streaming;
              break;
            case SearchStreamingState.Error:
              throw new Error(error);
          }

          newResult = {
            data: data?.map(dataFrameFromJSON) ?? [],
            state,
          };
        }
        return newResult;
      }),
      // Merge results on acc. `scan` has no seed, so the first response is emitted as is and `acc`
      // is always defined afterwards; `map` never returns an empty value, so no guards are needed.
      scan((acc, curr) => mergeFrames(acc, curr))
    );
}
/**
 * Combine the accumulated response with a newly received one.
 *
 * The accumulator is cloned before combining. In every resulting frame that has a time field,
 * rows with a duplicate time value are removed and the frame is sorted on that field.
 * Frames without a time field are returned untouched.
 *
 * @param acc the response accumulated so far
 * @param newResult the response that has just been received
 * @returns the combined response, carrying the state of `newResult`
 */
function mergeFrames(acc: DataQueryResponse, newResult: DataQueryResponse): DataQueryResponse {
  const merged = combineResponses(cloneQueryResponse(acc), newResult);

  merged.data = merged.data.map((frame: DataFrame) => {
    const timeIdx = frame.fields.findIndex((field) => field.type === FieldType.time);
    if (timeIdx < 0) {
      return frame;
    }
    // Deduplicate in place first, then sort on the time field
    removeDuplicateTimeFieldValues(frame, timeIdx);
    return sortDataFrame(frame, timeIdx);
  });

  // The most recent message decides the loading state
  merged.state = newResult.state;
  return merged;
}
/**
 * Remove rows with a duplicate time value from the DataFrame, in place. This is necessary because Tempo sends
 * partial results to Grafana that we append to an existing DataFrame, which can result in several rows for the
 * same timestamp.
 *
 * For each timestamp the row with the lowest index is kept and every later row is dropped from all fields.
 * NOTE(review): this keeps the most recent value only if the newly received values are placed ahead of the
 * existing ones when the frames are combined — confirm against `combineResponses`.
 *
 * @param accFrame the frame to deduplicate; the `values` of each of its fields are replaced
 * @param timeFieldIndex index of the time field within `accFrame.fields`
 */
function removeDuplicateTimeFieldValues(accFrame: DataFrame, timeFieldIndex: number) {
  const seenTimes = new Set<number>();
  // A Set gives constant-time lookups in the filter below, instead of scanning an array for every row
  const indexesToRemove = new Set<number>();

  accFrame.fields[timeFieldIndex].values.forEach((value: number, index: number) => {
    if (seenTimes.has(value)) {
      indexesToRemove.add(index);
    } else {
      seenTimes.add(value);
    }
  });

  accFrame.fields.forEach((field) => {
    field.values = field.values.filter((_, index) => !indexesToRemove.has(index));
  });
}
function metricsDataFrame(metrics: SearchMetrics, state: SearchStreamingState, elapsedTime: number) {
const progressThresholds: ThresholdsConfig = {
steps: [

View File

@@ -71,7 +71,8 @@ export function QueryEditor(props: Props) {
<TempoQueryBuilderOptions
query={query}
onChange={props.onChange}
isStreaming={props.datasource.isStreamingSearchEnabled() ?? false}
searchStreaming={props.datasource.isStreamingSearchEnabled() ?? false}
metricsStreaming={props.datasource.isStreamingMetricsEnabled() ?? false}
/>
</div>
</>

View File

@@ -1,5 +1,6 @@
import { css } from '@emotion/css';
import * as React from 'react';
import { useToggle } from 'react-use';
import { GrafanaTheme2 } from '@grafana/data';
import { EditorField, EditorRow } from '@grafana/plugin-ui';
@@ -13,7 +14,8 @@ import { TempoQuery } from '../types';
interface Props {
onChange: (value: TempoQuery) => void;
query: Partial<TempoQuery> & TempoQuery;
isStreaming: boolean;
searchStreaming: boolean;
metricsStreaming: boolean;
}
/**
@@ -29,8 +31,9 @@ const parseIntWithFallback = (val: string, fallback: number) => {
return isNaN(parsed) ? fallback : parsed;
};
export const TempoQueryBuilderOptions = React.memo<Props>(({ onChange, query, isStreaming }) => {
export const TempoQueryBuilderOptions = React.memo<Props>(({ onChange, query, searchStreaming, metricsStreaming }) => {
const styles = useStyles2(getStyles);
const [isOpen, toggleOpen] = useToggle(false);
if (!query.hasOwnProperty('limit')) {
query.limit = DEFAULT_LIMIT;
@@ -76,19 +79,26 @@ export const TempoQueryBuilderOptions = React.memo<Props>(({ onChange, query, is
`Spans Limit: ${query.spss || DEFAULT_SPSS}`,
`Table Format: ${query.tableType === SearchTableType.Traces ? 'Traces' : 'Spans'}`,
'|',
`Streaming: ${isStreaming ? 'Enabled' : 'Disabled'}`,
`Streaming: ${searchStreaming ? 'Enabled' : 'Disabled'}`,
];
const collapsedMetricsOptions = [
`Step: ${query.step || 'auto'}`,
`Type: ${query.metricsQueryType === MetricsQueryType.Range ? 'Range' : 'Instant'}`,
'|',
`Streaming: ${metricsStreaming ? 'Enabled' : 'Disabled'}`,
// `Exemplars: ${query.exemplars !== undefined ? query.exemplars : 'auto'}`,
];
return (
<EditorRow>
<div className={styles.options}>
<QueryOptionGroup title="Search Options" collapsedInfo={collapsedSearchOptions}>
<QueryOptionGroup
title="Search Options"
collapsedInfo={collapsedSearchOptions}
isOpen={isOpen}
onToggle={toggleOpen}
>
<EditorField label="Limit" tooltip="Maximum number of traces to return.">
<AutoSizeInput
className="width-4"
@@ -122,11 +132,16 @@ export const TempoQueryBuilderOptions = React.memo<Props>(({ onChange, query, is
/>
</EditorField>
<EditorField label="Streaming" tooltip={<StreamingTooltip />} tooltipInteractive>
<div>{isStreaming ? 'Enabled' : 'Disabled'}</div>
<div>{searchStreaming ? 'Enabled' : 'Disabled'}</div>
</EditorField>
</QueryOptionGroup>
<QueryOptionGroup title="Metrics Options" collapsedInfo={collapsedMetricsOptions}>
<QueryOptionGroup
title="Metrics Options"
collapsedInfo={collapsedMetricsOptions}
isOpen={isOpen}
onToggle={toggleOpen}
>
<EditorField
label="Step"
tooltip="Defines the step for metric queries. Use duration notation, for example 30s or 1m"
@@ -150,6 +165,10 @@ export const TempoQueryBuilderOptions = React.memo<Props>(({ onChange, query, is
onChange={onMetricsQueryTypeChange}
/>
</EditorField>
<EditorField label="Streaming" tooltip={<StreamingTooltip />} tooltipInteractive>
<div>{metricsStreaming ? 'Enabled' : 'Disabled'}</div>
</EditorField>
{/*<EditorField*/}
{/* label="Exemplars"*/}
{/* tooltip="Defines the amount of exemplars to request for metric queries. A value of 0 means no exemplars."*/}

View File

@@ -1,4 +1,4 @@
import { DataSourceApi } from '@grafana/data';
import { DataSourceApi, parseDuration } from '@grafana/data';
import { getDataSourceSrv } from '@grafana/runtime';
import { generateId } from './SearchTraceQLEditor/TagsInput';
@@ -99,3 +99,34 @@ export const migrateFromSearchToTraceQLSearch = (query: TempoQuery) => {
};
return migratedQuery;
};
/**
 * Convert a step given in duration notation (for example `30s`, `1m` or `500ms`) to nanoseconds.
 *
 * Sub-second units are converted directly: `ns`, `ms`, and microseconds written as `us`, `µs`
 * (micro sign) or `μs` (Greek mu). Anything else is handed to `parseDuration`, and only the
 * seconds, minutes and hours it reports are added up.
 *
 * @param step the step to convert
 * @returns the step in nanoseconds, or 0 when `step` is missing, empty or does not start a `<digits><unit>` pair
 */
export const stepToNanos = (step?: string): number => {
  if (!step) {
    return 0;
  }

  const match = step.match(/(\d+)(.+)/);
  const rawLength = match?.[1];
  const unit = match?.[2];

  if (!rawLength) {
    return 0;
  }

  // A Map (rather than a plain object) so that units such as "constructor" cannot hit inherited keys
  const subSecondUnitNanos = new Map<string, number>([
    ['ns', 1],
    ['us', 1000],
    ['\u00b5s', 1000], // micro sign
    ['\u03bcs', 1000], // Greek small letter mu
    ['ms', 1000000],
  ]);
  const nanosPerUnit = unit === undefined ? undefined : subSecondUnitNanos.get(unit);
  if (nanosPerUnit !== undefined) {
    return parseInt(rawLength, 10) * nanosPerUnit;
  }

  const duration = parseDuration(step);
  return (
    (duration.seconds || 0) * 1000000000 +
    (duration.minutes || 0) * 60000000000 +
    (duration.hours || 0) * 3600000000000
  );
};