Tempo: Support multiple filter expressions for service graph queries (#81037)

* support "OR" for service graph queries

* make betterer happy

* continue appeasing betterer

* betterer results
This commit is contained in:
Domas 2024-01-26 16:37:49 +02:00 committed by GitHub
parent 8c212a1952
commit e9a99a46b0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 233 additions and 75 deletions

View File

@ -5716,12 +5716,7 @@ exports[`better eslint`] = {
[0, 0, 0, "Unexpected any. Specify a different type.", "6"],
[0, 0, 0, "Unexpected any. Specify a different type.", "7"],
[0, 0, 0, "Unexpected any. Specify a different type.", "8"],
[0, 0, 0, "Unexpected any. Specify a different type.", "9"],
[0, 0, 0, "Unexpected any. Specify a different type.", "10"],
[0, 0, 0, "Unexpected any. Specify a different type.", "11"],
[0, 0, 0, "Unexpected any. Specify a different type.", "12"],
[0, 0, 0, "Unexpected any. Specify a different type.", "13"],
[0, 0, 0, "Unexpected any. Specify a different type.", "14"]
[0, 0, 0, "Unexpected any. Specify a different type.", "9"]
],
"public/app/plugins/datasource/tempo/datasource.ts:5381": [
[0, 0, 0, "Do not use any type assertions.", "0"],

View File

@ -44,9 +44,9 @@ export interface TempoQuery extends common.DataQuery {
*/
serviceMapIncludeNamespace?: boolean;
/**
* Filters to be included in a PromQL query to select data for the service graph. Example: {client="app",service="app"}
* Filters to be included in a PromQL query to select data for the service graph. Example: {client="app",service="app"}. Providing multiple values will produce union of results for each filter, using PromQL OR operator internally.
*/
serviceMapQuery?: string;
serviceMapQuery?: (string | Array<string>);
/**
* @deprecated Query traces by service name
*/

View File

@ -126,8 +126,8 @@ type TempoQuery struct {
// Use service.namespace in addition to service.name to uniquely identify a service.
ServiceMapIncludeNamespace *bool `json:"serviceMapIncludeNamespace,omitempty"`
// Filters to be included in a PromQL query to select data for the service graph. Example: {client="app",service="app"}
ServiceMapQuery *string `json:"serviceMapQuery,omitempty"`
// Filters to be included in a PromQL query to select data for the service graph. Example: {client="app",service="app"}. Providing multiple values will produce union of results for each filter, using PromQL OR operator internally.
ServiceMapQuery *any `json:"serviceMapQuery,omitempty"`
// @deprecated Query traces by service name
ServiceName *string `json:"serviceName,omitempty"`

View File

@ -67,7 +67,9 @@ export function ServiceGraphSection({
);
}
const filters = queryToFilter(query.serviceMapQuery || '');
const filters = queryToFilter(
(Array.isArray(query.serviceMapQuery) ? query.serviceMapQuery[0] : query.serviceMapQuery) || ''
);
return (
<div>

View File

@ -38,8 +38,8 @@ composableKinds: DataQuery: {
minDuration?: string
// @deprecated Define the maximum duration to select traces. Use duration format, for example: 1.2s, 100ms
maxDuration?: string
// Filters to be included in a PromQL query to select data for the service graph. Example: {client="app",service="app"}
serviceMapQuery?: string
// Filters to be included in a PromQL query to select data for the service graph. Example: {client="app",service="app"}. Providing multiple values will produce union of results for each filter, using PromQL OR operator internally.
serviceMapQuery?: string | [...string]
// Use service.namespace in addition to service.name to uniquely identify a service.
serviceMapIncludeNamespace?: bool
// Defines the maximum number of traces that are returned from Tempo

View File

@ -41,9 +41,9 @@ export interface TempoQuery extends common.DataQuery {
*/
serviceMapIncludeNamespace?: boolean;
/**
* Filters to be included in a PromQL query to select data for the service graph. Example: {client="app",service="app"}
* Filters to be included in a PromQL query to select data for the service graph. Example: {client="app",service="app"}. Providing multiple values will produce union of results for each filter, using PromQL OR operator internally.
*/
serviceMapQuery?: string;
serviceMapQuery?: (string | Array<string>);
/**
* @deprecated Query traces by service name
*/

View File

@ -11,6 +11,9 @@ import {
createDataFrame,
PluginType,
CoreApp,
DataSourceApi,
DataQueryRequest,
getTimeZone,
} from '@grafana/data';
import {
BackendDataSourceResponse,
@ -18,8 +21,10 @@ import {
setBackendSrv,
setDataSourceSrv,
TemplateSrv,
DataSourceSrv,
BackendSrv,
} from '@grafana/runtime';
import { BarGaugeDisplayMode, TableCellDisplayMode } from '@grafana/schema';
import { BarGaugeDisplayMode, DataQuery, TableCellDisplayMode } from '@grafana/schema';
import { TempoVariableQueryType } from './VariableQueryEditor';
import { createFetchResponse } from './_importedDependencies/test/helpers/createFetchResponse';
@ -70,7 +75,7 @@ describe('Tempo data source', () => {
});
describe('Variables should be interpolated correctly', () => {
function getQuery(): TempoQuery {
function getQuery(serviceMapQuery: string | string[] = '$interpolationVar'): TempoQuery {
return {
refId: 'x',
queryType: 'traceql',
@ -84,7 +89,7 @@ describe('Tempo data source', () => {
search: '$interpolationVar',
minDuration: '$interpolationVar',
maxDuration: '$interpolationVar',
serviceMapQuery: '$interpolationVar',
serviceMapQuery,
filters: [],
};
}
@ -137,6 +142,13 @@ describe('Tempo data source', () => {
expect(resp.minDuration).toBe(scopedText);
expect(resp.maxDuration).toBe(scopedText);
});
it('when serviceMapQuery is an array', async () => {
  // Every entry of an array-valued serviceMapQuery must be interpolated, not just the first one.
  const datasource = new TempoDatasource(defaultSettings, templateSrv);
  const arrayQuery = getQuery(['$interpolationVar', '$interpolationVar']);
  const interpolated = datasource.interpolateVariablesInQueries([arrayQuery], {});
  for (const index of [0, 1]) {
    expect(interpolated[0].serviceMapQuery?.[index]).toBe('scopedInterpolationText');
  }
});
});
it('parses json fields from backend', async () => {
@ -478,7 +490,7 @@ describe('Tempo service graph view', () => {
},
},
});
setDataSourceSrv(backendSrvWithPrometheus as any);
setDataSourceSrv(dataSourceSrvWithPrometheus(prometheusMock()));
const response = await lastValueFrom(
ds.query({ targets: [{ queryType: 'serviceMap' }], range: getDefaultTimeRange(), app: CoreApp.Explore } as any)
);
@ -562,10 +574,104 @@ describe('Tempo service graph view', () => {
expect(response.data[2].fields[0].values.length).toBe(2);
});
it('runs correct queries with single serviceMapQuery defined', async () => {
  // Tempo data source whose service map points at the data source with uid "prom".
  const ds = new TempoDatasource({
    ...defaultSettings,
    jsonData: { serviceMap: { datasourceUid: 'prom' } },
  });
  const promMock = prometheusMock();
  setDataSourceSrv(dataSourceSrvWithPrometheus(promMock));

  const response = await lastValueFrom(
    ds.query({
      targets: [{ queryType: 'serviceMap', serviceMapQuery: '{ foo="bar" }', refId: 'foo', filters: [] }],
      range: getDefaultTimeRange(),
      app: CoreApp.Explore,
      requestId: '1',
      interval: '60s',
      intervalMs: 60000,
      scopedVars: {},
      startTime: Date.now(),
      timezone: getTimeZone(),
    })
  );

  // The response carries exactly two frames: nodes first, edges second.
  expect(response.data).toHaveLength(2);
  expect(response.state).toBe(LoadingState.Done);
  expect(response.data[0].name).toBe('Nodes');
  expect(response.data[1].name).toBe('Edges');

  expect(promMock.query).toHaveBeenCalledTimes(3);

  // Request object handed to the mocked Prometheus `query` on its n-th invocation.
  const nthQuery = (n: number) =>
    (promMock.query as jest.MockedFn<jest.MockableFunction>).mock.calls[n][0] as DataQueryRequest<PromQuery>;

  // One target per service graph metric, each carrying the single filter verbatim.
  const expectedExprs = [
    'sum by (client, server) (rate(traces_service_graph_request_server_seconds_sum{ foo="bar" }[$__range]))',
    'sum by (client, server) (rate(traces_service_graph_request_total{ foo="bar" }[$__range]))',
    'sum by (client, server) (rate(traces_service_graph_request_failed_total{ foo="bar" }[$__range]))',
    'sum by (client, server) (rate(traces_service_graph_request_server_seconds_bucket{ foo="bar" }[$__range]))',
  ];
  expectedExprs.forEach((expectedExpr, targetIndex) => {
    expect(nthQuery(0).targets[targetIndex].expr).toBe(expectedExpr);
  });
});
it('runs correct queries with multiple serviceMapQuery defined', async () => {
  // Tempo data source whose service map points at the data source with uid "prom".
  const ds = new TempoDatasource({
    ...defaultSettings,
    jsonData: { serviceMap: { datasourceUid: 'prom' } },
  });
  const promMock = prometheusMock();
  setDataSourceSrv(dataSourceSrvWithPrometheus(promMock));

  const response = await lastValueFrom(
    ds.query({
      targets: [
        { queryType: 'serviceMap', serviceMapQuery: ['{ foo="bar" }', '{baz="bad"}'], refId: 'foo', filters: [] },
      ],
      requestId: '1',
      interval: '60s',
      intervalMs: 60000,
      scopedVars: {},
      startTime: Date.now(),
      timezone: getTimeZone(),
      range: getDefaultTimeRange(),
      app: CoreApp.Explore,
    })
  );

  // The response carries exactly two frames: nodes first, edges second.
  expect(response.data).toHaveLength(2);
  expect(response.state).toBe(LoadingState.Done);
  expect(response.data[0].name).toBe('Nodes');
  expect(response.data[1].name).toBe('Edges');

  expect(promMock.query).toHaveBeenCalledTimes(3);

  // Request object handed to the mocked Prometheus `query` on its n-th invocation.
  const nthQuery = (n: number) =>
    (promMock.query as jest.MockedFn<jest.MockableFunction>).mock.calls[n][0] as DataQueryRequest<PromQuery>;

  // Each metric's expression is the per-filter sub-expressions joined with " OR ", in filter order.
  const expectedExprs = [
    'sum by (client, server) (rate(traces_service_graph_request_server_seconds_sum{ foo="bar" }[$__range])) OR sum by (client, server) (rate(traces_service_graph_request_server_seconds_sum{baz="bad"}[$__range]))',
    'sum by (client, server) (rate(traces_service_graph_request_total{ foo="bar" }[$__range])) OR sum by (client, server) (rate(traces_service_graph_request_total{baz="bad"}[$__range]))',
    'sum by (client, server) (rate(traces_service_graph_request_failed_total{ foo="bar" }[$__range])) OR sum by (client, server) (rate(traces_service_graph_request_failed_total{baz="bad"}[$__range]))',
    'sum by (client, server) (rate(traces_service_graph_request_server_seconds_bucket{ foo="bar" }[$__range])) OR sum by (client, server) (rate(traces_service_graph_request_server_seconds_bucket{baz="bad"}[$__range]))',
  ];
  expectedExprs.forEach((expectedExpr, targetIndex) => {
    expect(nthQuery(0).targets[targetIndex].expr).toBe(expectedExpr);
  });
});
it('should build expr correctly', () => {
let targets = { targets: [{ queryType: 'serviceMap' }] } as any;
let targets = { targets: [{ queryType: 'serviceMap' }] } as DataQueryRequest<TempoQuery>;
let builtQuery = buildExpr(
{ expr: 'topk(5, sum(rate(traces_spanmetrics_calls_total{}[$__range])) by (span_name))', params: [] },
{ expr: 'sum(rate(traces_spanmetrics_calls_total{}[$__range])) by (span_name)', params: [], topk: 5 },
'',
targets
);
@ -573,8 +679,9 @@ describe('Tempo service graph view', () => {
builtQuery = buildExpr(
{
expr: 'topk(5, sum(rate(traces_spanmetrics_calls_total{}[$__range])) by (span_name))',
expr: 'sum(rate(traces_spanmetrics_calls_total{}[$__range])) by (span_name)',
params: ['status_code="STATUS_CODE_ERROR"'],
topk: 5,
},
'span_name=~"HTTP Client|HTTP GET|HTTP GET - root|HTTP POST|HTTP POST - post"',
targets
@ -595,7 +702,21 @@ describe('Tempo service graph view', () => {
'histogram_quantile(.9, sum(rate(traces_spanmetrics_latency_bucket{status_code="STATUS_CODE_ERROR",span_name=~"HTTP Client"}[$__range])) by (le))'
);
targets = { targets: [{ queryType: 'serviceMap', serviceMapQuery: '{client="app",service="app"}' }] } as any;
targets = {
targets: [{ queryType: 'serviceMap', serviceMapQuery: '{client="app",service="app"}' }],
} as DataQueryRequest<TempoQuery>;
builtQuery = buildExpr(
{ expr: 'sum(rate(traces_spanmetrics_calls_total{}[$__range])) by (span_name)', params: [], topk: 5 },
'',
targets
);
expect(builtQuery).toBe(
'topk(5, sum(rate(traces_spanmetrics_calls_total{service="app",service="app"}[$__range])) by (span_name))'
);
targets = {
targets: [{ queryType: 'serviceMap', serviceMapQuery: '{client="app",service="app"}' }],
} as DataQueryRequest<TempoQuery>;
builtQuery = buildExpr(
{ expr: 'topk(5, sum(rate(traces_spanmetrics_calls_total{}[$__range])) by (span_name))', params: [] },
'',
@ -605,9 +726,23 @@ describe('Tempo service graph view', () => {
'topk(5, sum(rate(traces_spanmetrics_calls_total{service="app",service="app"}[$__range])) by (span_name))'
);
targets = { targets: [{ queryType: 'serviceMap', serviceMapQuery: '{client="${app}",service="$app"}' }] } as any;
targets = {
targets: [{ queryType: 'serviceMap', serviceMapQuery: ['{foo="app"}', '{bar="app"}'] }],
} as DataQueryRequest<TempoQuery>;
builtQuery = buildExpr(
{ expr: 'topk(5, sum(rate(traces_spanmetrics_calls_total{}[$__range])) by (span_name))', params: [] },
{ expr: 'sum(rate(traces_spanmetrics_calls_total{}[$__range])) by (span_name)', params: [], topk: 5 },
'',
targets
);
expect(builtQuery).toBe(
'topk(5, sum(rate(traces_spanmetrics_calls_total{foo="app"}[$__range])) by (span_name) OR sum(rate(traces_spanmetrics_calls_total{bar="app"}[$__range])) by (span_name))'
);
targets = {
targets: [{ queryType: 'serviceMap', serviceMapQuery: '{client="${app}",service="$app"}' }],
} as DataQueryRequest<TempoQuery>;
builtQuery = buildExpr(
{ expr: 'sum(rate(traces_spanmetrics_calls_total{}[$__range])) by (span_name)', params: [], topk: 5 },
'',
targets
);
@ -993,37 +1128,42 @@ describe('label values', () => {
});
});
const backendSrvWithPrometheus = {
async get(uid: string) {
if (uid === 'prom') {
return {
query() {
return of({
data: [
rateMetric,
errorRateMetric,
durationMetric,
emptyDurationMetric,
totalsPromMetric,
secondsPromMetric,
failedPromMetric,
],
});
},
};
}
throw new Error('unexpected uid');
},
getInstanceSettings(uid: string) {
if (uid === 'prom') {
return { name: 'Prometheus' };
} else if (uid === 'gdev-tempo') {
return { name: 'Tempo' };
}
return '';
},
/**
 * Creates a stand-in Prometheus data source for the service graph tests.
 *
 * Only `query` is provided. It is a `jest.fn`, so tests can inspect the requests it was called with,
 * and each call returns `of(...)` around a response whose `data` holds the seven metric objects below.
 */
const prometheusMock = (): DataSourceApi => {
  const query = jest.fn(() => {
    const data = [
      rateMetric,
      errorRateMetric,
      durationMetric,
      emptyDurationMetric,
      totalsPromMetric,
      secondsPromMetric,
      failedPromMetric,
    ];
    return of({ data });
  });
  // The object only has `query`, hence the double assertion to the full DataSourceApi type.
  return { query } as unknown as DataSourceApi;
};
/**
 * Creates a partial data source service for tests, wired to the given Prometheus mock.
 *
 * - `get('prom')` resolves to `promMock`; any other uid rejects with "unexpected uid".
 * - `getInstanceSettings` returns a name for the uids 'prom' and 'gdev-tempo', and '' otherwise.
 */
const dataSourceSrvWithPrometheus = (promMock: DataSourceApi) => {
  const fakeSrv = {
    async get(uid: string) {
      if (uid !== 'prom') {
        throw new Error('unexpected uid');
      }
      return promMock;
    },
    getInstanceSettings(uid: string) {
      switch (uid) {
        case 'prom':
          return { name: 'Prometheus' };
        case 'gdev-tempo':
          return { name: 'Tempo' };
        default:
          return '';
      }
    },
  };
  // Only the two members above are implemented, hence the double assertion.
  return fakeSrv as unknown as DataSourceSrv;
};
function setupBackendSrv(frame: DataFrame) {
setBackendSrv({
fetch(): Observable<FetchResponse<BackendDataSourceResponse>> {
@ -1037,7 +1177,7 @@ function setupBackendSrv(frame: DataFrame) {
})
);
},
} as any);
} as unknown as BackendSrv);
}
export const defaultSettings: DataSourceInstanceSettings<TempoJsonData> = {
@ -1243,3 +1383,7 @@ const serviceGraphLinks = [
},
},
];
/**
 * Minimal shape of a Prometheus target as inspected by the tests in this file:
 * a DataQuery carrying the query expression string in `expr`.
 */
interface PromQuery extends DataQuery {
expr: string;
}

View File

@ -530,7 +530,9 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
search: this.templateSrv.replace(query.search ?? '', scopedVars),
minDuration: this.templateSrv.replace(query.minDuration ?? '', scopedVars),
maxDuration: this.templateSrv.replace(query.maxDuration ?? '', scopedVars),
serviceMapQuery: this.templateSrv.replace(query.serviceMapQuery ?? '', scopedVars),
serviceMapQuery: Array.isArray(query.serviceMapQuery)
? query.serviceMapQuery.map((query) => this.templateSrv.replace(query, scopedVars))
: this.templateSrv.replace(query.serviceMapQuery ?? '', scopedVars),
};
}
@ -1084,12 +1086,16 @@ function makePromServiceMapRequest(options: DataQueryRequest<TempoQuery>): DataQ
targets: serviceMapMetrics.map((metric) => {
const { serviceMapQuery, serviceMapIncludeNamespace: serviceMapIncludeNamespace } = options.targets[0];
const extraSumByFields = serviceMapIncludeNamespace ? ', client_service_namespace, server_service_namespace' : '';
const queries = Array.isArray(serviceMapQuery) ? serviceMapQuery : [serviceMapQuery];
const subExprs = queries.map(
(query) => `sum by (client, server${extraSumByFields}) (rate(${metric}${query || ''}[$__range]))`
);
return {
format: 'table',
refId: metric,
// options.targets[0] is not correct here, but not sure what should happen if you have multiple queries for
// service map at the same time anyway
expr: `sum by (client, server${extraSumByFields}) (rate(${metric}${serviceMapQuery || ''}[$__range]))`,
expr: subExprs.join(' OR '),
instant: true,
};
}),
@ -1254,24 +1260,33 @@ function getServiceGraphView(
}
export function buildExpr(
metric: { expr: string; params: string[] },
metric: { expr: string; params: string[]; topk?: number },
extraParams: string,
request: DataQueryRequest<TempoQuery>
) {
): string {
let serviceMapQuery = request.targets[0]?.serviceMapQuery ?? '';
const serviceMapQueryMatch = serviceMapQuery.match(/^{(.*)}$/);
if (serviceMapQueryMatch?.length) {
serviceMapQuery = serviceMapQueryMatch[1];
const serviceMapQueries = Array.isArray(serviceMapQuery) ? serviceMapQuery : [serviceMapQuery];
const metricParamsArray = serviceMapQueries.map((query) => {
// remove surrounding curly braces from serviceMapQuery
const serviceMapQueryMatch = query.match(/^{(.*)}$/);
if (serviceMapQueryMatch?.length) {
query = serviceMapQueryMatch[1];
}
// map serviceGraph metric tags to serviceGraphView metric tags
query = query.replace('client', 'service').replace('server', 'service');
return query.includes('span_name')
? metric.params.concat(query)
: metric.params
.concat(query)
.concat(extraParams)
.filter((item: string) => item);
});
const exprs = metricParamsArray.map((params) => metric.expr.replace('{}', '{' + params.join(',') + '}'));
const expr = exprs.join(' OR ');
if (metric.topk) {
return `topk(${metric.topk}, ${expr})`;
}
// map serviceGraph metric tags to serviceGraphView metric tags
serviceMapQuery = serviceMapQuery.replace('client', 'service').replace('server', 'service');
const metricParams = serviceMapQuery.includes('span_name')
? metric.params.concat(serviceMapQuery)
: metric.params
.concat(serviceMapQuery)
.concat(extraParams)
.filter((item: string) => item);
return metric.expr.replace('{}', '{' + metricParams.join(',') + '}');
return expr;
}
export function buildLinkExpr(expr: string) {

View File

@ -138,11 +138,13 @@ export const failedMetric = 'traces_service_graph_request_failed_total';
export const histogramMetric = 'traces_service_graph_request_server_seconds_bucket';
export const rateMetric = {
expr: 'topk(5, sum(rate(traces_spanmetrics_calls_total{}[$__range])) by (span_name))',
expr: 'sum(rate(traces_spanmetrics_calls_total{}[$__range])) by (span_name)',
topk: 5,
params: [],
};
export const errorRateMetric = {
expr: 'topk(5, sum(rate(traces_spanmetrics_calls_total{}[$__range])) by (span_name))',
expr: 'sum(rate(traces_spanmetrics_calls_total{}[$__range])) by (span_name)',
topk: 5,
params: ['status_code="STATUS_CODE_ERROR"'],
};
export const durationMetric = {

View File

@ -86,6 +86,6 @@ export const onDashboardLoadedHandler = ({
}
};
const hasTemplateVariables = (val?: string): boolean => {
return getTemplateSrv().containsTemplate(val);
/**
 * Reports whether `val` contains a template variable, as judged by the template service's
 * `containsTemplate`. A single value (including `undefined`) is checked on its own;
 * for an array, the result is true as soon as any entry matches.
 */
const hasTemplateVariables = (val?: string | string[]): boolean => {
  const candidates = Array.isArray(val) ? val : [val];
  for (const candidate of candidates) {
    if (getTemplateSrv().containsTemplate(candidate)) {
      return true;
    }
  }
  return false;
};