Tempo: Service map (#37661)

* Add prometheus queries

* Add stats

* Refactor transform

* Fix stat format

* Refactor transform

* Hide behind feature flag

* Better linking error messages

* Add test

* Add test for datasource

* Fix lint

* Make optionality checking more explicit
This commit is contained in:
Andrej Ocenas
2021-08-17 15:48:29 +02:00
committed by GitHub
parent 6aa2a0dc8a
commit 11c848f00d
9 changed files with 560 additions and 62 deletions

View File

@@ -2,6 +2,8 @@ import { DataSourcePluginOptionsEditorProps } from '@grafana/data';
import { DataSourceHttpSettings } from '@grafana/ui';
import { TraceToLogsSettings } from 'app/core/components/TraceToLogsSettings';
import React from 'react';
import { ServiceMapSettings } from './ServiceMapSettings';
import { config } from '@grafana/runtime';
export type Props = DataSourcePluginOptionsEditorProps;
@@ -15,7 +17,12 @@ export const ConfigEditor: React.FC<Props> = ({ options, onOptionsChange }) => {
onChange={onOptionsChange}
/>
<TraceToLogsSettings options={options} onOptionsChange={onOptionsChange} />
<div className="gf-form-group">
<TraceToLogsSettings options={options} onOptionsChange={onOptionsChange} />
</div>
{config.featureToggles.tempoServiceGraph && (
<ServiceMapSettings options={options} onOptionsChange={onOptionsChange} />
)}
</>
);
};

View File

@@ -1,7 +1,7 @@
import { css } from '@emotion/css';
import { DataQuery, DataSourceApi, ExploreQueryFieldProps } from '@grafana/data';
import { DataSourceApi, ExploreQueryFieldProps, SelectableValue } from '@grafana/data';
import { selectors } from '@grafana/e2e-selectors';
import { getDataSourceSrv } from '@grafana/runtime';
import { config, getDataSourceSrv } from '@grafana/runtime';
import {
FileDropzone,
InlineField,
@@ -16,16 +16,27 @@ import { TraceToLogsOptions } from 'app/core/components/TraceToLogsSettings';
import React from 'react';
import { LokiQueryField } from '../loki/components/LokiQueryField';
import { TempoDatasource, TempoQuery, TempoQueryType } from './datasource';
import LokiDatasource from '../loki/datasource';
import { LokiQuery } from '../loki/types';
import { PrometheusDatasource } from '../prometheus/datasource';
import useAsync from 'react-use/lib/useAsync';
interface Props extends ExploreQueryFieldProps<TempoDatasource, TempoQuery>, Themeable2 {}
const DEFAULT_QUERY_TYPE: TempoQueryType = 'traceId';
interface State {
linkedDatasource?: DataSourceApi;
linkedDatasourceUid?: string;
linkedDatasource?: LokiDatasource;
serviceMapDatasourceUid?: string;
serviceMapDatasource?: PrometheusDatasource;
}
class TempoQueryFieldComponent extends React.PureComponent<Props, State> {
state = {
linkedDatasourceUid: undefined,
linkedDatasource: undefined,
serviceMapDatasourceUid: undefined,
serviceMapDatasource: undefined,
};
constructor(props: Props) {
@@ -37,16 +48,21 @@ class TempoQueryFieldComponent extends React.PureComponent<Props, State> {
// Find query field from linked datasource
const tracesToLogsOptions: TraceToLogsOptions = datasource.tracesToLogs || {};
const linkedDatasourceUid = tracesToLogsOptions.datasourceUid;
if (linkedDatasourceUid) {
const dsSrv = getDataSourceSrv();
const linkedDatasource = await dsSrv.get(linkedDatasourceUid);
this.setState({
linkedDatasource,
});
}
const serviceMapDsUid = datasource.serviceMap?.datasourceUid;
// Check status of linked data sources so we can show warnings if needed.
const [logsDs, serviceMapDs] = await Promise.all([getDS(linkedDatasourceUid), getDS(serviceMapDsUid)]);
this.setState({
linkedDatasourceUid: linkedDatasourceUid,
linkedDatasource: logsDs as LokiDatasource,
serviceMapDatasourceUid: serviceMapDsUid,
serviceMapDatasource: serviceMapDs as PrometheusDatasource,
});
}
onChangeLinkedQuery = (value: DataQuery) => {
onChangeLinkedQuery = (value: LokiQuery) => {
const { query, onChange } = this.props;
onChange({
...query,
@@ -59,19 +75,28 @@ class TempoQueryFieldComponent extends React.PureComponent<Props, State> {
};
render() {
const { query, onChange } = this.props;
const { linkedDatasource } = this.state;
const { query, onChange, datasource } = this.props;
// Find query field from linked datasource
const tracesToLogsOptions: TraceToLogsOptions = datasource.tracesToLogs || {};
const logsDatasourceUid = tracesToLogsOptions.datasourceUid;
const graphDatasourceUid = datasource.serviceMap?.datasourceUid;
const queryTypeOptions: Array<SelectableValue<TempoQueryType>> = [
{ value: 'search', label: 'Search' },
{ value: 'traceId', label: 'TraceID' },
{ value: 'upload', label: 'JSON file' },
];
if (config.featureToggles.tempoServiceGraph) {
queryTypeOptions.push({ value: 'serviceMap', label: 'Service Map' });
}
return (
<>
<InlineFieldRow>
<InlineField label="Query type">
<RadioButtonGroup<TempoQueryType>
options={[
{ value: 'search', label: 'Search' },
{ value: 'traceId', label: 'TraceID' },
{ value: 'upload', label: 'JSON file' },
]}
options={queryTypeOptions}
value={query.queryType || DEFAULT_QUERY_TYPE}
onChange={(v) =>
onChange({
@@ -83,23 +108,13 @@ class TempoQueryFieldComponent extends React.PureComponent<Props, State> {
/>
</InlineField>
</InlineFieldRow>
{query.queryType === 'search' && linkedDatasource && (
<>
<InlineLabel>
Tempo uses {((linkedDatasource as unknown) as DataSourceApi).name} to find traces.
</InlineLabel>
<LokiQueryField
datasource={linkedDatasource!}
onChange={this.onChangeLinkedQuery}
onRunQuery={this.onRunLinkedQuery}
query={this.props.query.linkedQuery ?? ({ refId: 'linked' } as any)}
history={[]}
/>
</>
)}
{query.queryType === 'search' && !linkedDatasource && (
<div className="text-warning">Please set up a Traces-to-logs datasource in the datasource settings.</div>
{query.queryType === 'search' && (
<SearchSection
linkedDatasourceUid={logsDatasourceUid}
query={query}
onRunQuery={this.onRunLinkedQuery}
onChange={this.onChangeLinkedQuery}
/>
)}
{query.queryType === 'upload' && (
<div className={css({ padding: this.props.theme.spacing(2) })}>
@@ -112,7 +127,7 @@ class TempoQueryFieldComponent extends React.PureComponent<Props, State> {
/>
</div>
)}
{(!query.queryType || query.queryType === 'traceId') && (
{query.queryType === 'traceId' && (
<LegacyForms.FormField
label="Trace ID"
labelWidth={4}
@@ -136,9 +151,94 @@ class TempoQueryFieldComponent extends React.PureComponent<Props, State> {
}
/>
)}
{query.queryType === 'serviceMap' && <ServiceMapSection graphDatasourceUid={graphDatasourceUid} />}
</>
);
}
}
/**
 * Query editor section for the "Service Map" query type. It has no inputs of its own; it only renders a warning
 * when the service graph data source is not configured or can no longer be loaded.
 *
 * @param graphDatasourceUid uid of the data source configured for the service map, if any.
 */
function ServiceMapSection({ graphDatasourceUid }: { graphDatasourceUid?: string }) {
  const dsState = useAsync(() => getDS(graphDatasourceUid), [graphDatasourceUid]);

  // Render nothing while the lookup is running so a warning does not flash.
  if (dsState.loading) {
    return null;
  }

  if (!graphDatasourceUid) {
    return <div className="text-warning">Please set up a service graph datasource in the datasource settings.</div>;
  }

  // The value is only checked for presence, so it is left as the generic data source type returned by getDS
  // instead of being asserted to a Loki data source (the service graph data source is not Loki).
  if (!dsState.value) {
    return (
      <div className="text-warning">
        Service graph datasource is configured but the data source no longer exists. Please configure existing data
        source to use the service graph functionality.
      </div>
    );
  }

  return null;
}
/** Props of the SearchSection component. */
interface SearchSectionProps {
// Uid of the data source configured in the traces-to-logs settings, if any.
linkedDatasourceUid?: string;
// Called with the new linked query when it is edited in the linked query field.
onChange: (value: LokiQuery) => void;
// Called when the linked query field asks to run the query.
onRunQuery: () => void;
// Current Tempo query; its linkedQuery is what the linked query field edits.
query: TempoQuery;
}
/**
 * Query editor section for the "Search" query type. Loads the data source linked through the traces-to-logs
 * settings and renders its Loki query field, or a warning when the link is missing or broken.
 */
function SearchSection({ linkedDatasourceUid, onChange, onRunQuery, query }: SearchSectionProps) {
  const lookup = useAsync(() => getDS(linkedDatasourceUid), [linkedDatasourceUid]);

  // Nothing is rendered until the lookup settles.
  if (lookup.loading) {
    return null;
  }

  const lokiDs = lookup.value as LokiDatasource;

  if (!lokiDs) {
    // No data source instance: either nothing was configured, or the configured uid could not be loaded.
    return linkedDatasourceUid ? (
      <div className="text-warning">
        Traces-to-logs datasource is configured but the data source no longer exists. Please configure existing data
        source to use the search.
      </div>
    ) : (
      <div className="text-warning">Please set up a Traces-to-logs datasource in the datasource settings.</div>
    );
  }

  return (
    <>
      <InlineLabel>Tempo uses {lokiDs.name} to find traces.</InlineLabel>
      <LokiQueryField
        datasource={lokiDs}
        onChange={onChange}
        onRunQuery={onRunQuery}
        query={query.linkedQuery ?? ({ refId: 'linked' } as any)}
        history={[]}
      />
    </>
  );
}
/**
 * Load a data source instance by uid without ever rejecting because of a failed lookup.
 *
 * @param uid uid of the data source to load; when empty or missing nothing is looked up.
 * @returns the data source instance, or undefined when no uid was given or the lookup threw (the error is
 *   logged to the console).
 */
async function getDS(uid?: string): Promise<DataSourceApi | undefined> {
  if (uid) {
    const srv = getDataSourceSrv();
    try {
      const instance = await srv.get(uid);
      return instance;
    } catch (error) {
      console.error('Failed to load data source', error);
    }
  }
  return undefined;
}
export const TempoQueryField = withTheme2(TempoQueryFieldComponent);

View File

@@ -0,0 +1,64 @@
import { css } from '@emotion/css';
import { DataSourcePluginOptionsEditorProps, GrafanaTheme, updateDatasourcePluginJsonDataOption } from '@grafana/data';
import { DataSourcePicker } from '@grafana/runtime';
import { Button, InlineField, InlineFieldRow, useStyles } from '@grafana/ui';
import React from 'react';
import { TempoJsonData } from './datasource';
interface Props extends DataSourcePluginOptionsEditorProps<TempoJsonData> {}
/**
 * Data source settings section that selects (or clears) the Prometheus data source holding the service map
 * metrics. The selected uid is stored under the `serviceMap` key of the data source jsonData.
 */
export function ServiceMapSettings({ options, onOptionsChange }: Props) {
  const styles = useStyles(getStyles);

  // Both the picker and the Clear button write the same jsonData option, only the uid differs.
  const saveDatasourceUid = (datasourceUid: string | undefined) =>
    updateDatasourcePluginJsonDataOption({ onOptionsChange, options }, 'serviceMap', { datasourceUid });

  return (
    <div className={css({ width: '100%' })}>
      <h3 className="page-heading">Service map</h3>

      <div className={styles.infoText}>
        To allow querying service map data you have to select a Prometheus instance where the data is stored.
      </div>

      <InlineFieldRow className={styles.row}>
        <InlineField tooltip="The Prometheus data source with the service map data" label="Data source" labelWidth={26}>
          <DataSourcePicker
            pluginId="prometheus"
            current={options.jsonData.serviceMap?.datasourceUid}
            noDefault={true}
            width={40}
            onChange={(ds) => saveDatasourceUid(ds.uid)}
          />
        </InlineField>
        <Button
          type="button"
          variant="secondary"
          size="sm"
          fill="text"
          onClick={() => {
            saveDatasourceUid(undefined);
          }}
        >
          Clear
        </Button>
      </InlineFieldRow>
    </div>
  );
}
// Emotion class names for the service map settings section, derived from the Grafana theme.
const getStyles = (theme: GrafanaTheme) => ({
// Explanatory text shown under the section heading.
infoText: css`
label: infoText;
padding-bottom: ${theme.spacing.md};
color: ${theme.colors.textSemiWeak};
`,
// Row holding the data source picker and the Clear button.
row: css`
label: row;
align-items: baseline;
`,
});

View File

@@ -3,13 +3,15 @@ import {
dataFrameToJSON,
DataSourceInstanceSettings,
FieldType,
getDefaultTimeRange,
LoadingState,
MutableDataFrame,
PluginType,
} from '@grafana/data';
import { BackendDataSourceResponse, FetchResponse, setBackendSrv } from '@grafana/runtime';
import { Observable, of } from 'rxjs';
import { createFetchResponse } from 'test/helpers/createFetchResponse';
import { TempoDatasource } from './datasource';
import { FetchResponse, setBackendSrv, BackendDataSourceResponse, setDataSourceSrv } from '@grafana/runtime';
import mockJson from './mockJsonResponse.json';
describe('Tempo data source', () => {
@@ -77,6 +79,30 @@ describe('Tempo data source', () => {
]);
});
// Runs a 'serviceMap' query end to end against a mocked Prometheus data source.
it('runs service map queries', async () => {
const ds = new TempoDatasource({
...defaultSettings,
jsonData: {
serviceMap: {
datasourceUid: 'prom',
},
},
});
// The mock only resolves the 'prom' uid, which is the one configured above.
setDataSourceSrv(backendSrvWithPrometheus as any);
const response = await ds
.query({ targets: [{ queryType: 'serviceMap' }], range: getDefaultTimeRange() } as any)
.toPromise();
// One frame with nodes and one with edges is expected.
expect(response.data).toHaveLength(2);
expect(response.data[0].name).toBe('Nodes');
expect(response.data[0].fields[0].values.length).toBe(3);
expect(response.data[1].name).toBe('Edges');
expect(response.data[1].fields[0].values.length).toBe(2);
expect(response.state).toBe(LoadingState.Done);
});
it('should handle json file upload', async () => {
const ds = new TempoDatasource(defaultSettings);
ds.uploadedJson = JSON.stringify(mockJson);
@@ -93,6 +119,19 @@ describe('Tempo data source', () => {
});
});
// Minimal stand-in passed to setDataSourceSrv in the service map test: it resolves only the 'prom' uid, to an
// object whose query() emits the two metric fixtures as two separate responses.
const backendSrvWithPrometheus = {
async get(uid: string) {
if (uid === 'prom') {
return {
query() {
return of({ data: [totalsPromMetric] }, { data: [secondsPromMetric] });
},
};
}
// Any other uid means the code under test asked for a data source this test did not set up.
throw new Error('unexpected uid');
},
};
function setupBackendSrv(frame: DataFrame) {
setBackendSrv({
fetch(): Observable<FetchResponse<BackendDataSourceResponse>> {
@@ -113,11 +152,11 @@ const defaultSettings: DataSourceInstanceSettings = {
id: 0,
uid: '0',
type: 'tracing',
name: 'jaeger',
name: 'tempo',
access: 'proxy',
meta: {
id: 'jaeger',
name: 'jaeger',
id: 'tempo',
name: 'tempo',
type: PluginType.datasource,
info: {} as any,
module: '',
@@ -125,3 +164,29 @@ const defaultSettings: DataSourceInstanceSettings = {
},
jsonData: {},
};
// Fixture frame for the request count metric: two rows, one per client/server pair (app -> db, lb -> app).
// The value column is named 'Value #<refId>'.
const totalsPromMetric = new MutableDataFrame({
refId: 'tempo_service_graph_request_total',
fields: [
{ name: 'Time', values: [1628169788000, 1628169788000] },
{ name: 'client', values: ['app', 'lb'] },
{ name: 'instance', values: ['127.0.0.1:12345', '127.0.0.1:12345'] },
{ name: 'job', values: ['local_scrape', 'local_scrape'] },
{ name: 'server', values: ['db', 'app'] },
{ name: 'tempo_config', values: ['default', 'default'] },
{ name: 'Value #tempo_service_graph_request_total', values: [10, 20] },
],
});
// Fixture frame for the server seconds sum metric, with the same client/server pairs as the totals fixture.
const secondsPromMetric = new MutableDataFrame({
refId: 'tempo_service_graph_request_server_seconds_sum',
fields: [
{ name: 'Time', values: [1628169788000, 1628169788000] },
{ name: 'client', values: ['app', 'lb'] },
{ name: 'instance', values: ['127.0.0.1:12345', '127.0.0.1:12345'] },
{ name: 'job', values: ['local_scrape', 'local_scrape'] },
{ name: 'server', values: ['db', 'app'] },
{ name: 'tempo_config', values: ['default', 'default'] },
{ name: 'Value #tempo_service_graph_request_server_seconds_sum', values: [10, 40] },
],
});

View File

@@ -1,54 +1,66 @@
import { groupBy } from 'lodash';
import {
DataQuery,
DataQueryRequest,
DataQueryResponse,
DataSourceApi,
DataSourceInstanceSettings,
DataSourceJsonData,
LoadingState,
} from '@grafana/data';
import { DataSourceWithBackend } from '@grafana/runtime';
import { TraceToLogsData, TraceToLogsOptions } from 'app/core/components/TraceToLogsSettings';
import { TraceToLogsOptions } from 'app/core/components/TraceToLogsSettings';
import { getDatasourceSrv } from 'app/features/plugins/datasource_srv';
import { from, merge, Observable, of, throwError } from 'rxjs';
import { map, mergeMap } from 'rxjs/operators';
import { LokiOptions } from '../loki/types';
import { transformFromOTLP as transformFromOTEL, transformTrace, transformTraceList } from './resultTransformer';
import { map, mergeMap, toArray } from 'rxjs/operators';
import { LokiOptions, LokiQuery } from '../loki/types';
import { transformTrace, transformTraceList, transformFromOTLP as transformFromOTEL } from './resultTransformer';
import { PrometheusDatasource } from '../prometheus/datasource';
import { PromQuery } from '../prometheus/types';
import { mapPromMetricsToServiceMap, serviceMapMetrics } from './graphTransform';
export type TempoQueryType = 'search' | 'traceId' | 'upload';
export type TempoQueryType = 'search' | 'traceId' | 'serviceMap' | 'upload';
export interface TempoJsonData extends DataSourceJsonData {
tracesToLogs?: TraceToLogsOptions;
serviceMap?: {
datasourceUid?: string;
};
}
export type TempoQuery = {
query: string;
// Query to find list of traces, e.g., via Loki
linkedQuery?: DataQuery;
linkedQuery?: LokiQuery;
queryType: TempoQueryType;
} & DataQuery;
export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TraceToLogsData> {
export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJsonData> {
tracesToLogs?: TraceToLogsOptions;
serviceMap?: {
datasourceUid?: string;
};
uploadedJson?: string | ArrayBuffer | null = null;
constructor(instanceSettings: DataSourceInstanceSettings<TraceToLogsData>) {
constructor(instanceSettings: DataSourceInstanceSettings<TempoJsonData>) {
super(instanceSettings);
this.tracesToLogs = instanceSettings.jsonData.tracesToLogs;
this.serviceMap = instanceSettings.jsonData.serviceMap;
}
query(options: DataQueryRequest<TempoQuery>): Observable<DataQueryResponse> {
const subQueries: Array<Observable<DataQueryResponse>> = [];
const filteredTargets = options.targets.filter((target) => !target.hide);
const searchTargets = filteredTargets.filter((target) => target.queryType === 'search');
const uploadTargets = filteredTargets.filter((target) => target.queryType === 'upload');
const traceTargets = filteredTargets.filter(
(target) => target.queryType === 'traceId' || target.queryType === undefined
);
const targets: { [type: string]: TempoQuery[] } = groupBy(filteredTargets, (t) => t.queryType || 'traceId');
// Run search queries on linked datasource
if (this.tracesToLogs?.datasourceUid && searchTargets.length > 0) {
if (this.tracesToLogs?.datasourceUid && targets.search?.length > 0) {
const dsSrv = getDatasourceSrv();
subQueries.push(
from(dsSrv.get(this.tracesToLogs.datasourceUid)).pipe(
mergeMap((linkedDatasource: DataSourceApi) => {
// Wrap linked query into a data request based on original request
const linkedRequest: DataQueryRequest = { ...options, targets: searchTargets.map((t) => t.linkedQuery!) };
const linkedRequest: DataQueryRequest = { ...options, targets: targets.search.map((t) => t.linkedQuery!) };
// Find trace matchers in derived fields of the linked datasource that's identical to this datasource
const settings: DataSourceInstanceSettings<LokiOptions> = (linkedDatasource as any).instanceSettings;
const traceLinkMatcher: string[] =
@@ -71,7 +83,7 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TraceToLo
);
}
if (uploadTargets.length) {
if (targets.upload?.length) {
if (this.uploadedJson) {
const otelTraceData = JSON.parse(this.uploadedJson as string);
if (!otelTraceData.batches) {
@@ -84,8 +96,12 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TraceToLo
}
}
if (traceTargets.length > 0) {
const traceRequest: DataQueryRequest<TempoQuery> = { ...options, targets: traceTargets };
if (this.serviceMap?.datasourceUid && targets.serviceMap?.length > 0) {
subQueries.push(serviceMapQuery(options, this.serviceMap.datasourceUid));
}
if (targets.traceId?.length > 0) {
const traceRequest: DataQueryRequest<TempoQuery> = { ...options, targets: targets.traceId };
subQueries.push(
super.query(traceRequest).pipe(
map((response) => {
@@ -121,3 +137,37 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TraceToLo
return query.query;
}
}
/**
 * Resolve the data source with the given uid and run the request against it.
 *
 * @param request Prometheus request to run.
 * @param datasourceUid uid of the data source to query; it is treated as a Prometheus data source.
 * @returns observable of the responses emitted by the data source query.
 */
function queryServiceMapPrometheus(request: DataQueryRequest<PromQuery>, datasourceUid: string) {
  const datasource$ = from(getDatasourceSrv().get(datasourceUid));
  return datasource$.pipe(mergeMap((ds) => (ds as PrometheusDatasource).query(request)));
}
/**
 * Run the service map metric queries and turn the results into a single response with node graph frames.
 *
 * @param request original Tempo request; its range is used when computing the stats.
 * @param datasourceUid uid of the data source that stores the service map metrics.
 */
function serviceMapQuery(request: DataQueryRequest<TempoQuery>, datasourceUid: string) {
  const promRequest = makePromServiceMapRequest(request);

  const toServiceMapResponse = (responses: DataQueryResponse[]) => ({
    data: mapPromMetricsToServiceMap(responses, request.range),
    state: LoadingState.Done,
  });

  return queryServiceMapPrometheus(promRequest, datasourceUid).pipe(
    // The transformation needs every metric, so wait for all responses before mapping them.
    toArray(),
    map(toServiceMapResponse)
  );
}
/**
 * Build the Prometheus request for the service map: one instant query per entry of serviceMapMetrics, each
 * asking for the growth of the metric over the selected time range. The metric name is used as the refId.
 *
 * @param options original Tempo request; everything except the targets is passed through unchanged.
 */
function makePromServiceMapRequest(options: DataQueryRequest<TempoQuery>): DataQueryRequest<PromQuery> {
  return {
    ...options,
    targets: serviceMapMetrics.map((metric) => {
      return {
        refId: metric,
        // These metrics are counters (`_total`, `_sum`). increase() is the counter variant of delta() and
        // accounts for counter resets, while delta() is documented for gauges only.
        expr: `increase(${metric}[$__range])`,
        instant: true,
      };
    }),
  };
}

View File

@@ -1,6 +1,6 @@
import { createGraphFrames } from './graphTransform';
import { createGraphFrames, mapPromMetricsToServiceMap } from './graphTransform';
import { bigResponse } from './testResponse';
import { DataFrameView, MutableDataFrame } from '@grafana/data';
import { ArrayVector, DataFrameView, dateTime, MutableDataFrame } from '@grafana/data';
describe('createGraphFrames', () => {
it('transforms basic response into nodes and edges frame', async () => {
@@ -58,6 +58,33 @@ describe('createGraphFrames', () => {
});
});
describe('mapPromMetricsToServiceMap', () => {
it('transforms prom metrics to service map', async () => {
// One minute range, so the per-minute node stat equals the raw totals from the fixture.
const range = {
from: dateTime('2000-01-01T00:00:00'),
to: dateTime('2000-01-01T00:01:00'),
};
const [nodes, edges] = mapPromMetricsToServiceMap([{ data: [totalsPromMetric] }, { data: [secondsPromMetric] }], {
...range,
raw: range,
});
// Node mainStat is seconds / total * 1000 (10/10 and 40/20 in the fixtures); 'lb' only appears as a client,
// so both of its stats are NaN.
expect(nodes.fields).toMatchObject([
{ name: 'id', values: new ArrayVector(['db', 'app', 'lb']) },
{ name: 'title', values: new ArrayVector(['db', 'app', 'lb']) },
{ name: 'mainStat', values: new ArrayVector([1000, 2000, NaN]) },
{ name: 'secondaryStat', values: new ArrayVector([10, 20, NaN]) },
]);
// Edge ids are '<client>_<server>'; mainStat is the total and secondaryStat is seconds / total * 1000.
expect(edges.fields).toMatchObject([
{ name: 'id', values: new ArrayVector(['app_db', 'lb_app']) },
{ name: 'source', values: new ArrayVector(['app', 'lb']) },
{ name: 'target', values: new ArrayVector(['db', 'app']) },
{ name: 'mainStat', values: new ArrayVector([10, 20]) },
{ name: 'secondaryStat', values: new ArrayVector([1000, 2000]) },
]);
});
});
const singleSpanResponse = new MutableDataFrame({
fields: [
{ name: 'traceID', values: ['04450900759028499335'] },
@@ -81,3 +108,29 @@ const missingSpanResponse = new MutableDataFrame({
{ name: 'duration', values: [14.984, 4.984] },
],
});
// Fixture frame for the request count metric: two rows, one per client/server pair (app -> db, lb -> app).
// The value column is named 'Value #<refId>'.
const totalsPromMetric = new MutableDataFrame({
refId: 'tempo_service_graph_request_total',
fields: [
{ name: 'Time', values: [1628169788000, 1628169788000] },
{ name: 'client', values: ['app', 'lb'] },
{ name: 'instance', values: ['127.0.0.1:12345', '127.0.0.1:12345'] },
{ name: 'job', values: ['local_scrape', 'local_scrape'] },
{ name: 'server', values: ['db', 'app'] },
{ name: 'tempo_config', values: ['default', 'default'] },
{ name: 'Value #tempo_service_graph_request_total', values: [10, 20] },
],
});
// Fixture frame for the server seconds sum metric, with the same client/server pairs as the totals fixture.
const secondsPromMetric = new MutableDataFrame({
refId: 'tempo_service_graph_request_server_seconds_sum',
fields: [
{ name: 'Time', values: [1628169788000, 1628169788000] },
{ name: 'client', values: ['app', 'lb'] },
{ name: 'instance', values: ['127.0.0.1:12345', '127.0.0.1:12345'] },
{ name: 'job', values: ['local_scrape', 'local_scrape'] },
{ name: 'server', values: ['db', 'app'] },
{ name: 'tempo_config', values: ['default', 'default'] },
{ name: 'Value #tempo_service_graph_request_server_seconds_sum', values: [10, 40] },
],
});

View File

@@ -1,4 +1,13 @@
import { DataFrame, DataFrameView, NodeGraphDataFrameFieldNames as Fields } from '@grafana/data';
import { groupBy } from 'lodash';
import {
DataFrame,
DataFrameView,
DataQueryResponse,
FieldDTO,
MutableDataFrame,
NodeGraphDataFrameFieldNames as Fields,
TimeRange,
} from '@grafana/data';
import { getNonOverlappingDuration, getStats, makeFrames, makeSpanMap } from '../../../core/utils/tracing';
interface Row {
@@ -117,3 +126,151 @@ function findTraceDuration(view: DataFrameView<Row>): number {
return traceEndTime - traceStartTime;
}
// Metric used for the 'seconds' stat (summed server response time).
const secondsMetric = 'tempo_service_graph_request_server_seconds_sum';
// Metric used for the 'total' stat (request count).
const totalsMetric = 'tempo_service_graph_request_total';
// Names of the metrics that make up the service map.
export const serviceMapMetrics = [
secondsMetric,
totalsMetric,
// We don't show histogram in node graph at the moment but we could later add that into a node context menu.
// 'tempo_service_graph_request_seconds_bucket',
// 'tempo_service_graph_request_seconds_count',
// These are used for debugging the tempo collection so probably not useful for service map right now.
// 'tempo_service_graph_unpaired_spans_total',
// 'tempo_service_graph_untagged_spans_total',
];
/**
 * Build the node graph data frames (nodes and edges) out of the responses for the service map metrics.
 * @param responses Prometheus responses holding the request totals and the summed seconds metrics.
 * @param range time range of the query, used to express the node request count per minute.
 * @returns tuple of [nodes frame, edges frame].
 */
export function mapPromMetricsToServiceMap(responses: DataQueryResponse[], range: TimeRange): [DataFrame, DataFrame] {
  const metricViews = getMetricFrames(responses);
  const totalsView = metricViews[0];
  const secondsView = metricViews[1];

  // Stats are first accumulated per node id and per edge id, and only then turned into frames.
  const statsByNode: Record<string, any> = {};
  const statsByEdge: Record<string, any> = {};

  // Only the request count and the summed time are available; there are no error/success counts yet.
  collectMetricData(totalsView, 'total', totalsMetric, statsByNode, statsByEdge);
  collectMetricData(secondsView, 'seconds', secondsMetric, statsByNode, statsByEdge);

  return convertToDataFrames(statsByNode, statsByEdge, range);
}
/**
 * Create the two empty frames of the service map, 'Nodes' and 'Edges', both marked for the node graph
 * visualisation and with units and display names set on their stat fields.
 * @returns array of [nodes frame, edges frame].
 */
function createServiceMapDataFrames() {
  const makeFrame = (name: string, fields: FieldDTO[]) =>
    new MutableDataFrame({ name, fields, meta: { preferredVisualisationType: 'nodeGraph' } });

  const nodeFields: FieldDTO[] = [
    { name: Fields.id },
    { name: Fields.title },
    { name: Fields.mainStat, config: { unit: 'ms/t', displayName: 'Average response time' } },
    { name: Fields.secondaryStat, config: { unit: 't/min', displayName: 'Transactions per minute' } },
  ];

  const edgeFields: FieldDTO[] = [
    { name: Fields.id },
    { name: Fields.source },
    { name: Fields.target },
    { name: Fields.mainStat, config: { unit: 't', displayName: 'Transactions' } },
    { name: Fields.secondaryStat, config: { unit: 'ms/t', displayName: 'Average response time' } },
  ];

  const nodesFrame = makeFrame('Nodes', nodeFields);
  const edgesFrame = makeFrame('Edges', edgeFields);
  return [nodesFrame, edgesFrame];
}
/**
 * Pick the frames of the two service map metrics out of the responses, matching them by the refId of the
 * first frame of each response.
 *
 * A metric that has no response (or whose response has no frames) yields a view over an empty frame instead
 * of throwing, so a data source without service graph data produces an empty result rather than an error.
 *
 * @param responses responses of the service map metric queries.
 * @returns array of [totals view, seconds view].
 */
function getMetricFrames(responses: DataQueryResponse[]) {
  // Responses without any frame end up under the 'undefined' key, which is never looked up.
  const responsesMap = groupBy(responses, (r) => r.data[0]?.refId);

  const viewFor = (metric: string) =>
    new DataFrameView(responsesMap[metric]?.[0]?.data[0] ?? { fields: [], length: 0 });

  const totalsDFView = viewFor(totalsMetric);
  const secondsDFView = viewFor(secondsMetric);
  return [totalsDFView, secondsDFView];
}
/**
 * Accumulate one metric into the node and edge maps. Every row of the metric describes one client/server pair,
 * so each row maps 1-1 to an edge, and the same value is then also added to the server node. Only the server
 * side is counted for nodes because the stats describe the requests a node processed, not the ones it sent;
 * a client seen for the first time is just registered with a zero stat.
 * @param frame view over the metric frame; rows are read for `client`, `server` and the value column.
 * @param stat key under which the value is accumulated in the map entries.
 * @param metric metric name, used to derive the name of the value column.
 * @param nodesMap node stats keyed by node id; mutated in place.
 * @param edgesMap edge stats keyed by `<client>_<server>`; mutated in place.
 */
function collectMetricData(
  frame: DataFrameView,
  stat: 'total' | 'seconds',
  metric: string,
  nodesMap: Record<string, any>,
  edgesMap: Record<string, any>
) {
  // The name of the value column is in this format
  // TODO figure out if it can be changed
  const valueColumn = `Value #${metric}`;

  for (let rowIndex = 0; rowIndex < frame.length; rowIndex++) {
    const row = frame.get(rowIndex);
    const client = row.client;
    const server = row.server;
    const value = row[valueColumn];
    const edgeKey = `${client}_${server}`;

    const knownEdge = edgesMap[edgeKey];
    if (knownEdge) {
      knownEdge[stat] = (knownEdge[stat] || 0) + value;
    } else {
      edgesMap[edgeKey] = { target: server, source: client, [stat]: value };
    }

    const serverNode = nodesMap[server];
    if (serverNode) {
      serverNode[stat] = (serverNode[stat] || 0) + value;
    } else {
      nodesMap[server] = { [stat]: value };
    }

    // Make sure the client shows up as a node even if it never acts as a server.
    if (!nodesMap[client]) {
      nodesMap[client] = { [stat]: 0 };
    }
  }
}
/**
 * Turn the accumulated node and edge stats into the node graph frames.
 *
 * Nodes get the average response time (seconds / total * 1000) as main stat and the total divided by the
 * length of the range in minutes as secondary stat. Edges get the total as main stat and the average response
 * time as secondary stat.
 *
 * @param nodesMap node stats keyed by node id.
 * @param edgesMap edge stats keyed by edge id; entries also carry `source` and `target`.
 * @param range time range of the query.
 * @returns tuple of [nodes frame, edges frame].
 */
function convertToDataFrames(
  nodesMap: Record<string, any>,
  edgesMap: Record<string, any>,
  range: TimeRange
): [DataFrame, DataFrame] {
  const rangeMs = range.to.valueOf() - range.from.valueOf();
  const [nodesFrame, edgesFrame] = createServiceMapDataFrames();

  // A falsy total yields NaN, which is not shown in the node graph. This happens for a root client node
  // which did not process any requests itself.
  const averageMs = (stats: any) => (stats.total ? (stats.seconds / stats.total) * 1000 : Number.NaN);

  Object.keys(nodesMap).forEach((nodeId) => {
    const stats = nodesMap[nodeId];
    nodesFrame.add({
      id: nodeId,
      title: nodeId,
      mainStat: averageMs(stats),
      secondaryStat: stats.total ? stats.total / (rangeMs / (1000 * 60)) : Number.NaN,
    });
  });

  Object.keys(edgesMap).forEach((edgeId) => {
    const stats = edgesMap[edgeId];
    edgesFrame.add({
      id: edgeId,
      source: stats.source,
      target: stats.target,
      mainStat: stats.total,
      secondaryStat: averageMs(stats),
    });
  });

  return [nodesFrame, edgesFrame];
}