Tracing: Trace to profiles (#76670)

* Update Tempo devenv to include profiles

* Update devenv to scrape profiles from local services

* Cleanup devenv

* Fix issue with flame graph

* Add width prop to ProfileTypeCascader

* Add trace to profiles settings

* Add new spanSelector API

* Add spanSelector to query editor

* Update span link query

* Conditionally show span link

* Combine profile and spanProfile query types and run specific query type in backend based on spanSelector presence

* Update placeholder

* Create feature toggle

* Remove spanProfile query type

* Cleanup

* Use feature toggle

* Update feature toggle

* Update devenv

* Update devenv

* Tests

* Tests

* Profiles for this span

* Styling

* Types

* Update type check

* Tidier funcs

* Add config links from dataframe

* Remove time shift

* Update tests

* Update range in test

* Simplify span link logic

* Update default keys

* Update pyro link

* Use const
This commit is contained in:
Joey
2023-11-01 10:14:24 +00:00
committed by GitHub
parent f42bb86667
commit c39e9a8f52
29 changed files with 804 additions and 67 deletions

View File

@@ -653,6 +653,13 @@ var (
Owner: grafanaPartnerPluginsSquad,
Expression: "true", // on by default
},
{
Name: "traceToProfiles",
Description: "Enables linking between traces and profiles",
Stage: FeatureStageExperimental,
FrontendOnly: true,
Owner: grafanaObservabilityTracesAndProfilingSquad,
},
{
Name: "permissionsFilterRemoveSubquery",
Description: "Alternative permission filter implementation that does not use subqueries for fetching the dashboard folder",

View File

@@ -93,6 +93,7 @@ featureToggleAdminPage,experimental,@grafana/grafana-operator-experience-squad,f
awsAsyncQueryCaching,preview,@grafana/aws-datasources,false,false,false,false
splitScopes,preview,@grafana/grafana-authnz-team,false,false,true,false
azureMonitorDataplane,GA,@grafana/partner-datasources,false,false,false,false
traceToProfiles,experimental,@grafana/observability-traces-and-profiling,false,false,false,true
permissionsFilterRemoveSubquery,experimental,@grafana/backend-platform,false,false,false,false
prometheusConfigOverhaulAuth,GA,@grafana/observability-metrics,false,false,false,false
configurableSchedulerTick,experimental,@grafana/alerting-squad,false,false,true,false
1 Name Stage Owner requiresDevMode RequiresLicense RequiresRestart FrontendOnly
93 awsAsyncQueryCaching preview @grafana/aws-datasources false false false false
94 splitScopes preview @grafana/grafana-authnz-team false false true false
95 azureMonitorDataplane GA @grafana/partner-datasources false false false false
96 traceToProfiles experimental @grafana/observability-traces-and-profiling false false false true
97 permissionsFilterRemoveSubquery experimental @grafana/backend-platform false false false false
98 prometheusConfigOverhaulAuth GA @grafana/observability-metrics false false false false
99 configurableSchedulerTick experimental @grafana/alerting-squad false false true false

View File

@@ -383,6 +383,10 @@ const (
// Adds dataplane compliant frame metadata in the Azure Monitor datasource
FlagAzureMonitorDataplane = "azureMonitorDataplane"
// FlagTraceToProfiles
// Enables linking between traces and profiles
FlagTraceToProfiles = "traceToProfiles"
// FlagPermissionsFilterRemoveSubquery
// Alternative permission filter implementation that does not use subqueries for fetching the dashboard folder
FlagPermissionsFilterRemoveSubquery = "permissionsFilterRemoveSubquery"

View File

@@ -31,6 +31,7 @@ type ProfilingClient interface {
LabelValues(ctx context.Context, label string) ([]string, error)
GetSeries(ctx context.Context, profileTypeID string, labelSelector string, start int64, end int64, groupBy []string, step float64) (*SeriesResponse, error)
GetProfile(ctx context.Context, profileTypeID string, labelSelector string, start int64, end int64, maxNodes *int64) (*ProfileResponse, error)
GetSpanProfile(ctx context.Context, profileTypeID string, labelSelector string, spanSelector []string, start int64, end int64, maxNodes *int64) (*ProfileResponse, error)
}
// PyroscopeDatasource is a datasource for querying application performance profiles.

View File

@@ -79,6 +79,9 @@ type GrafanaPyroscopeDataQuery struct {
// In server side expressions, the refId is used as a variable name to identify results.
// By default, the UI will assign A->Z; however setting meaningful names may be useful.
RefId string `json:"refId"`
// Specifies the query span selectors.
SpanSelector []string `json:"spanSelector,omitempty"`
}
// PyroscopeQueryType defines model for PyroscopeQueryType.

View File

@@ -175,8 +175,41 @@ func (c *PyroscopeClient) GetProfile(ctx context.Context, profileTypeID, labelSe
return nil, nil
}
levels := make([]*Level, len(resp.Msg.Flamegraph.Levels))
for i, level := range resp.Msg.Flamegraph.Levels {
return profileQuery(ctx, err, span, resp.Msg.Flamegraph, profileTypeID)
}
// GetSpanProfile sends a SelectMergeSpanProfile request built from the given
// profile type, label selector, span selector, time range and node limit, and
// converts the returned flame graph with profileQuery.
//
// The call is wrapped in a "datasource.pyroscope.GetSpanProfile" tracing span
// that carries the profile type, the label selector and the comma-joined span
// selector as attributes. A request error is recorded on that span and
// returned. When the response contains no flame graph the result is (nil, nil).
func (c *PyroscopeClient) GetSpanProfile(ctx context.Context, profileTypeID, labelSelector string, spanSelector []string, start, end int64, maxNodes *int64) (*ProfileResponse, error) {
	traceAttrs := trace.WithAttributes(
		attribute.String("profileTypeID", profileTypeID),
		attribute.String("labelSelector", labelSelector),
		attribute.String("spanSelector", strings.Join(spanSelector, ",")),
	)
	ctx, span := tracing.DefaultTracer().Start(ctx, "datasource.pyroscope.GetSpanProfile", traceAttrs)
	defer span.End()

	msg := &querierv1.SelectMergeSpanProfileRequest{
		ProfileTypeID: profileTypeID,
		LabelSelector: labelSelector,
		SpanSelector:  spanSelector,
		Start:         start,
		End:           end,
		MaxNodes:      maxNodes,
	}
	req := &connect.Request[querierv1.SelectMergeSpanProfileRequest]{Msg: msg}

	resp, err := c.connectClient.SelectMergeSpanProfile(ctx, req)
	if err != nil {
		span.RecordError(err)
		span.SetStatus(codes.Error, err.Error())
		return nil, err
	}

	flamegraph := resp.Msg.Flamegraph
	if flamegraph == nil {
		// Not an error, can happen when querying data out of range.
		return nil, nil
	}

	return profileQuery(ctx, err, span, flamegraph, profileTypeID)
}
func profileQuery(ctx context.Context, err error, span trace.Span, flamegraph *querierv1.FlameGraph, profileTypeID string) (*ProfileResponse, error) {
levels := make([]*Level, len(flamegraph.Levels))
for i, level := range flamegraph.Levels {
levels[i] = &Level{
Values: level.Values,
}
@@ -184,10 +217,10 @@ func (c *PyroscopeClient) GetProfile(ctx context.Context, profileTypeID, labelSe
return &ProfileResponse{
Flamebearer: &Flamebearer{
Names: resp.Msg.Flamegraph.Names,
Names: flamegraph.Names,
Levels: levels,
Total: resp.Msg.Flamegraph.Total,
MaxSelf: resp.Msg.Flamegraph.MaxSelf,
Total: flamegraph.Total,
MaxSelf: flamegraph.MaxSelf,
},
Units: getUnits(profileTypeID),
}, nil

View File

@@ -129,3 +129,7 @@ func (f *FakePyroscopeConnectClient) SelectSeries(ctx context.Context, req *conn
// SelectMergeProfile is not implemented by this fake; calling it panics with
// "implement me".
func (f *FakePyroscopeConnectClient) SelectMergeProfile(ctx context.Context, c *connect.Request[querierv1.SelectMergeProfileRequest]) (*connect.Response[googlev1.Profile], error) {
panic("implement me")
}
// SelectMergeSpanProfile is not implemented by this fake; calling it panics
// with "implement me".
func (f *FakePyroscopeConnectClient) SelectMergeSpanProfile(ctx context.Context, c *connect.Request[querierv1.SelectMergeSpanProfileRequest]) (*connect.Response[querierv1.SelectMergeSpanProfileResponse], error) {
panic("implement me")
}

View File

@@ -98,18 +98,32 @@ func (d *PyroscopeDatasource) query(ctx context.Context, pCtx backend.PluginCont
if query.QueryType == queryTypeProfile || query.QueryType == queryTypeBoth {
g.Go(func() error {
logger.Debug("Calling GetProfile", "queryModel", qm, "function", logEntrypoint())
prof, err := d.client.GetProfile(gCtx, qm.ProfileTypeId, qm.LabelSelector, query.TimeRange.From.UnixMilli(), query.TimeRange.To.UnixMilli(), qm.MaxNodes)
if err != nil {
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
logger.Error("Error GetProfile()", "err", err, "function", logEntrypoint())
return err
var profileResp *ProfileResponse
if len(qm.SpanSelector) > 0 {
logger.Debug("Calling GetSpanProfile", "queryModel", qm, "function", logEntrypoint())
prof, err := d.client.GetSpanProfile(gCtx, qm.ProfileTypeId, qm.LabelSelector, qm.SpanSelector, query.TimeRange.From.UnixMilli(), query.TimeRange.To.UnixMilli(), qm.MaxNodes)
if err != nil {
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
logger.Error("Error GetSpanProfile()", "err", err, "function", logEntrypoint())
return err
}
profileResp = prof
} else {
logger.Debug("Calling GetProfile", "queryModel", qm, "function", logEntrypoint())
prof, err := d.client.GetProfile(gCtx, qm.ProfileTypeId, qm.LabelSelector, query.TimeRange.From.UnixMilli(), query.TimeRange.To.UnixMilli(), qm.MaxNodes)
if err != nil {
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
logger.Error("Error GetProfile()", "err", err, "function", logEntrypoint())
return err
}
profileResp = prof
}
var frame *data.Frame
if prof != nil {
frame = responseToDataFrames(prof)
if profileResp != nil {
frame = responseToDataFrames(profileResp)
// If query called with streaming on then return a channel
// to subscribe on a client-side and consume updates from a plugin.

View File

@@ -312,6 +312,22 @@ func (f *FakeClient) GetProfile(ctx context.Context, profileTypeID, labelSelecto
}, nil
}
// GetSpanProfile returns a fixed, canned profile response and a nil error.
// None of the arguments are inspected or recorded.
func (f *FakeClient) GetSpanProfile(ctx context.Context, profileTypeID, labelSelector string, spanSelector []string, start, end int64, maxNodes *int64) (*ProfileResponse, error) {
	// Three levels of four values each, matching the three names below.
	levels := []*Level{
		{Values: []int64{0, 10, 0, 0}},
		{Values: []int64{0, 9, 0, 1}},
		{Values: []int64{0, 8, 8, 2}},
	}

	flamebearer := &Flamebearer{
		Names:   []string{"foo", "bar", "baz"},
		Levels:  levels,
		Total:   100,
		MaxSelf: 56,
	}

	resp := &ProfileResponse{
		Flamebearer: flamebearer,
		Units:       "count",
	}
	return resp, nil
}
func (f *FakeClient) GetSeries(ctx context.Context, profileTypeID, labelSelector string, start, end int64, groupBy []string, step float64) (*SeriesResponse, error) {
f.Args = []any{profileTypeID, labelSelector, start, end, groupBy, step}
return &SeriesResponse{