From fa710a3172e88a7ce2193179d2c2c7b73265bb5e Mon Sep 17 00:00:00 2001
From: Alexander Zobnin
Date: Mon, 27 Apr 2020 15:29:46 +0300
Subject: [PATCH] Rendering: Add metrics (#23827)

* Rendering: base metrics

* Rendering: rendering_queue_size metric

* Chore: fix linter error

* Rendering metrics: refactoring

* Apply suggestions from code review

Co-Authored-By: Marcus Efraimsson

* Rendering metrics: handle DeadlineExceeded errors

* Rendering metrics: don't measure canceled request time

* Rendering metrics: revert deleting summary for canceled requests

* Update pkg/services/rendering/rendering.go

Co-Authored-By: Marcus Efraimsson

* Rendering: return ErrTimeout if context deadline exceeded

Co-authored-by: Marcus Efraimsson
---
 pkg/infra/metrics/metrics.go          | 37 +++++++++++++++++++++++++++
 pkg/services/rendering/plugin_mode.go |  8 ++++++
 pkg/services/rendering/rendering.go   | 20 +++++++++++++++
 3 files changed, 65 insertions(+)

diff --git a/pkg/infra/metrics/metrics.go b/pkg/infra/metrics/metrics.go
index b1e7b9d0bea..a889d4eac9c 100644
--- a/pkg/infra/metrics/metrics.go
+++ b/pkg/infra/metrics/metrics.go
@@ -98,6 +98,12 @@ var (
 
 	// LDAPUsersSyncExecutionTime is a metric summary for LDAP users sync execution duration
 	LDAPUsersSyncExecutionTime prometheus.Summary
+
+	// MRenderingRequestTotal is a metric counter for image rendering requests
+	MRenderingRequestTotal *prometheus.CounterVec
+
+	// MRenderingQueue is a metric gauge for image rendering queue size
+	MRenderingQueue prometheus.Gauge
 )
 
 // Timers
@@ -107,6 +113,9 @@ var (
 
 	// MAlertingExecutionTime is a metric summary of alert exeuction duration
 	MAlertingExecutionTime prometheus.Summary
+
+	// MRenderingSummary is a metric summary for image rendering request duration
+	MRenderingSummary *prometheus.SummaryVec
 )
 
 // StatTotals
@@ -343,6 +352,31 @@ func init() {
 		Namespace: ExporterName,
 	})
 
+	MRenderingRequestTotal = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Name:      "rendering_request_total",
+			Help:      "counter for image rendering requests",
+			Namespace: ExporterName,
+		},
+		[]string{"status"},
+	)
+
+	MRenderingSummary = prometheus.NewSummaryVec(
+		prometheus.SummaryOpts{
+			Name:       "rendering_request_duration_milliseconds",
+			Help:       "summary of image rendering request duration",
+			Objectives: objectiveMap,
+			Namespace:  ExporterName,
+		},
+		[]string{"status"},
+	)
+
+	MRenderingQueue = prometheus.NewGauge(prometheus.GaugeOpts{
+		Name:      "rendering_queue_size",
+		Help:      "size of image rendering queue",
+		Namespace: ExporterName,
+	})
+
 	MDataSourceProxyReqTimer = prometheus.NewSummary(prometheus.SummaryOpts{
 		Name: "api_dataproxy_request_all_milliseconds",
 		Help: "summary for dataproxy request duration",
@@ -489,6 +523,9 @@ func initMetricVars() {
 		MAwsCloudWatchGetMetricData,
 		MDBDataSourceQueryByID,
 		LDAPUsersSyncExecutionTime,
+		MRenderingRequestTotal,
+		MRenderingSummary,
+		MRenderingQueue,
 		MAlertingActiveAlerts,
 		MStatTotalDashboards,
 		MStatTotalUsers,
diff --git a/pkg/services/rendering/plugin_mode.go b/pkg/services/rendering/plugin_mode.go
index 475ca709e7f..9e2cb5cdda1 100644
--- a/pkg/services/rendering/plugin_mode.go
+++ b/pkg/services/rendering/plugin_mode.go
@@ -45,6 +45,10 @@ func (rs *RenderingService) renderViaPluginV1(ctx context.Context, renderKey str
 	rs.log.Debug("calling renderer plugin", "req", req)
 
 	rsp, err := rs.pluginInfo.GrpcPluginV1.Render(ctx, req)
+	if ctx.Err() == context.DeadlineExceeded {
+		rs.log.Info("Rendering timed out")
+		return nil, ErrTimeout
+	}
 	if err != nil {
 		return nil, err
 	}
@@ -84,6 +88,10 @@ func (rs *RenderingService) renderViaPluginV2(ctx context.Context, renderKey str
 	rs.log.Debug("Calling renderer plugin", "req", req)
 
 	rsp, err := rs.pluginInfo.GrpcPluginV2.Render(ctx, req)
+	if ctx.Err() == context.DeadlineExceeded {
+		rs.log.Info("Rendering timed out")
+		return nil, ErrTimeout
+	}
 	if err != nil {
 		return nil, err
 	}
diff --git a/pkg/services/rendering/rendering.go b/pkg/services/rendering/rendering.go
index 04f442104f0..31693f10b04 100644
--- a/pkg/services/rendering/rendering.go
+++ b/pkg/services/rendering/rendering.go
@@ -10,6 +10,7 @@ import (
 	"strings"
 	"time"
 
+	"github.com/grafana/grafana/pkg/infra/metrics"
 	"github.com/grafana/grafana/pkg/infra/remotecache"
 
 	"github.com/grafana/grafana/pkg/infra/log"
@@ -130,6 +131,23 @@ func (rs *RenderingService) renderUnavailableImage() *RenderResult {
 }
 
 func (rs *RenderingService) Render(ctx context.Context, opts Opts) (*RenderResult, error) {
+	startTime := time.Now()
+	result, err := rs.render(ctx, opts)
+	elapsedTime := time.Since(startTime).Milliseconds()
+	if err == ErrTimeout {
+		metrics.MRenderingRequestTotal.WithLabelValues("timeout").Inc()
+		metrics.MRenderingSummary.WithLabelValues("timeout").Observe(float64(elapsedTime))
+	} else if err != nil {
+		metrics.MRenderingRequestTotal.WithLabelValues("failure").Inc()
+		metrics.MRenderingSummary.WithLabelValues("failure").Observe(float64(elapsedTime))
+	} else {
+		metrics.MRenderingRequestTotal.WithLabelValues("success").Inc()
+		metrics.MRenderingSummary.WithLabelValues("success").Observe(float64(elapsedTime))
+	}
+	return result, err
+}
+
+func (rs *RenderingService) render(ctx context.Context, opts Opts) (*RenderResult, error) {
 	if rs.inProgressCount > opts.ConcurrentLimit {
 		return &RenderResult{
 			FilePath: filepath.Join(setting.HomePath, "public/img/rendering_limit.png"),
@@ -156,9 +174,11 @@ func (rs *RenderingService) Render(ctx context.Context, opts Opts) (*RenderResul
 
 	defer func() {
 		rs.inProgressCount--
+		metrics.MRenderingQueue.Set(float64(rs.inProgressCount))
 	}()
 
 	rs.inProgressCount++
+	metrics.MRenderingQueue.Set(float64(rs.inProgressCount))
 	return rs.renderAction(ctx, renderKey, opts)
 }
 