mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Convert some metrics to Histograms (#50420)
Because Summary metrics cannot be aggregated, convert them to histograms so that users with HA deployments can use these metrics. * Convert metrics registration to promauto. * Improve help text style. Signed-off-by: SuperQ <superq@gmail.com>
This commit is contained in:
parent
390b7d084e
commit
68691d7775
@ -7,52 +7,48 @@ import (
|
|||||||
"github.com/grafana/grafana/pkg/infra/httpclient"
|
"github.com/grafana/grafana/pkg/infra/httpclient"
|
||||||
"github.com/grafana/grafana/pkg/infra/metrics/metricutil"
|
"github.com/grafana/grafana/pkg/infra/metrics/metricutil"
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||||
)
|
)
|
||||||
|
|
||||||
var datasourceRequestCounter = prometheus.NewCounterVec(
|
var (
|
||||||
prometheus.CounterOpts{
|
datasourceRequestCounter = promauto.NewCounterVec(
|
||||||
Namespace: "grafana",
|
prometheus.CounterOpts{
|
||||||
Name: "datasource_request_total",
|
Namespace: "grafana",
|
||||||
Help: "A counter for outgoing requests for a datasource",
|
Name: "datasource_request_total",
|
||||||
},
|
Help: "A counter for outgoing requests for a data source",
|
||||||
[]string{"datasource", "code", "method"},
|
},
|
||||||
)
|
[]string{"datasource", "code", "method"},
|
||||||
|
)
|
||||||
|
|
||||||
var datasourceRequestSummary = prometheus.NewSummaryVec(
|
datasourceRequestHistogram = promauto.NewHistogramVec(
|
||||||
prometheus.SummaryOpts{
|
prometheus.HistogramOpts{
|
||||||
Namespace: "grafana",
|
Namespace: "grafana",
|
||||||
Name: "datasource_request_duration_seconds",
|
Name: "datasource_request_duration_seconds",
|
||||||
Help: "summary of outgoing datasource requests sent from Grafana",
|
Help: "summary of outgoing data source requests sent from Grafana",
|
||||||
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
|
Buckets: []float64{.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10, 25, 50, 100},
|
||||||
}, []string{"datasource", "code", "method"},
|
}, []string{"datasource", "code", "method"},
|
||||||
)
|
)
|
||||||
|
|
||||||
var datasourceResponseSummary = prometheus.NewSummaryVec(
|
datasourceResponseHistogram = promauto.NewHistogramVec(
|
||||||
prometheus.SummaryOpts{
|
prometheus.HistogramOpts{
|
||||||
Namespace: "grafana",
|
Namespace: "grafana",
|
||||||
Name: "datasource_response_size_bytes",
|
Name: "datasource_response_size_bytes",
|
||||||
Help: "summary of datasource response sizes returned to Grafana",
|
Help: "summary of data source response sizes returned to Grafana",
|
||||||
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
|
Buckets: []float64{128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576},
|
||||||
}, []string{"datasource"},
|
}, []string{"datasource"},
|
||||||
)
|
)
|
||||||
|
|
||||||
var datasourceRequestsInFlight = prometheus.NewGaugeVec(
|
datasourceRequestsInFlight = promauto.NewGaugeVec(
|
||||||
prometheus.GaugeOpts{
|
prometheus.GaugeOpts{
|
||||||
Namespace: "grafana",
|
Namespace: "grafana",
|
||||||
Name: "datasource_request_in_flight",
|
Name: "datasource_request_in_flight",
|
||||||
Help: "A gauge of outgoing datasource requests currently being sent by Grafana",
|
Help: "A gauge of outgoing data source requests currently being sent by Grafana",
|
||||||
},
|
},
|
||||||
[]string{"datasource"},
|
[]string{"datasource"},
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
func init() {
|
|
||||||
prometheus.MustRegister(datasourceRequestSummary,
|
|
||||||
datasourceRequestCounter,
|
|
||||||
datasourceRequestsInFlight,
|
|
||||||
datasourceResponseSummary)
|
|
||||||
}
|
|
||||||
|
|
||||||
const DataSourceMetricsMiddlewareName = "metrics"
|
const DataSourceMetricsMiddlewareName = "metrics"
|
||||||
|
|
||||||
var executeMiddlewareFunc = executeMiddleware
|
var executeMiddlewareFunc = executeMiddleware
|
||||||
@ -84,11 +80,11 @@ func DataSourceMetricsMiddleware() sdkhttpclient.Middleware {
|
|||||||
func executeMiddleware(next http.RoundTripper, datasourceLabel prometheus.Labels) http.RoundTripper {
|
func executeMiddleware(next http.RoundTripper, datasourceLabel prometheus.Labels) http.RoundTripper {
|
||||||
return sdkhttpclient.RoundTripperFunc(func(r *http.Request) (*http.Response, error) {
|
return sdkhttpclient.RoundTripperFunc(func(r *http.Request) (*http.Response, error) {
|
||||||
requestCounter := datasourceRequestCounter.MustCurryWith(datasourceLabel)
|
requestCounter := datasourceRequestCounter.MustCurryWith(datasourceLabel)
|
||||||
requestSummary := datasourceRequestSummary.MustCurryWith(datasourceLabel)
|
requestHistogram := datasourceRequestHistogram.MustCurryWith(datasourceLabel)
|
||||||
requestInFlight := datasourceRequestsInFlight.With(datasourceLabel)
|
requestInFlight := datasourceRequestsInFlight.With(datasourceLabel)
|
||||||
responseSizeSummary := datasourceResponseSummary.With(datasourceLabel)
|
responseSizeHistogram := datasourceResponseHistogram.With(datasourceLabel)
|
||||||
|
|
||||||
res, err := promhttp.InstrumentRoundTripperDuration(requestSummary,
|
res, err := promhttp.InstrumentRoundTripperDuration(requestHistogram,
|
||||||
promhttp.InstrumentRoundTripperCounter(requestCounter,
|
promhttp.InstrumentRoundTripperCounter(requestCounter,
|
||||||
promhttp.InstrumentRoundTripperInFlight(requestInFlight, next))).
|
promhttp.InstrumentRoundTripperInFlight(requestInFlight, next))).
|
||||||
RoundTrip(r)
|
RoundTrip(r)
|
||||||
@ -98,7 +94,7 @@ func executeMiddleware(next http.RoundTripper, datasourceLabel prometheus.Labels
|
|||||||
|
|
||||||
if res != nil && res.StatusCode != http.StatusSwitchingProtocols {
|
if res != nil && res.StatusCode != http.StatusSwitchingProtocols {
|
||||||
res.Body = httpclient.CountBytesReader(res.Body, func(bytesRead int64) {
|
res.Body = httpclient.CountBytesReader(res.Body, func(bytesRead int64) {
|
||||||
responseSizeSummary.Observe(float64(bytesRead))
|
responseSizeHistogram.Observe(float64(bytesRead))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,29 +5,23 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
pluginRequestCounter *prometheus.CounterVec
|
pluginRequestCounter = promauto.NewCounterVec(prometheus.CounterOpts{
|
||||||
pluginRequestDuration *prometheus.SummaryVec
|
|
||||||
)
|
|
||||||
|
|
||||||
func init() {
|
|
||||||
pluginRequestCounter = prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
||||||
Namespace: "grafana",
|
Namespace: "grafana",
|
||||||
Name: "plugin_request_total",
|
Name: "plugin_request_total",
|
||||||
Help: "The total amount of plugin requests",
|
Help: "The total amount of plugin requests",
|
||||||
}, []string{"plugin_id", "endpoint", "status"})
|
}, []string{"plugin_id", "endpoint", "status"})
|
||||||
|
|
||||||
pluginRequestDuration = prometheus.NewSummaryVec(prometheus.SummaryOpts{
|
pluginRequestDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
|
||||||
Namespace: "grafana",
|
Namespace: "grafana",
|
||||||
Name: "plugin_request_duration_milliseconds",
|
Name: "plugin_request_duration_milliseconds",
|
||||||
Help: "Plugin request duration",
|
Help: "Plugin request duration",
|
||||||
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
|
Buckets: []float64{.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10, 25, 50, 100},
|
||||||
}, []string{"plugin_id", "endpoint"})
|
}, []string{"plugin_id", "endpoint"})
|
||||||
|
)
|
||||||
prometheus.MustRegister(pluginRequestCounter, pluginRequestDuration)
|
|
||||||
}
|
|
||||||
|
|
||||||
// instrumentPluginRequest instruments success rate and latency of `fn`
|
// instrumentPluginRequest instruments success rate and latency of `fn`
|
||||||
func instrumentPluginRequest(pluginID string, endpoint string, fn func() error) error {
|
func instrumentPluginRequest(pluginID string, endpoint string, fn func() error) error {
|
||||||
|
@ -50,7 +50,7 @@ type Scheduler struct {
|
|||||||
BehindSeconds prometheus.Gauge
|
BehindSeconds prometheus.Gauge
|
||||||
EvalTotal *prometheus.CounterVec
|
EvalTotal *prometheus.CounterVec
|
||||||
EvalFailures *prometheus.CounterVec
|
EvalFailures *prometheus.CounterVec
|
||||||
EvalDuration *prometheus.SummaryVec
|
EvalDuration *prometheus.HistogramVec
|
||||||
SchedulePeriodicDuration prometheus.Histogram
|
SchedulePeriodicDuration prometheus.Histogram
|
||||||
SchedulableAlertRules prometheus.Gauge
|
SchedulableAlertRules prometheus.Gauge
|
||||||
SchedulableAlertRulesHash prometheus.Gauge
|
SchedulableAlertRulesHash prometheus.Gauge
|
||||||
@ -156,13 +156,13 @@ func newSchedulerMetrics(r prometheus.Registerer) *Scheduler {
|
|||||||
},
|
},
|
||||||
[]string{"org"},
|
[]string{"org"},
|
||||||
),
|
),
|
||||||
EvalDuration: promauto.With(r).NewSummaryVec(
|
EvalDuration: promauto.With(r).NewHistogramVec(
|
||||||
prometheus.SummaryOpts{
|
prometheus.HistogramOpts{
|
||||||
Namespace: Namespace,
|
Namespace: Namespace,
|
||||||
Subsystem: Subsystem,
|
Subsystem: Subsystem,
|
||||||
Name: "rule_evaluation_duration_seconds",
|
Name: "rule_evaluation_duration_seconds",
|
||||||
Help: "The duration for a rule to execute.",
|
Help: "The duration for a rule to execute.",
|
||||||
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
|
Buckets: []float64{.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10, 25, 50, 100},
|
||||||
},
|
},
|
||||||
[]string{"org"},
|
[]string{"org"},
|
||||||
),
|
),
|
||||||
|
@ -441,10 +441,22 @@ func TestSchedule_ruleRoutine(t *testing.T) {
|
|||||||
// duration metric has 0 values because of mocked clock that do not advance
|
// duration metric has 0 values because of mocked clock that do not advance
|
||||||
expectedMetric := fmt.Sprintf(
|
expectedMetric := fmt.Sprintf(
|
||||||
`# HELP grafana_alerting_rule_evaluation_duration_seconds The duration for a rule to execute.
|
`# HELP grafana_alerting_rule_evaluation_duration_seconds The duration for a rule to execute.
|
||||||
# TYPE grafana_alerting_rule_evaluation_duration_seconds summary
|
# TYPE grafana_alerting_rule_evaluation_duration_seconds histogram
|
||||||
grafana_alerting_rule_evaluation_duration_seconds{org="%[1]d",quantile="0.5"} 0
|
grafana_alerting_rule_evaluation_duration_seconds_bucket{org="%[1]d",le="0.005"} 1
|
||||||
grafana_alerting_rule_evaluation_duration_seconds{org="%[1]d",quantile="0.9"} 0
|
grafana_alerting_rule_evaluation_duration_seconds_bucket{org="%[1]d",le="0.01"} 1
|
||||||
grafana_alerting_rule_evaluation_duration_seconds{org="%[1]d",quantile="0.99"} 0
|
grafana_alerting_rule_evaluation_duration_seconds_bucket{org="%[1]d",le="0.025"} 1
|
||||||
|
grafana_alerting_rule_evaluation_duration_seconds_bucket{org="%[1]d",le="0.05"} 1
|
||||||
|
grafana_alerting_rule_evaluation_duration_seconds_bucket{org="%[1]d",le="0.1"} 1
|
||||||
|
grafana_alerting_rule_evaluation_duration_seconds_bucket{org="%[1]d",le="0.25"} 1
|
||||||
|
grafana_alerting_rule_evaluation_duration_seconds_bucket{org="%[1]d",le="0.5"} 1
|
||||||
|
grafana_alerting_rule_evaluation_duration_seconds_bucket{org="%[1]d",le="1"} 1
|
||||||
|
grafana_alerting_rule_evaluation_duration_seconds_bucket{org="%[1]d",le="2.5"} 1
|
||||||
|
grafana_alerting_rule_evaluation_duration_seconds_bucket{org="%[1]d",le="5"} 1
|
||||||
|
grafana_alerting_rule_evaluation_duration_seconds_bucket{org="%[1]d",le="10"} 1
|
||||||
|
grafana_alerting_rule_evaluation_duration_seconds_bucket{org="%[1]d",le="25"} 1
|
||||||
|
grafana_alerting_rule_evaluation_duration_seconds_bucket{org="%[1]d",le="50"} 1
|
||||||
|
grafana_alerting_rule_evaluation_duration_seconds_bucket{org="%[1]d",le="100"} 1
|
||||||
|
grafana_alerting_rule_evaluation_duration_seconds_bucket{org="%[1]d",le="+Inf"} 1
|
||||||
grafana_alerting_rule_evaluation_duration_seconds_sum{org="%[1]d"} 0
|
grafana_alerting_rule_evaluation_duration_seconds_sum{org="%[1]d"} 0
|
||||||
grafana_alerting_rule_evaluation_duration_seconds_count{org="%[1]d"} 1
|
grafana_alerting_rule_evaluation_duration_seconds_count{org="%[1]d"} 1
|
||||||
# HELP grafana_alerting_rule_evaluation_failures_total The total number of rule evaluation failures.
|
# HELP grafana_alerting_rule_evaluation_failures_total The total number of rule evaluation failures.
|
||||||
|
Loading…
Reference in New Issue
Block a user