Add metrics to ngalert scheduler (#44602)

This pull request adds metrics to the ngalert scheduler so we can see how long it takes to evaluate a tick.
This commit is contained in:
George Robinson
2022-01-31 16:56:43 +00:00
committed by GitHub
parent 793e3d3556
commit 5e2280ceee
5 changed files with 99 additions and 68 deletions

View File

@@ -45,10 +45,13 @@ type NGAlert struct {
}
type Scheduler struct {
Registerer prometheus.Registerer
EvalTotal *prometheus.CounterVec
EvalFailures *prometheus.CounterVec
EvalDuration *prometheus.SummaryVec
Registerer prometheus.Registerer
BehindSeconds prometheus.Gauge
EvalTotal *prometheus.CounterVec
EvalFailures *prometheus.CounterVec
EvalDuration *prometheus.SummaryVec
GetAlertRulesDuration prometheus.Histogram
SchedulePeriodicDuration prometheus.Histogram
}
type MultiOrgAlertmanager struct {
@@ -120,6 +123,12 @@ func (moa *MultiOrgAlertmanager) GetOrCreateOrgRegistry(id int64) prometheus.Reg
func newSchedulerMetrics(r prometheus.Registerer) *Scheduler {
return &Scheduler{
Registerer: r,
BehindSeconds: promauto.With(r).NewGauge(prometheus.GaugeOpts{
Namespace: Namespace,
Subsystem: Subsystem,
Name: "scheduler_behind_seconds",
Help: "The total number of seconds the scheduler is behind.",
}),
// TODO: once rule groups support multiple rules, consider partitioning
// on rule group as well as tenant, similar to loki|cortex.
EvalTotal: promauto.With(r).NewCounterVec(
@@ -152,6 +161,24 @@ func newSchedulerMetrics(r prometheus.Registerer) *Scheduler {
},
[]string{"org"},
),
GetAlertRulesDuration: promauto.With(r).NewHistogram(
prometheus.HistogramOpts{
Namespace: Namespace,
Subsystem: Subsystem,
Name: "get_alert_rules_duration_seconds",
Help: "The time taken to get all alert rules.",
Buckets: []float64{0.1, 0.25, 0.5, 1, 2, 5, 10},
},
),
SchedulePeriodicDuration: promauto.With(r).NewHistogram(
prometheus.HistogramOpts{
Namespace: Namespace,
Subsystem: Subsystem,
Name: "schedule_periodic_duration_seconds",
Help: "The time taken to run the scheduler.",
Buckets: []float64{0.1, 0.25, 0.5, 1, 2, 5, 10},
},
),
}
}