Alerting: Add new metrics and tracing to state manager and scheduler (#71398)

* add metrics and tracing to state manager

* propagate tracer to state manager

* add scheduler metrics

* fix backtesting

* add test for state metrics

* remove StateUpdateCount

* update docs

* metrics can be nil

* add tracer to new tests
This commit is contained in:
Yuri Tseretyan
2023-08-16 03:04:18 -04:00
committed by GitHub
parent 90e3f516ff
commit 938e26b59f
14 changed files with 264 additions and 64 deletions

View File

@@ -18,6 +18,8 @@ type Scheduler struct {
EvalTotal *prometheus.CounterVec
EvalFailures *prometheus.CounterVec
EvalDuration *prometheus.HistogramVec
ProcessDuration *prometheus.HistogramVec
SendDuration *prometheus.HistogramVec
GroupRules *prometheus.GaugeVec
SchedulePeriodicDuration prometheus.Histogram
SchedulableAlertRules prometheus.Gauge
@@ -63,8 +65,28 @@ func NewSchedulerMetrics(r prometheus.Registerer) *Scheduler {
Namespace: Namespace,
Subsystem: Subsystem,
Name: "rule_evaluation_duration_seconds",
Help: "The duration for a rule to execute.",
Buckets: []float64{.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10, 25, 50, 100},
Help: "The time to evaluate a rule.",
Buckets: []float64{.01, .1, .5, 1, 5, 10, 15, 30, 60, 120, 180, 240, 300},
},
[]string{"org"},
),
ProcessDuration: promauto.With(r).NewHistogramVec(
prometheus.HistogramOpts{
Namespace: Namespace,
Subsystem: Subsystem,
Name: "rule_process_evaluation_duration_seconds",
Help: "The time to process the evaluation results for a rule.",
Buckets: []float64{.01, .1, .5, 1, 5, 10, 15, 30, 60, 120, 180, 240, 300},
},
[]string{"org"},
),
SendDuration: promauto.With(r).NewHistogramVec(
prometheus.HistogramOpts{
Namespace: Namespace,
Subsystem: Subsystem,
Name: "rule_send_alerts_duration_seconds",
Help: "The time to send the alerts to Alertmanager.",
Buckets: []float64{.01, .1, .5, 1, 5, 10, 15, 30, 60, 120, 180, 240, 300},
},
[]string{"org"},
),

View File

@@ -6,7 +6,8 @@ import (
)
type State struct {
AlertState *prometheus.GaugeVec
AlertState *prometheus.GaugeVec
StateUpdateDuration prometheus.Histogram
}
func NewStateMetrics(r prometheus.Registerer) *State {
@@ -17,5 +18,14 @@ func NewStateMetrics(r prometheus.Registerer) *State {
Name: "alerts",
Help: "How many alerts by state.",
}, []string{"state"}),
StateUpdateDuration: promauto.With(r).NewHistogram(
prometheus.HistogramOpts{
Namespace: Namespace,
Subsystem: Subsystem,
Name: "state_calculation_duration_seconds",
Help: "The duration of calculation of a single state.",
Buckets: []float64{0.01, 0.1, 1, 2, 5, 10},
},
),
}
}