mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting: Expose Prometheus metrics for persisting state history (#63157)
* Create historian metrics and dependency inject
* Record counter for total number of state transitions logged
* Track write failures
* Track current number of active write goroutines
* Record histogram of how long it takes to write history data
* Don't copy the registerer
* Adjust naming of write failures metric
* Introduce WritesTotal to complement WritesFailedTotal
* Measure TransitionsFailedTotal to complement TransitionsTotal
* Rename all to state_history
* Remove redundant Total suffix
* Increment totals all the time, not just on success
* Drop ActiveWriteGoroutines
* Drop PersistDuration in favor of WriteDuration
* Drop unused gauge
* Make writes and writesFailed per org
* Add metric indicating backend and a spot for future metadata
* Drop _batch_ from names and update help
* Add metric for bytes written
* Better pairing of total + failure metric updates
* Few tweaks to wording and naming
* Record info metric during composition
* Create fakeRequester and simple happy path test using it
* Blocking test for the full historian and test for happy path metrics
* Add tests for failure case metrics
* Smoke test for full annotation persistence
* Create test for metrics on annotation persistence, both happy and failing paths
* Address linter complaints
* More linter complaints
* Remove unnecessary whitespace
* Consistency improvements to help texts
* Update tests to match new descs
This commit is contained in:
@@ -7,17 +7,59 @@ import (
|
||||
)
|
||||
|
||||
type Historian struct {
|
||||
WriteDuration *instrument.HistogramCollector
|
||||
Info *prometheus.GaugeVec
|
||||
TransitionsTotal *prometheus.CounterVec
|
||||
TransitionsFailed *prometheus.CounterVec
|
||||
WritesTotal *prometheus.CounterVec
|
||||
WritesFailed *prometheus.CounterVec
|
||||
WriteDuration *instrument.HistogramCollector
|
||||
BytesWritten prometheus.Counter
|
||||
}
|
||||
|
||||
func NewHistorianMetrics(r prometheus.Registerer) *Historian {
|
||||
return &Historian{
|
||||
Info: promauto.With(r).NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: Namespace,
|
||||
Subsystem: Subsystem,
|
||||
Name: "state_history_info",
|
||||
Help: "Information about the state history store.",
|
||||
}, []string{"backend"}),
|
||||
TransitionsTotal: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: Namespace,
|
||||
Subsystem: Subsystem,
|
||||
Name: "state_history_transitions_total",
|
||||
Help: "The total number of state transitions processed.",
|
||||
}, []string{"org"}),
|
||||
TransitionsFailed: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: Namespace,
|
||||
Subsystem: Subsystem,
|
||||
Name: "state_history_transitions_failed_total",
|
||||
Help: "The total number of state transitions that failed to be written - they are not retried.",
|
||||
}, []string{"org"}),
|
||||
WritesTotal: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: Namespace,
|
||||
Subsystem: Subsystem,
|
||||
Name: "state_history_writes_total",
|
||||
Help: "The total number of state history batches that were attempted to be written.",
|
||||
}, []string{"org"}),
|
||||
WritesFailed: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: Namespace,
|
||||
Subsystem: Subsystem,
|
||||
Name: "state_history_writes_failed_total",
|
||||
Help: "The total number of failed writes of state history batches.",
|
||||
}, []string{"org"}),
|
||||
WriteDuration: instrument.NewHistogramCollector(promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: Namespace,
|
||||
Subsystem: Subsystem,
|
||||
Name: "state_history_request_duration_seconds",
|
||||
Help: "Histogram of request durations to the state history store.",
|
||||
Help: "Histogram of request durations to the state history store. Only valid when using external stores.",
|
||||
Buckets: instrument.DefBuckets,
|
||||
}, instrument.HistogramCollectorBuckets)),
|
||||
BytesWritten: promauto.With(r).NewCounter(prometheus.CounterOpts{
|
||||
Namespace: Namespace,
|
||||
Subsystem: Subsystem,
|
||||
Name: "state_history_writes_bytes_total",
|
||||
Help: "The total number of bytes sent within a batch to the state history store. Only valid when using the Loki store.",
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user