Alerting: Add "backend" label to state history writes metrics (#65395)

* Add backend label to state history writes metrics

* Update test expectations
This commit is contained in:
Alexander Weaver 2023-03-28 08:49:51 -05:00 committed by GitHub
parent f0ddf900c0
commit dd04757fc9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 10 additions and 10 deletions

View File

@@ -41,13 +41,13 @@ func NewHistorianMetrics(r prometheus.Registerer) *Historian {
Subsystem: Subsystem,
Name: "state_history_writes_total",
Help: "The total number of state history batches that were attempted to be written.",
-}, []string{"org"}),
+}, []string{"org", "backend"}),
WritesFailed: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
Namespace: Namespace,
Subsystem: Subsystem,
Name: "state_history_writes_failed_total",
Help: "The total number of failed writes of state history batches.",
-}, []string{"org"}),
+}, []string{"org", "backend"}),
WriteDuration: instrument.NewHistogramCollector(promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{
Namespace: Namespace,
Subsystem: Subsystem,

View File

@@ -199,11 +199,11 @@ func (h *AnnotationBackend) recordAnnotations(ctx context.Context, panel *panelK
}
org := fmt.Sprint(orgID)
-h.metrics.WritesTotal.WithLabelValues(org).Inc()
+h.metrics.WritesTotal.WithLabelValues(org, "annotations").Inc()
h.metrics.TransitionsTotal.WithLabelValues(org).Add(float64(len(annotations)))
if err := h.annotations.SaveMany(ctx, annotations); err != nil {
logger.Error("Error saving alert annotation batch", "error", err)
-h.metrics.WritesFailed.WithLabelValues(org).Inc()
+h.metrics.WritesFailed.WithLabelValues(org, "annotations").Inc()
h.metrics.TransitionsFailed.WithLabelValues(org).Add(float64(len(annotations)))
return fmt.Errorf("error saving alert annotation batch: %w", err)
}

View File

@@ -83,10 +83,10 @@ grafana_alerting_state_history_transitions_failed_total{org="1"} 1
grafana_alerting_state_history_transitions_total{org="1"} 2
# HELP grafana_alerting_state_history_writes_failed_total The total number of failed writes of state history batches.
# TYPE grafana_alerting_state_history_writes_failed_total counter
-grafana_alerting_state_history_writes_failed_total{org="1"} 1
+grafana_alerting_state_history_writes_failed_total{backend="annotations",org="1"} 1
# HELP grafana_alerting_state_history_writes_total The total number of state history batches that were attempted to be written.
# TYPE grafana_alerting_state_history_writes_total counter
-grafana_alerting_state_history_writes_total{org="1"} 2
+grafana_alerting_state_history_writes_total{backend="annotations",org="1"} 2
`)
err := testutil.GatherAndCompare(reg, exp,
"grafana_alerting_state_history_transitions_total",

View File

@@ -83,7 +83,7 @@ func (h *RemoteLokiBackend) Record(ctx context.Context, rule history_model.RuleM
defer close(errCh)
org := fmt.Sprint(rule.OrgID)
-h.metrics.WritesTotal.WithLabelValues(org).Inc()
+h.metrics.WritesTotal.WithLabelValues(org, "loki").Inc()
samples := 0
for _, s := range streams {
samples += len(s.Values)
@@ -92,7 +92,7 @@ func (h *RemoteLokiBackend) Record(ctx context.Context, rule history_model.RuleM
if err := h.recordStreams(ctx, streams, logger); err != nil {
logger.Error("Failed to save alert state history batch", "error", err)
-h.metrics.WritesFailed.WithLabelValues(org).Inc()
+h.metrics.WritesFailed.WithLabelValues(org, "loki").Inc()
h.metrics.TransitionsFailed.WithLabelValues(org).Add(float64(samples))
errCh <- fmt.Errorf("failed to save alert state history batch: %w", err)
}

View File

@@ -298,10 +298,10 @@ grafana_alerting_state_history_transitions_failed_total{org="1"} 1
grafana_alerting_state_history_transitions_total{org="1"} 2
# HELP grafana_alerting_state_history_writes_failed_total The total number of failed writes of state history batches.
# TYPE grafana_alerting_state_history_writes_failed_total counter
-grafana_alerting_state_history_writes_failed_total{org="1"} 1
+grafana_alerting_state_history_writes_failed_total{backend="loki",org="1"} 1
# HELP grafana_alerting_state_history_writes_total The total number of state history batches that were attempted to be written.
# TYPE grafana_alerting_state_history_writes_total counter
-grafana_alerting_state_history_writes_total{org="1"} 2
+grafana_alerting_state_history_writes_total{backend="loki",org="1"} 2
`)
err := testutil.GatherAndCompare(reg, exp,
"grafana_alerting_state_history_transitions_total",