mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting: Add metrics for active receiver and integrations (#64050)
* Alerting: Add metrics for active receiver and integrations Introduces metrics that allow us to track the number of configured receivers and integrations in the Alertmanager for all orgs. As a bonus, I realised that the alert reception metrics were not being exported or collected. This does that too.
This commit is contained in:
@@ -72,6 +72,12 @@ func (moa *MultiOrgAlertmanager) GetOrCreateOrgRegistry(id int64) prometheus.Reg
|
||||
type AlertmanagerAggregatedMetrics struct {
|
||||
registries *metrics.TenantRegistries
|
||||
|
||||
// metrics gathered directly from the in-house "Alertmanager".
|
||||
numReceivedAlerts *prometheus.Desc
|
||||
numInvalidAlerts *prometheus.Desc
|
||||
configuredReceivers *prometheus.Desc
|
||||
configuredIntegrations *prometheus.Desc
|
||||
|
||||
// exported metrics, gathered from Alertmanager PipelineBuilder
|
||||
numNotifications *prometheus.Desc
|
||||
numFailedNotifications *prometheus.Desc
|
||||
@@ -107,6 +113,23 @@ func NewAlertmanagerAggregatedMetrics(registries *metrics.TenantRegistries) *Ale
|
||||
aggregatedMetrics := &AlertmanagerAggregatedMetrics{
|
||||
registries: registries,
|
||||
|
||||
numReceivedAlerts: prometheus.NewDesc(
|
||||
fmt.Sprintf("%s_%s_alerts_received_total", Namespace, Subsystem),
|
||||
"The total number of received alerts.",
|
||||
[]string{"org", "status"}, nil),
|
||||
numInvalidAlerts: prometheus.NewDesc(
|
||||
fmt.Sprintf("%s_%s_alerts_invalid_total", Namespace, Subsystem),
|
||||
"The total number of received alerts that were invalid.",
|
||||
[]string{"org"}, nil),
|
||||
configuredReceivers: prometheus.NewDesc(
|
||||
fmt.Sprintf("%s_%s_alertmanager_receivers", Namespace, Subsystem),
|
||||
"Number of configured receivers by state. It is considered active if used within a route.",
|
||||
[]string{"org", "state"}, nil),
|
||||
configuredIntegrations: prometheus.NewDesc(
|
||||
fmt.Sprintf("%s_%s_alertmanager_integrations", Namespace, Subsystem),
|
||||
"Number of configured receivers.",
|
||||
[]string{"org", "type"}, nil),
|
||||
|
||||
numNotifications: prometheus.NewDesc(
|
||||
fmt.Sprintf("%s_%s_notifications_total", Namespace, Subsystem),
|
||||
"The total number of attempted notifications.",
|
||||
@@ -204,6 +227,11 @@ func NewAlertmanagerAggregatedMetrics(registries *metrics.TenantRegistries) *Ale
|
||||
}
|
||||
|
||||
func (a *AlertmanagerAggregatedMetrics) Describe(out chan<- *prometheus.Desc) {
|
||||
out <- a.numReceivedAlerts
|
||||
out <- a.numInvalidAlerts
|
||||
out <- a.configuredReceivers
|
||||
out <- a.configuredIntegrations
|
||||
|
||||
out <- a.numNotifications
|
||||
out <- a.numFailedNotifications
|
||||
out <- a.numNotificationRequestsTotal
|
||||
@@ -234,6 +262,11 @@ func (a *AlertmanagerAggregatedMetrics) Describe(out chan<- *prometheus.Desc) {
|
||||
func (a *AlertmanagerAggregatedMetrics) Collect(out chan<- prometheus.Metric) {
|
||||
data := a.registries.BuildMetricFamiliesPerTenant()
|
||||
|
||||
data.SendSumOfCountersPerTenant(out, a.numReceivedAlerts, "alertmanager_alerts_received_total", metrics.WithLabels("status"))
|
||||
data.SendSumOfCountersPerTenant(out, a.numInvalidAlerts, "alertmanager_alerts_invalid_total")
|
||||
data.SendSumOfGaugesPerTenantWithLabels(out, a.configuredReceivers, "grafana_alerting_alertmanager_receivers", "state")
|
||||
data.SendSumOfGaugesPerTenantWithLabels(out, a.configuredIntegrations, "grafana_alerting_alertmanager_integrations", "type")
|
||||
|
||||
data.SendSumOfCountersPerTenant(out, a.numNotifications, "alertmanager_notifications_total", metrics.WithLabels("integration"), metrics.WithSkipZeroValueMetrics)
|
||||
data.SendSumOfCountersPerTenant(out, a.numFailedNotifications, "alertmanager_notifications_failed_total", metrics.WithLabels("integration"), metrics.WithSkipZeroValueMetrics)
|
||||
data.SendSumOfCountersPerTenant(out, a.numNotificationRequestsTotal, "alertmanager_notification_requests_total", metrics.WithLabels("integration"), metrics.WithSkipZeroValueMetrics)
|
||||
|
||||
Reference in New Issue
Block a user