mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting: Add metrics for active receiver and integrations (#64050)
* Alerting: Add metrics for active receiver and integrations Introduces metrics that allow us to track the number of configured receivers and integrations in the Alertmanager for all orgs. As a bonus, I realised that the alert reception metrics were not being exported or collected. This commit does that too.
This commit is contained in:
parent
fd37ff29b5
commit
5422f7cf56
4
go.mod
4
go.mod
@ -64,7 +64,7 @@ require (
|
||||
github.com/google/uuid v1.3.0
|
||||
github.com/google/wire v0.5.0
|
||||
github.com/gorilla/websocket v1.5.0
|
||||
github.com/grafana/alerting v0.0.0-20230203015918-0e4e2675d7aa
|
||||
github.com/grafana/alerting v0.0.0-20230302124801-c17c23256fba
|
||||
github.com/grafana/cuetsy v0.1.6
|
||||
github.com/grafana/grafana-aws-sdk v0.12.0
|
||||
github.com/grafana/grafana-azure-sdk-go v1.6.0
|
||||
@ -127,7 +127,7 @@ require (
|
||||
gopkg.in/mail.v2 v2.3.1
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
xorm.io/builder v0.3.6
|
||||
xorm.io/builder v0.3.6 // indirect
|
||||
xorm.io/core v0.7.3
|
||||
xorm.io/xorm v0.8.2
|
||||
)
|
||||
|
4
go.sum
4
go.sum
@ -1257,8 +1257,8 @@ github.com/gorilla/websocket v1.4.1/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/ad
|
||||
github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
|
||||
github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc=
|
||||
github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
|
||||
github.com/grafana/alerting v0.0.0-20230203015918-0e4e2675d7aa h1:ue2fctL9LHJWYw9V+R1O/uWLNTfAr/KU1EUFRsqWlK4=
|
||||
github.com/grafana/alerting v0.0.0-20230203015918-0e4e2675d7aa/go.mod h1:NoSLbfmUwE+omWFReFrLtbtOItmvTbuQERJ6XFYp9ME=
|
||||
github.com/grafana/alerting v0.0.0-20230302124801-c17c23256fba h1:ScqEvk2xPpUIKE+m45meufYlLlediv7Y/hHKcotW/1o=
|
||||
github.com/grafana/alerting v0.0.0-20230302124801-c17c23256fba/go.mod h1:NoSLbfmUwE+omWFReFrLtbtOItmvTbuQERJ6XFYp9ME=
|
||||
github.com/grafana/codejen v0.0.3 h1:tAWxoTUuhgmEqxJPOLtJoxlPBbMULFwKFOcRsPRPXDw=
|
||||
github.com/grafana/codejen v0.0.3/go.mod h1:zmwwM/DRyQB7pfuBjTWII3CWtxcXh8LTwAYGfDfpR6s=
|
||||
github.com/grafana/cuetsy v0.1.6 h1:61QGIDy1rVABU3OkoarOn0+qPdGopIJr34PyWVmGDfs=
|
||||
|
@ -72,6 +72,12 @@ func (moa *MultiOrgAlertmanager) GetOrCreateOrgRegistry(id int64) prometheus.Reg
|
||||
type AlertmanagerAggregatedMetrics struct {
|
||||
registries *metrics.TenantRegistries
|
||||
|
||||
// metrics gathered from the in-house "Alertmanager" directly.
|
||||
numReceivedAlerts *prometheus.Desc
|
||||
numInvalidAlerts *prometheus.Desc
|
||||
configuredReceivers *prometheus.Desc
|
||||
configuredIntegrations *prometheus.Desc
|
||||
|
||||
// exported metrics, gathered from Alertmanager PipelineBuilder
|
||||
numNotifications *prometheus.Desc
|
||||
numFailedNotifications *prometheus.Desc
|
||||
@ -107,6 +113,23 @@ func NewAlertmanagerAggregatedMetrics(registries *metrics.TenantRegistries) *Ale
|
||||
aggregatedMetrics := &AlertmanagerAggregatedMetrics{
|
||||
registries: registries,
|
||||
|
||||
numReceivedAlerts: prometheus.NewDesc(
|
||||
fmt.Sprintf("%s_%s_alerts_received_total", Namespace, Subsystem),
|
||||
"The total number of received alerts.",
|
||||
[]string{"org", "status"}, nil),
|
||||
numInvalidAlerts: prometheus.NewDesc(
|
||||
fmt.Sprintf("%s_%s_alerts_invalid_total", Namespace, Subsystem),
|
||||
"The total number of received alerts that were invalid.",
|
||||
[]string{"org"}, nil),
|
||||
configuredReceivers: prometheus.NewDesc(
|
||||
fmt.Sprintf("%s_%s_alertmanager_receivers", Namespace, Subsystem),
|
||||
"Number of configured receivers by state. It is considered active if used within a route.",
|
||||
[]string{"org", "state"}, nil),
|
||||
configuredIntegrations: prometheus.NewDesc(
|
||||
fmt.Sprintf("%s_%s_alertmanager_integrations", Namespace, Subsystem),
|
||||
"Number of configured receivers.",
|
||||
[]string{"org", "type"}, nil),
|
||||
|
||||
numNotifications: prometheus.NewDesc(
|
||||
fmt.Sprintf("%s_%s_notifications_total", Namespace, Subsystem),
|
||||
"The total number of attempted notifications.",
|
||||
@ -204,6 +227,11 @@ func NewAlertmanagerAggregatedMetrics(registries *metrics.TenantRegistries) *Ale
|
||||
}
|
||||
|
||||
func (a *AlertmanagerAggregatedMetrics) Describe(out chan<- *prometheus.Desc) {
|
||||
out <- a.numReceivedAlerts
|
||||
out <- a.numInvalidAlerts
|
||||
out <- a.configuredReceivers
|
||||
out <- a.configuredIntegrations
|
||||
|
||||
out <- a.numNotifications
|
||||
out <- a.numFailedNotifications
|
||||
out <- a.numNotificationRequestsTotal
|
||||
@ -234,6 +262,11 @@ func (a *AlertmanagerAggregatedMetrics) Describe(out chan<- *prometheus.Desc) {
|
||||
func (a *AlertmanagerAggregatedMetrics) Collect(out chan<- prometheus.Metric) {
|
||||
data := a.registries.BuildMetricFamiliesPerTenant()
|
||||
|
||||
data.SendSumOfCountersPerTenant(out, a.numReceivedAlerts, "alertmanager_alerts_received_total", metrics.WithLabels("status"))
|
||||
data.SendSumOfCountersPerTenant(out, a.numInvalidAlerts, "alertmanager_alerts_invalid_total")
|
||||
data.SendSumOfGaugesPerTenantWithLabels(out, a.configuredReceivers, "grafana_alerting_alertmanager_receivers", "state")
|
||||
data.SendSumOfGaugesPerTenantWithLabels(out, a.configuredIntegrations, "grafana_alerting_alertmanager_integrations", "type")
|
||||
|
||||
data.SendSumOfCountersPerTenant(out, a.numNotifications, "alertmanager_notifications_total", metrics.WithLabels("integration"), metrics.WithSkipZeroValueMetrics)
|
||||
data.SendSumOfCountersPerTenant(out, a.numFailedNotifications, "alertmanager_notifications_failed_total", metrics.WithLabels("integration"), metrics.WithSkipZeroValueMetrics)
|
||||
data.SendSumOfCountersPerTenant(out, a.numNotificationRequestsTotal, "alertmanager_notification_requests_total", metrics.WithLabels("integration"), metrics.WithSkipZeroValueMetrics)
|
||||
|
Loading…
Reference in New Issue
Block a user