Alerting: Add matchers metrics to Alertmanager (#69855)

This commit is contained in:
George Robinson 2023-06-15 09:18:01 +01:00 committed by GitHub
parent f649619e20
commit f085e99d3c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 114 additions and 2 deletions

View File

@ -10,12 +10,47 @@ import (
// Alertmanager groups the metrics that belong to one Alertmanager instance.
type Alertmanager struct {
// Registerer is the registerer handed to NewAlertmanagerMetrics, stored as given.
Registerer prometheus.Registerer
// Alert metrics built by metrics.NewAlerts; embedded, so its members are promoted.
*metrics.Alerts
// Gauges counting the matchers used by the configuration; embedded as well.
*AlertmanagerConfigMetrics
}
// NewAlertmanagerMetrics creates a set of metrics for the Alertmanager of each organization.
//
// The alert metrics are registered through a wrapper around r that prefixes
// every metric name with "<Namespace>_<Subsystem>_". The configuration metrics
// are registered on r itself, without that prefix. The unwrapped r is stored
// on the returned value.
func NewAlertmanagerMetrics(r prometheus.Registerer) *Alertmanager {
	prefixed := prometheus.WrapRegistererWithPrefix(fmt.Sprintf("%s_%s_", Namespace, Subsystem), r)
	// Each field is set exactly once: repeating a field name inside a single
	// composite literal is a compile-time error.
	return &Alertmanager{
		Registerer:                r,
		Alerts:                    metrics.NewAlerts("grafana", prefixed),
		AlertmanagerConfigMetrics: NewAlertmanagerConfigMetrics(r),
	}
}
// AlertmanagerConfigMetrics holds one gauge per matcher flavour found in an
// Alertmanager configuration: matchers, match_re, match and object_matchers.
type AlertmanagerConfigMetrics struct {
	Matchers, MatchRE, Match, ObjectMatchers prometheus.Gauge
}
// NewAlertmanagerConfigMetrics builds the four gauges that report how many
// matchers, match_re, match and object_matchers entries a configuration
// contains.
//
// When r is non-nil the gauges are registered on it with MustRegister, which
// panics if registration fails (for instance when a collector with the same
// name is already registered on r). When r is nil the gauges are returned
// unregistered.
func NewAlertmanagerConfigMetrics(r prometheus.Registerer) *AlertmanagerConfigMetrics {
	m := &AlertmanagerConfigMetrics{
		Matchers: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "alertmanager_config_matchers",
			Help: "The total number of matchers",
		}),
		MatchRE: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "alertmanager_config_match_re",
			// Help text names the configuration key, match_re.
			Help: "The total number of match_re",
		}),
		Match: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "alertmanager_config_match",
			Help: "The total number of match",
		}),
		ObjectMatchers: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "alertmanager_config_object_matchers",
			Help: "The total number of object_matchers",
		}),
	}
	if r != nil {
		r.MustRegister(m.Matchers, m.MatchRE, m.Match, m.ObjectMatchers)
	}
	return m
}

View File

@ -107,6 +107,13 @@ type AlertmanagerAggregatedMetrics struct {
// exported metrics, gathered from Alertmanager Dispatcher
dispatchAggrGroups *prometheus.Desc
dispatchProcessingDuration *prometheus.Desc
// added to measure usage of matchers, match_re, match and
// object_matchers
matchers *prometheus.Desc
matchRE *prometheus.Desc
match *prometheus.Desc
objectMatchers *prometheus.Desc
}
func NewAlertmanagerAggregatedMetrics(registries *metrics.TenantRegistries) *AlertmanagerAggregatedMetrics {
@ -221,6 +228,23 @@ func NewAlertmanagerAggregatedMetrics(registries *metrics.TenantRegistries) *Ale
fmt.Sprintf("%s_%s_dispatcher_alert_processing_duration_seconds", Namespace, Subsystem),
"Summary of latencies for the processing of alerts.",
nil, nil),
matchers: prometheus.NewDesc(
fmt.Sprintf("%s_%s_alertmanager_config_matchers", Namespace, Subsystem),
"The total number of matchers",
nil, nil),
matchRE: prometheus.NewDesc(
fmt.Sprintf("%s_%s_alertmanager_config_match_re", Namespace, Subsystem),
"The total number of matchRE",
nil, nil),
match: prometheus.NewDesc(
fmt.Sprintf("%s_%s_alertmanager_config_match", Namespace, Subsystem),
"The total number of match",
nil, nil),
objectMatchers: prometheus.NewDesc(
fmt.Sprintf("%s_%s_alertmanager_config_object_matchers", Namespace, Subsystem),
"The total number of object_matchers",
nil, nil),
}
return aggregatedMetrics
@ -257,6 +281,11 @@ func (a *AlertmanagerAggregatedMetrics) Describe(out chan<- *prometheus.Desc) {
out <- a.dispatchAggrGroups
out <- a.dispatchProcessingDuration
out <- a.matchers
out <- a.matchRE
out <- a.match
out <- a.objectMatchers
}
func (a *AlertmanagerAggregatedMetrics) Collect(out chan<- prometheus.Metric) {
@ -292,4 +321,9 @@ func (a *AlertmanagerAggregatedMetrics) Collect(out chan<- prometheus.Metric) {
data.SendSumOfGauges(out, a.dispatchAggrGroups, "alertmanager_dispatcher_aggregation_groups")
data.SendSumOfSummaries(out, a.dispatchProcessingDuration, "alertmanager_dispatcher_alert_processing_duration_seconds")
data.SendSumOfGauges(out, a.matchers, "alertmanager_config_matchers")
data.SendSumOfGauges(out, a.matchRE, "alertmanager_config_match_re")
data.SendSumOfGauges(out, a.match, "alertmanager_config_match")
data.SendSumOfGauges(out, a.objectMatchers, "alertmanager_config_object_matchers")
}

View File

@ -12,6 +12,7 @@ import (
alertingNotify "github.com/grafana/alerting/notify"
"github.com/grafana/alerting/receivers"
alertingTemplates "github.com/grafana/alerting/templates"
"github.com/prometheus/alertmanager/config"
amv2 "github.com/prometheus/alertmanager/api/v2/models"
@ -47,6 +48,7 @@ type Alertmanager struct {
Base *alertingNotify.GrafanaAlertmanager
logger log.Logger
ConfigMetrics *metrics.AlertmanagerConfigMetrics
Settings *setting.Cfg
Store AlertingStore
fileStore *FileStore
@ -133,6 +135,7 @@ func newAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store A
am := &Alertmanager{
Base: gam,
ConfigMetrics: m.AlertmanagerConfigMetrics,
Settings: cfg,
Store: store,
NotificationService: ns,
@ -237,6 +240,44 @@ func (am *Alertmanager) ApplyConfig(ctx context.Context, dbCfg *ngmodels.AlertCo
return outerErr
}
// AggregateMatchersUsage accumulates, per matcher flavour, how many entries
// were counted while walking a configuration. The zero value is an empty
// tally ready to be added to.
type AggregateMatchersUsage struct {
	Matchers, MatchRE, Match, ObjectMatchers int
}
// updateConfigMetrics counts the matchers used by the routing tree and by the
// inhibition rules of cfg, then publishes the four totals on the gauges held
// in am.ConfigMetrics.
func (am *Alertmanager) updateConfigMetrics(cfg *apimodels.PostableUserConfig) {
	usage := new(AggregateMatchersUsage)
	am.aggregateRouteMatchers(cfg.AlertmanagerConfig.Route, usage)
	am.aggregateInhibitMatchers(cfg.AlertmanagerConfig.InhibitRules, usage)

	// Gauges hold absolute values, so every call overwrites the previous totals.
	gauges := am.ConfigMetrics
	gauges.Matchers.Set(float64(usage.Matchers))
	gauges.MatchRE.Set(float64(usage.MatchRE))
	gauges.Match.Set(float64(usage.Match))
	gauges.ObjectMatchers.Set(float64(usage.ObjectMatchers))
}
// aggregateRouteMatchers adds the matcher counts of route r and, recursively,
// of all of its child routes to amu.
//
// A nil route contributes nothing. This covers both a configuration without a
// root route and a nil entry in a route's children, either of which would
// otherwise cause a nil pointer dereference.
func (am *Alertmanager) aggregateRouteMatchers(r *apimodels.Route, amu *AggregateMatchersUsage) {
	if r == nil {
		return
	}
	amu.Matchers += len(r.Matchers)
	amu.MatchRE += len(r.MatchRE)
	amu.Match += len(r.Match)
	amu.ObjectMatchers += len(r.ObjectMatchers)
	for _, next := range r.Routes {
		am.aggregateRouteMatchers(next, amu)
	}
}
// aggregateInhibitMatchers adds to amu the number of source and target
// matchers declared by each inhibition rule, grouped by matcher flavour.
// The ObjectMatchers tally is left untouched by this function.
func (am *Alertmanager) aggregateInhibitMatchers(rules []config.InhibitRule, amu *AggregateMatchersUsage) {
	for i := range rules {
		rule := &rules[i]
		amu.Matchers += len(rule.SourceMatchers) + len(rule.TargetMatchers)
		amu.MatchRE += len(rule.SourceMatchRE) + len(rule.TargetMatchRE)
		amu.Match += len(rule.SourceMatch) + len(rule.TargetMatch)
	}
}
// applyConfig applies a new configuration by re-initializing all components using the configuration provided.
// It returns a boolean indicating whether the user config was changed and an error.
// It is not safe to call concurrently.
@ -274,6 +315,8 @@ func (am *Alertmanager) applyConfig(cfg *apimodels.PostableUserConfig, rawConfig
return false, nil
}
am.updateConfigMetrics(cfg)
err = am.Base.ApplyConfig(AlertingConfiguration{
rawAlertmanagerConfig: rawConfig,
alertmanagerConfig: cfg.AlertmanagerConfig,