mirror of
https://github.com/grafana/grafana.git
synced 2025-02-11 16:15:42 -06:00
Alerting: Add matchers metrics to Alertmanager (#69855)
This commit is contained in:
parent
f649619e20
commit
f085e99d3c
@ -10,12 +10,47 @@ import (
|
||||
type Alertmanager struct {
|
||||
Registerer prometheus.Registerer
|
||||
*metrics.Alerts
|
||||
*AlertmanagerConfigMetrics
|
||||
}
|
||||
|
||||
// NewAlertmanagerMetrics creates a set of metrics for the Alertmanager of each organization.
|
||||
func NewAlertmanagerMetrics(r prometheus.Registerer) *Alertmanager {
|
||||
other := prometheus.WrapRegistererWithPrefix(fmt.Sprintf("%s_%s_", Namespace, Subsystem), r)
|
||||
return &Alertmanager{
|
||||
Registerer: r,
|
||||
Alerts: metrics.NewAlerts("grafana", prometheus.WrapRegistererWithPrefix(fmt.Sprintf("%s_%s_", Namespace, Subsystem), r)),
|
||||
Registerer: r,
|
||||
Alerts: metrics.NewAlerts("grafana", other),
|
||||
AlertmanagerConfigMetrics: NewAlertmanagerConfigMetrics(r),
|
||||
}
|
||||
}
|
||||
|
||||
type AlertmanagerConfigMetrics struct {
|
||||
Matchers prometheus.Gauge
|
||||
MatchRE prometheus.Gauge
|
||||
Match prometheus.Gauge
|
||||
ObjectMatchers prometheus.Gauge
|
||||
}
|
||||
|
||||
func NewAlertmanagerConfigMetrics(r prometheus.Registerer) *AlertmanagerConfigMetrics {
|
||||
m := &AlertmanagerConfigMetrics{
|
||||
Matchers: prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "alertmanager_config_matchers",
|
||||
Help: "The total number of matchers",
|
||||
}),
|
||||
MatchRE: prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "alertmanager_config_match_re",
|
||||
Help: "The total number of matche_re",
|
||||
}),
|
||||
Match: prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "alertmanager_config_match",
|
||||
Help: "The total number of match",
|
||||
}),
|
||||
ObjectMatchers: prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "alertmanager_config_object_matchers",
|
||||
Help: "The total number of object_matchers",
|
||||
}),
|
||||
}
|
||||
if r != nil {
|
||||
r.MustRegister(m.Matchers, m.MatchRE, m.Match, m.ObjectMatchers)
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
@ -107,6 +107,13 @@ type AlertmanagerAggregatedMetrics struct {
|
||||
// exported metrics, gathered from Alertmanager Dispatcher
|
||||
dispatchAggrGroups *prometheus.Desc
|
||||
dispatchProcessingDuration *prometheus.Desc
|
||||
|
||||
// added to measure usage of matchers, match_re, match and
|
||||
// object_matchers
|
||||
matchers *prometheus.Desc
|
||||
matchRE *prometheus.Desc
|
||||
match *prometheus.Desc
|
||||
objectMatchers *prometheus.Desc
|
||||
}
|
||||
|
||||
func NewAlertmanagerAggregatedMetrics(registries *metrics.TenantRegistries) *AlertmanagerAggregatedMetrics {
|
||||
@ -221,6 +228,23 @@ func NewAlertmanagerAggregatedMetrics(registries *metrics.TenantRegistries) *Ale
|
||||
fmt.Sprintf("%s_%s_dispatcher_alert_processing_duration_seconds", Namespace, Subsystem),
|
||||
"Summary of latencies for the processing of alerts.",
|
||||
nil, nil),
|
||||
|
||||
matchers: prometheus.NewDesc(
|
||||
fmt.Sprintf("%s_%s_alertmanager_config_matchers", Namespace, Subsystem),
|
||||
"The total number of matchers",
|
||||
nil, nil),
|
||||
matchRE: prometheus.NewDesc(
|
||||
fmt.Sprintf("%s_%s_alertmanager_config_match_re", Namespace, Subsystem),
|
||||
"The total number of matchRE",
|
||||
nil, nil),
|
||||
match: prometheus.NewDesc(
|
||||
fmt.Sprintf("%s_%s_alertmanager_config_match", Namespace, Subsystem),
|
||||
"The total number of match",
|
||||
nil, nil),
|
||||
objectMatchers: prometheus.NewDesc(
|
||||
fmt.Sprintf("%s_%s_alertmanager_config_object_matchers", Namespace, Subsystem),
|
||||
"The total number of object_matchers",
|
||||
nil, nil),
|
||||
}
|
||||
|
||||
return aggregatedMetrics
|
||||
@ -257,6 +281,11 @@ func (a *AlertmanagerAggregatedMetrics) Describe(out chan<- *prometheus.Desc) {
|
||||
|
||||
out <- a.dispatchAggrGroups
|
||||
out <- a.dispatchProcessingDuration
|
||||
|
||||
out <- a.matchers
|
||||
out <- a.matchRE
|
||||
out <- a.match
|
||||
out <- a.objectMatchers
|
||||
}
|
||||
|
||||
func (a *AlertmanagerAggregatedMetrics) Collect(out chan<- prometheus.Metric) {
|
||||
@ -292,4 +321,9 @@ func (a *AlertmanagerAggregatedMetrics) Collect(out chan<- prometheus.Metric) {
|
||||
|
||||
data.SendSumOfGauges(out, a.dispatchAggrGroups, "alertmanager_dispatcher_aggregation_groups")
|
||||
data.SendSumOfSummaries(out, a.dispatchProcessingDuration, "alertmanager_dispatcher_alert_processing_duration_seconds")
|
||||
|
||||
data.SendSumOfGauges(out, a.matchers, "alertmanager_config_matchers")
|
||||
data.SendSumOfGauges(out, a.matchRE, "alertmanager_config_match_re")
|
||||
data.SendSumOfGauges(out, a.match, "alertmanager_config_match")
|
||||
data.SendSumOfGauges(out, a.objectMatchers, "alertmanager_config_object_matchers")
|
||||
}
|
||||
|
@ -12,6 +12,7 @@ import (
|
||||
alertingNotify "github.com/grafana/alerting/notify"
|
||||
"github.com/grafana/alerting/receivers"
|
||||
alertingTemplates "github.com/grafana/alerting/templates"
|
||||
"github.com/prometheus/alertmanager/config"
|
||||
|
||||
amv2 "github.com/prometheus/alertmanager/api/v2/models"
|
||||
|
||||
@ -47,6 +48,7 @@ type Alertmanager struct {
|
||||
Base *alertingNotify.GrafanaAlertmanager
|
||||
logger log.Logger
|
||||
|
||||
ConfigMetrics *metrics.AlertmanagerConfigMetrics
|
||||
Settings *setting.Cfg
|
||||
Store AlertingStore
|
||||
fileStore *FileStore
|
||||
@ -133,6 +135,7 @@ func newAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store A
|
||||
|
||||
am := &Alertmanager{
|
||||
Base: gam,
|
||||
ConfigMetrics: m.AlertmanagerConfigMetrics,
|
||||
Settings: cfg,
|
||||
Store: store,
|
||||
NotificationService: ns,
|
||||
@ -237,6 +240,44 @@ func (am *Alertmanager) ApplyConfig(ctx context.Context, dbCfg *ngmodels.AlertCo
|
||||
return outerErr
|
||||
}
|
||||
|
||||
// AggregateMatchersUsage accumulates how many matchers of each kind were
// counted while walking an Alertmanager configuration.
type AggregateMatchersUsage struct {
	// Matchers counts entries found in "matchers" style fields.
	Matchers int
	// MatchRE counts entries found in "match_re" style fields.
	MatchRE int
	// Match counts entries found in "match" style fields.
	Match int
	// ObjectMatchers counts entries found in "object_matchers" fields.
	ObjectMatchers int
}
|
||||
|
||||
func (am *Alertmanager) updateConfigMetrics(cfg *apimodels.PostableUserConfig) {
|
||||
var amu AggregateMatchersUsage
|
||||
am.aggregateRouteMatchers(cfg.AlertmanagerConfig.Route, &amu)
|
||||
am.aggregateInhibitMatchers(cfg.AlertmanagerConfig.InhibitRules, &amu)
|
||||
am.ConfigMetrics.Matchers.Set(float64(amu.Matchers))
|
||||
am.ConfigMetrics.MatchRE.Set(float64(amu.MatchRE))
|
||||
am.ConfigMetrics.Match.Set(float64(amu.Match))
|
||||
am.ConfigMetrics.ObjectMatchers.Set(float64(amu.ObjectMatchers))
|
||||
}
|
||||
|
||||
func (am *Alertmanager) aggregateRouteMatchers(r *apimodels.Route, amu *AggregateMatchersUsage) {
|
||||
amu.Matchers += len(r.Matchers)
|
||||
amu.MatchRE += len(r.MatchRE)
|
||||
amu.Match += len(r.Match)
|
||||
amu.ObjectMatchers += len(r.ObjectMatchers)
|
||||
for _, next := range r.Routes {
|
||||
am.aggregateRouteMatchers(next, amu)
|
||||
}
|
||||
}
|
||||
|
||||
func (am *Alertmanager) aggregateInhibitMatchers(rules []config.InhibitRule, amu *AggregateMatchersUsage) {
|
||||
for _, r := range rules {
|
||||
amu.Matchers += len(r.SourceMatchers)
|
||||
amu.Matchers += len(r.TargetMatchers)
|
||||
amu.MatchRE += len(r.SourceMatchRE)
|
||||
amu.MatchRE += len(r.TargetMatchRE)
|
||||
amu.Match += len(r.SourceMatch)
|
||||
amu.Match += len(r.TargetMatch)
|
||||
}
|
||||
}
|
||||
|
||||
// applyConfig applies a new configuration by re-initializing all components using the configuration provided.
|
||||
// It returns a boolean indicating whether the user config was changed and an error.
|
||||
// It is not safe to call concurrently.
|
||||
@ -274,6 +315,8 @@ func (am *Alertmanager) applyConfig(cfg *apimodels.PostableUserConfig, rawConfig
|
||||
return false, nil
|
||||
}
|
||||
|
||||
am.updateConfigMetrics(cfg)
|
||||
|
||||
err = am.Base.ApplyConfig(AlertingConfiguration{
|
||||
rawAlertmanagerConfig: rawConfig,
|
||||
alertmanagerConfig: cfg.AlertmanagerConfig,
|
||||
|
Loading…
Reference in New Issue
Block a user