mirror of
https://github.com/grafana/grafana.git
synced 2025-02-25 18:55:37 -06:00
Alerting: Add metric to check for default AM configurations (#80225)
* Alerting: Add metric to check for default AM configurations
* Use a gauge for the config hash
* don't go out of bounds when converting uint64 to float64
* expose metric for config hash
* update metrics after applying config
This commit is contained in:
parent
06800e2d31
commit
3afd94185c
@ -24,6 +24,7 @@ func NewAlertmanagerMetrics(r prometheus.Registerer) *Alertmanager {
|
||||
}
|
||||
|
||||
type AlertmanagerConfigMetrics struct {
|
||||
ConfigHash *prometheus.GaugeVec
|
||||
Matchers prometheus.Gauge
|
||||
MatchRE prometheus.Gauge
|
||||
Match prometheus.Gauge
|
||||
@ -32,6 +33,10 @@ type AlertmanagerConfigMetrics struct {
|
||||
|
||||
func NewAlertmanagerConfigMetrics(r prometheus.Registerer) *AlertmanagerConfigMetrics {
|
||||
m := &AlertmanagerConfigMetrics{
|
||||
ConfigHash: prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Name: "alertmanager_config_hash",
|
||||
Help: "The hash of the Alertmanager configuration.",
|
||||
}, []string{"org"}),
|
||||
Matchers: prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "alertmanager_config_matchers",
|
||||
Help: "The total number of matchers",
|
||||
@ -50,7 +55,7 @@ func NewAlertmanagerConfigMetrics(r prometheus.Registerer) *AlertmanagerConfigMe
|
||||
}),
|
||||
}
|
||||
if r != nil {
|
||||
r.MustRegister(m.Matchers, m.MatchRE, m.Match, m.ObjectMatchers)
|
||||
r.MustRegister(m.ConfigHash, m.Matchers, m.MatchRE, m.Match, m.ObjectMatchers)
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
@ -117,6 +117,8 @@ type AlertmanagerAggregatedMetrics struct {
|
||||
matchRE *prometheus.Desc
|
||||
match *prometheus.Desc
|
||||
objectMatchers *prometheus.Desc
|
||||
|
||||
configHash *prometheus.Desc
|
||||
}
|
||||
|
||||
func NewAlertmanagerAggregatedMetrics(registries *metrics.TenantRegistries) *AlertmanagerAggregatedMetrics {
|
||||
@ -253,6 +255,11 @@ func NewAlertmanagerAggregatedMetrics(registries *metrics.TenantRegistries) *Ale
|
||||
fmt.Sprintf("%s_%s_alertmanager_config_object_matchers", Namespace, Subsystem),
|
||||
"The total number of object_matchers",
|
||||
nil, nil),
|
||||
|
||||
configHash: prometheus.NewDesc(
|
||||
fmt.Sprintf("%s_%s_alertmanager_config_hash", Namespace, Subsystem),
|
||||
"The hash of the Alertmanager configuration.",
|
||||
[]string{"org"}, nil),
|
||||
}
|
||||
|
||||
return aggregatedMetrics
|
||||
@ -296,6 +303,8 @@ func (a *AlertmanagerAggregatedMetrics) Describe(out chan<- *prometheus.Desc) {
|
||||
out <- a.matchRE
|
||||
out <- a.match
|
||||
out <- a.objectMatchers
|
||||
|
||||
out <- a.configHash
|
||||
}
|
||||
|
||||
func (a *AlertmanagerAggregatedMetrics) Collect(out chan<- prometheus.Metric) {
|
||||
@ -338,4 +347,6 @@ func (a *AlertmanagerAggregatedMetrics) Collect(out chan<- prometheus.Metric) {
|
||||
data.SendSumOfGauges(out, a.matchRE, "alertmanager_config_match_re")
|
||||
data.SendSumOfGauges(out, a.match, "alertmanager_config_match")
|
||||
data.SendSumOfGauges(out, a.objectMatchers, "alertmanager_config_object_matchers")
|
||||
|
||||
data.SendMaxOfGaugesPerTenant(out, a.configHash, "alertmanager_config_hash")
|
||||
}
|
||||
|
@ -3,6 +3,7 @@ package notifier
|
||||
import (
|
||||
"context"
|
||||
"crypto/md5"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
@ -255,6 +256,10 @@ func (am *alertmanager) updateConfigMetrics(cfg *apimodels.PostableUserConfig) {
|
||||
am.ConfigMetrics.MatchRE.Set(float64(amu.MatchRE))
|
||||
am.ConfigMetrics.Match.Set(float64(amu.Match))
|
||||
am.ConfigMetrics.ObjectMatchers.Set(float64(amu.ObjectMatchers))
|
||||
|
||||
am.ConfigMetrics.ConfigHash.
|
||||
WithLabelValues(strconv.FormatInt(am.orgID, 10)).
|
||||
Set(hashAsMetricValue(am.Base.ConfigHash()))
|
||||
}
|
||||
|
||||
func (am *alertmanager) aggregateRouteMatchers(r *apimodels.Route, amu *AggregateMatchersUsage) {
|
||||
@ -315,8 +320,6 @@ func (am *alertmanager) applyConfig(cfg *apimodels.PostableUserConfig, rawConfig
|
||||
return false, nil
|
||||
}
|
||||
|
||||
am.updateConfigMetrics(cfg)
|
||||
|
||||
err = am.Base.ApplyConfig(AlertingConfiguration{
|
||||
rawAlertmanagerConfig: rawConfig,
|
||||
alertmanagerConfig: cfg.AlertmanagerConfig,
|
||||
@ -327,6 +330,7 @@ func (am *alertmanager) applyConfig(cfg *apimodels.PostableUserConfig, rawConfig
|
||||
return false, err
|
||||
}
|
||||
|
||||
am.updateConfigMetrics(cfg)
|
||||
return true, nil
|
||||
}
|
||||
|
||||
@ -421,3 +425,13 @@ func (e AlertValidationError) Error() string {
|
||||
// nilLimits is a stateless type whose MaxNumberOfAggregationGroups method
// always returns 0.
type nilLimits struct{}
|
||||
|
||||
// MaxNumberOfAggregationGroups always reports 0.
// NOTE(review): presumably the consumer of this method treats 0 as
// "no limit" — confirm against the caller.
func (nilLimits) MaxNumberOfAggregationGroups() int {
	return 0
}
|
||||
|
||||
// hashAsMetricValue converts a 16-byte hash into a float64 suitable for use
// as a metric value. Only the first 6 bytes of data contribute to the result;
// they are interpreted as a little-endian unsigned integer.
//
// This function is taken from upstream, modified to take a [16]byte instead of a []byte.
// https://github.com/prometheus/alertmanager/blob/30fa9cd44bc91c0d6adcc9985609bb08a09a127b/config/coordinator.go#L149-L156
func hashAsMetricValue(data [16]byte) float64 {
	// We only want 48 bits as a float64 only has a 53 bit mantissa, so a
	// 48-bit integer converts to float64 without losing precision.
	var buf [8]byte // the two high-order bytes stay zero
	copy(buf[:], data[:6])
	return float64(binary.LittleEndian.Uint64(buf[:]))
}
|
||||
|
Loading…
Reference in New Issue
Block a user